From 0889a9441d98af7951c5377647413d79c84c9efa Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 23 Sep 2006 22:11:07 +0000
Subject: CIFS: Use SEEK_END instead of hardcoded value

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c3ef1c0d0e68..f5ba41132488 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -508,7 +508,7 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf,
 static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
 {
 	/* origin == SEEK_END => we must revalidate the cached file length */
-	if (origin == 2) {
+	if (origin == SEEK_END) {
 		int retval = cifs_revalidate(file->f_dentry);
 		if (retval < 0)
 			return (loff_t)retval;
-- 
cgit v1.2.3


From 1bd5bbcb6531776a8f73e2cc6287fc4dd542e1c7 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 28 Sep 2006 03:35:57 +0000
Subject: [CIFS] Legacy time handling for Win9x and OS/2 part 1

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h |  3 +++
 fs/cifs/cifssmb.c   | 10 +++++++++-
 fs/cifs/inode.c     |  7 +++++--
 fs/cifs/link.c      |  6 +++++-
 fs/cifs/netmisc.c   | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/readdir.c   |  7 +++++++
 6 files changed, 80 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b35c55c3c8bb..2fbc982aa13d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -80,6 +80,9 @@ extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16,
 extern void DeleteOplockQEntry(struct oplock_q_entry *);
 extern struct timespec cifs_NTtimeToUnix(u64 /* utc nanoseconds since 1601 */ );
 extern u64 cifs_UnixTimeToNT(struct timespec);
+extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time);
+extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time);
+
 extern int cifs_get_inode_info(struct inode **pinode,
 			const unsigned char *search_path, 
 			FILE_ALL_INFO * pfile_info,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 075d8fb3d376..2851d6e0d823 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2856,7 +2856,6 @@ qsec_out:
 	return rc;
 }
 
-
 /* Legacy Query Path Information call for lookup to old servers such
    as Win9x/WinME */
 int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
@@ -2898,7 +2897,16 @@ QInfRetry:
 	if (rc) {
 		cFYI(1, ("Send error in QueryInfo = %d", rc));
 	} else if (pFinfo) {            /* decode response */
+		struct timespec ts;
+		__u32 time = le32_to_cpu(pSMBr->last_write_time);
+		/* BB FIXME - add time zone adjustment BB */
 		memset(pFinfo, 0, sizeof(FILE_ALL_INFO));
+		ts.tv_nsec = 0;
+		ts.tv_sec = time;
+		/* decode time fields - FILE_ALL_INFO times are little endian */
+		pFinfo->ChangeTime = cpu_to_le64(cifs_UnixTimeToNT(ts));
+		pFinfo->LastWriteTime = pFinfo->ChangeTime;
+		pFinfo->LastAccessTime = 0;
 		pFinfo->AllocationSize =
 			cpu_to_le64(le32_to_cpu(pSMBr->size));
 		pFinfo->EndOfFile = pFinfo->AllocationSize;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b88147c1dc27..06dbce3a1815 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -432,8 +432,11 @@ int cifs_get_inode_info(struct inode **pinode,
 		(pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
 
 		/* Linux can not store file creation time so ignore it */
-		inode->i_atime =
-		    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
+		if(pfindData->LastAccessTime)
+			inode->i_atime = cifs_NTtimeToUnix
+				(le64_to_cpu(pfindData->LastAccessTime));
+		else /* do not need to use current_fs_time - time not stored */
+			inode->i_atime = CURRENT_TIME;
 		inode->i_mtime =
 		    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
 		inode->i_ctime =
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index a57f5d6e6213..0bee8b7e521a 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -254,7 +254,11 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
 				tmpbuffer,
 				len - 1,
 				cifs_sb->local_nls);
-	else {
+	else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
+		cERROR(1,("SFU style symlinks not implemented yet"));
+		/* add open and read as in fs/cifs/inode.c */
+	
+	} else {
 		rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, GENERIC_READ,
 				OPEN_REPARSE_POINT,&fid, &oplock, NULL, 
 				cifs_sb->local_nls, 
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index ce87550e918f..fa5124d9af19 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -909,3 +909,54 @@ cifs_UnixTimeToNT(struct timespec t)
 	/* Convert to 100ns intervals and then add the NTFS time offset. */
 	return (u64) t.tv_sec * 10000000 + t.tv_nsec/100 + NTFS_TIME_OFFSET;
 }
+
+/* Days in a non-leap year that precede the first of each month (Jan = 0) */
+static const int total_days_of_prev_months[] =
+{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};
+
+/* Convert a DOS date/time pair to a little-endian NT timestamp */
+__le64 cnvrtDosCifsTm(__u16 date, __u16 time)
+{
+	return cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(date, time)));
+}
+
+/*
+ * Convert a DOS date/time pair (host byte order) to seconds since 1970.
+ * date: bits 0-4 day, bits 5-8 month (1-12), bits 9-15 years since 1980
+ * time: bits 0-4 seconds / 2, bits 5-10 minutes, bits 11-15 hours
+ * No time zone adjustment is made and tv_nsec is always zero.
+ */
+struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
+{
+	struct timespec ts;
+	int sec, min, days, month, year;
+
+	cFYI(1,("date %d time %d",date, time));
+
+	sec = 2 * (time & 0x1F);
+	min = (time >> 5) & 0x3F;
+	sec += (min * 60);
+	sec += 60 * 60 * ((time >> 11) & 0x1F) /* hours */;
+
+	days = (date & 0x1F) - 1;
+	month = (date >> 5) & 0x0F;
+	if ((month < 1) || (month > 12)) {
+		cERROR(1,("illegal month %d in date", month));
+		/* clamp so the table lookup below stays within bounds */
+		month = (month < 1) ? 1 : 12;
+	}
+	month -= 1;
+	days += total_days_of_prev_months[month];
+	/* 3652 days between 1970 and 1980, plus the leap day of 1980 itself */
+	days += 3653;
+	year = (date >> 9) & 0x7F;
+	days += year * 365;
+	days += (year/4); /* leap days of 1984, 1988 ... up to this year */
+	/* adjust for leap year where we are still before leap day */
+	if (((year & 0x03) == 0) && (month < 2))
+		days -= 1;
+	/* scale in time_t rather than int so wide time_t does not overflow */
+	ts.tv_sec = sec + (time_t)days * 24 * 60 * 60;
+	ts.tv_nsec = 0;
+	return ts;
+}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 9aeb58a7d369..71e86c38e632 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -135,12 +135,19 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 		tmp_inode->i_ctime =
 		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
 	} else { /* legacy, OS2 and DOS style */
+/*		struct timespec ts;*/
 		FIND_FILE_STANDARD_INFO * pfindData = 
 			(FIND_FILE_STANDARD_INFO *)buf;
 
+/*		ts = cnvrtDosUnixTm(
+				le16_to_cpu(pfindData->LastWriteDate),
+				le16_to_cpu(pfindData->LastWriteTime));*/
 		attr = le16_to_cpu(pfindData->Attributes);
 		allocation_size = le32_to_cpu(pfindData->AllocationSize);
 		end_of_file = le32_to_cpu(pfindData->DataSize);
+		/* do not need to use current_fs_time helper function since
+		 time not stored for this case so atime can not "go backwards"
+		 by pulling an older value from disk when inode refreshed */
 		tmp_inode->i_atime = CURRENT_TIME;
 		/* tmp_inode->i_mtime =  BB FIXME - add dos time handling
 		tmp_inode->i_ctime = 0;   BB FIXME */
-- 
cgit v1.2.3


From 2cd646a2d1d5e0e46aa4bb55b1847b0cb35bd855 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 28 Sep 2006 19:43:08 +0000
Subject: [CIFS] Remove static and unused symbols

Most cases of the ones found by Shaggy by
	"make namespacecheck"
could be removed or made static

Ack: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsacl.h     |  4 ++--
 fs/cifs/cifsencrypt.h |  2 --
 fs/cifs/cifsfs.c      |  5 +++--
 fs/cifs/cifsfs.h      |  2 +-
 fs/cifs/cifsglob.h    |  4 ++--
 fs/cifs/cifsproto.h   |  6 ++----
 fs/cifs/cifssmb.c     |  6 ++++--
 fs/cifs/connect.c     | 20 ++++++++++++--------
 fs/cifs/md5.c         |  4 ++--
 fs/cifs/md5.h         |  8 ++++----
 fs/cifs/misc.c        |  2 +-
 fs/cifs/smbdes.c      |  4 ++--
 fs/cifs/smbencrypt.c  |  4 ++--
 13 files changed, 37 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index d0776ac2b804..5eff35d6e564 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -31,8 +31,8 @@ struct cifs_sid {
 } __attribute__((packed));
 
 /* everyone */
-extern const struct cifs_sid sid_everyone;
+/* extern const struct cifs_sid sid_everyone;*/
 /* group users */
-extern const struct cifs_sid sid_user;
+/* extern const struct cifs_sid sid_user;*/
 
 #endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.h b/fs/cifs/cifsencrypt.h
index 03e359b32861..152fa2dcfc6c 100644
--- a/fs/cifs/cifsencrypt.h
+++ b/fs/cifs/cifsencrypt.h
@@ -27,8 +27,6 @@ extern void mdfour(unsigned char *out, unsigned char *in, int n);
 /* smbdes.c */
 extern void E_P16(unsigned char *p14, unsigned char *p16);
 extern void E_P24(unsigned char *p21, unsigned char *c8, unsigned char *p24);
-extern void D_P16(unsigned char *p14, unsigned char *in, unsigned char *out);
-extern void E_old_pw_hash(unsigned char *, unsigned char *, unsigned char *);
 
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f5ba41132488..cd17d4b78173 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -442,7 +442,7 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
-struct super_operations cifs_super_ops = {
+static struct super_operations cifs_super_ops = {
 	.read_inode = cifs_read_inode,
 	.put_super = cifs_put_super,
 	.statfs = cifs_statfs,
@@ -930,7 +930,7 @@ init_cifs(void)
 #ifdef CONFIG_PROC_FS
 	cifs_proc_init();
 #endif
-	INIT_LIST_HEAD(&GlobalServerList);	/* BB not implemented yet */
+/*	INIT_LIST_HEAD(&GlobalServerList);*/	/* BB not implemented yet */
 	INIT_LIST_HEAD(&GlobalSMBSessionList);
 	INIT_LIST_HEAD(&GlobalTreeConnectionList);
 	INIT_LIST_HEAD(&GlobalOplock_Q);
@@ -958,6 +958,7 @@ init_cifs(void)
 	GlobalCurrentXid = 0;
 	GlobalTotalActiveXid = 0;
 	GlobalMaxActiveXid = 0;
+	memset(Local_System_Name, 0, 15);
 	rwlock_init(&GlobalSMBSeslock);
 	spin_lock_init(&GlobalMid_Lock);
 
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index bea875d9a46a..a243f779b363 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -36,7 +36,7 @@ extern const struct address_space_operations cifs_addr_ops;
 extern const struct address_space_operations cifs_addr_ops_smallbuf;
 
 /* Functions related to super block operations */
-extern struct super_operations cifs_super_ops;
+/* extern struct super_operations cifs_super_ops;*/
 extern void cifs_read_inode(struct inode *);
 extern void cifs_delete_inode(struct inode *);
 /* extern void cifs_write_inode(struct inode *); *//* BB not needed yet */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b24006c47df1..441f8d2514fa 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -512,7 +512,8 @@ require use of the stronger protocol */
  * This list helps improve performance and eliminate the messages indicating
  * that we had a communications error talking to the server in this list. 
  */
-GLOBAL_EXTERN struct servers_not_supported *NotSuppList;	/*@z4a */
+/* Feature not supported */
+/* GLOBAL_EXTERN struct servers_not_supported *NotSuppList; */
 
 /*
  * The following is a hash table of all the users we know about.
@@ -568,7 +569,6 @@ GLOBAL_EXTERN unsigned int lookupCacheEnabled;
 GLOBAL_EXTERN unsigned int extended_security;	/* if on, session setup sent 
 				with more secure ntlmssp2 challenge/resp */
 GLOBAL_EXTERN unsigned int sign_CIFS_PDUs;  /* enable smb packet signing */
-GLOBAL_EXTERN unsigned int secFlags;
 GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
 GLOBAL_EXTERN unsigned int CIFSMaxBufSize;  /* max size not including hdr */
 GLOBAL_EXTERN unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 2fbc982aa13d..7dd2f48a4073 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -50,11 +50,11 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
 extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
 			struct kvec *, int /* nvec to send */, 
 			int * /* type of buf returned */ , const int long_op);
-extern int SendReceiveBlockingLock(const unsigned int /* xid */ , struct cifsTconInfo *,
+extern int SendReceiveBlockingLock(const unsigned int /* xid */ , 
+					struct cifsTconInfo *,
 				struct smb_hdr * /* input */ ,
 				struct smb_hdr * /* out */ ,
 				int * /* bytes returned */);
-extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid);
 extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length);
 extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *);
 extern int is_size_safe_to_change(struct cifsInodeInfo *);
@@ -282,8 +282,6 @@ extern void sesInfoFree(struct cifsSesInfo *);
 extern struct cifsTconInfo *tconInfoAlloc(void);
 extern void tconInfoFree(struct cifsTconInfo *);
 
-extern int cifs_reconnect(struct TCP_Server_Info *server);
-
 extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *,__u32 *);
 extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
 			  __u32 *);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 2851d6e0d823..dcd7087a1ae8 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2773,9 +2773,11 @@ GetExtAttrOut:
 
 
 /* security id for everyone */
-const struct cifs_sid sid_everyone = {1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}};
+static const struct cifs_sid sid_everyone =
+		{1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}};
 /* group users */
-const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}};
+static const struct cifs_sid sid_user =
+		{1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}};
 
 /* Convert CIFS ACL to POSIX form */
 static int parse_sec_desc(struct cifs_sid * psec_desc, int acl_len)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0e9ba0b9d71e..b3268e53ab95 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -109,7 +109,7 @@ static int ipv6_connect(struct sockaddr_in6 *psin_server,
 	 * wake up waiters on reconnection? - (not needed currently)
 	 */
 
-int
+static int
 cifs_reconnect(struct TCP_Server_Info *server)
 {
 	int rc = 0;
@@ -771,13 +771,17 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 	separator[0] = ',';
 	separator[1] = 0; 
 
-	memset(vol->source_rfc1001_name,0x20,15);
-	for(i=0;i < strnlen(system_utsname.nodename,15);i++) {
-		/* does not have to be a perfect mapping since the field is
-		informational, only used for servers that do not support
-		port 445 and it can be overridden at mount time */
-		vol->source_rfc1001_name[i] = 
-			toupper(system_utsname.nodename[i]);
+	if(Local_System_Name[0] != 0)
+		memcpy(vol->source_rfc1001_name, Local_System_Name,15);
+	else {
+		memset(vol->source_rfc1001_name,0x20,15);
+		for(i=0;i < strnlen(system_utsname.nodename,15);i++) {
+			/* does not have to be perfect mapping since field is
+			informational, only used for servers that do not support
+			port 445 and it can be overridden at mount time */
+			vol->source_rfc1001_name[i] = 
+				toupper(system_utsname.nodename[i]);
+		}
 	}
 	vol->source_rfc1001_name[15] = 0;
 	/* null target name indicates to use *SMBSERVR default called name
diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c
index 7aa23490541f..273aa0383f27 100644
--- a/fs/cifs/md5.c
+++ b/fs/cifs/md5.c
@@ -255,7 +255,7 @@ MD5Transform(__u32 buf[4], __u32 const in[16])
 /***********************************************************************
  the rfc 2104 version of hmac_md5 initialisation.
 ***********************************************************************/
-void
+static void
 hmac_md5_init_rfc2104(unsigned char *key, int key_len,
 		      struct HMACMD5Context *ctx)
 {
@@ -350,7 +350,7 @@ hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx)
  single function to calculate an HMAC MD5 digest from data.
  use the microsoft hmacmd5 init method because the key is 16 bytes.
 ************************************************************/
-void
+static void
 hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
 	 unsigned char *digest)
 {
diff --git a/fs/cifs/md5.h b/fs/cifs/md5.h
index 00e1c5394fe1..f7d4f4197bac 100644
--- a/fs/cifs/md5.h
+++ b/fs/cifs/md5.h
@@ -27,12 +27,12 @@ void MD5Final(unsigned char digest[16], struct MD5Context *context);
 
 /* The following definitions come from lib/hmacmd5.c  */
 
-void hmac_md5_init_rfc2104(unsigned char *key, int key_len,
-			struct HMACMD5Context *ctx);
+/* void hmac_md5_init_rfc2104(unsigned char *key, int key_len,
+			struct HMACMD5Context *ctx);*/
 void hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
 			struct HMACMD5Context *ctx);
 void hmac_md5_update(const unsigned char *text, int text_len,
 			struct HMACMD5Context *ctx);
 void hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx);
-void hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
-			unsigned char *digest);
+/* void hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
+			unsigned char *digest);*/
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 22c937e5884f..ca6e9b1413fa 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -389,7 +389,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
 	return;
 }
 
-int
+static int
 checkSMBhdr(struct smb_hdr *smb, __u16 mid)
 {
 	/* Make sure that this really is an SMB, that it is a response, 
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index efaa044523a7..2b193e422f83 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -364,14 +364,14 @@ E_P24(unsigned char *p21, unsigned char *c8, unsigned char *p24)
 	smbhash(p24 + 16, c8, p21 + 14, 1);
 }
 
-void
+static void
 D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
 {
 	smbhash(out, in, p14, 0);
 	smbhash(out + 8, in + 8, p14 + 7, 0);
 }
 
-void
+static void
 E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out)
 {
 	smbhash(out, in, p14, 1);
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index f518c5e45035..c7e55a940e2e 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -145,7 +145,7 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16)
 }
 
 /* Does both the NT and LM owfs of a user's password */
-void
+static void
 nt_lm_owf_gen(char *pwd, unsigned char nt_p16[16], unsigned char p16[16])
 {
 	char passwd[514];
@@ -223,7 +223,7 @@ SMBOWFencrypt(unsigned char passwd[16], unsigned char *c8,
 }
 
 /* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */
-void
+static void
 NTLMSSPOWFencrypt(unsigned char passwd[8],
 		  unsigned char *ntlmchalresp, unsigned char p24[24])
 {
-- 
cgit v1.2.3


From e33c74d06e2b46a5f187ec7f60248da774c84e72 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 28 Sep 2006 20:35:48 +0000
Subject: [CIFS] Fix build break

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index cd17d4b78173..51e888fcef2d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -63,6 +63,7 @@ extern struct task_struct * oplockThread; /* remove sparse warning */
 struct task_struct * oplockThread = NULL;
 extern struct task_struct * dnotifyThread; /* remove sparse warning */
 struct task_struct * dnotifyThread = NULL;
+static struct super_operations cifs_super_ops; 
 unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
 module_param(CIFSMaxBufSize, int, 0);
 MODULE_PARM_DESC(CIFSMaxBufSize,"Network buffer size (not including header). Default: 16384 Range: 8192 to 130048");
-- 
cgit v1.2.3


From 2eaf55862e8eb03999169d84f21eadffc88a36ce Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 28 Sep 2006 20:41:48 +0000
Subject: [CIFS] Remove unused prototypes

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/smbencrypt.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index c7e55a940e2e..48314e5e785d 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -51,11 +51,8 @@
 
 void SMBencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24);
 void E_md4hash(const unsigned char *passwd, unsigned char *p16);
-void nt_lm_owf_gen(char *pwd, unsigned char nt_p16[16], unsigned char p16[16]);
 static void SMBOWFencrypt(unsigned char passwd[16], unsigned char *c8,
 		   unsigned char p24[24]);
-void NTLMSSPOWFencrypt(unsigned char passwd[8],
-		       unsigned char *ntlmchalresp, unsigned char p24[24]);
 void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24);
 
 /*
-- 
cgit v1.2.3


From e10847ed499cb86bf8ce12f3a686be8a98f8e140 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 28 Sep 2006 20:49:01 +0000
Subject: [CIFS] More removing of unused functions

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/md5.c        | 4 ++++
 fs/cifs/smbdes.c     | 2 +-
 fs/cifs/smbencrypt.c | 4 ++++
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c
index 273aa0383f27..e6a2097d836b 100644
--- a/fs/cifs/md5.c
+++ b/fs/cifs/md5.c
@@ -252,6 +252,7 @@ MD5Transform(__u32 buf[4], __u32 const in[16])
 	buf[3] += d;
 }
 
+#if 0   /* currently unused */
 /***********************************************************************
  the rfc 2104 version of hmac_md5 initialisation.
 ***********************************************************************/
@@ -289,6 +290,7 @@ hmac_md5_init_rfc2104(unsigned char *key, int key_len,
 	MD5Init(&ctx->ctx);
 	MD5Update(&ctx->ctx, ctx->k_ipad, 64);
 }
+#endif
 
 /***********************************************************************
  the microsoft version of hmac_md5 initialisation.
@@ -333,6 +335,7 @@ hmac_md5_update(const unsigned char *text, int text_len,
 /***********************************************************************
  finish off hmac_md5 "inner" buffer and generate outer one.
 ***********************************************************************/
+#if 0   /* currently unused */
 void
 hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx)
 {
@@ -361,3 +364,4 @@ hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
 	}
 	hmac_md5_final(digest, &ctx);
 }
+#endif
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index 2b193e422f83..7a1b2b961ec8 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -364,6 +364,7 @@ E_P24(unsigned char *p21, unsigned char *c8, unsigned char *p24)
 	smbhash(p24 + 16, c8, p21 + 14, 1);
 }
 
+#if 0 /* currently unused */
 static void
 D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
 {
@@ -377,7 +378,6 @@ E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out)
 	smbhash(out, in, p14, 1);
 	smbhash(out + 8, in + 8, p14 + 7, 1);
 }
-#if 0
 /* these routines are currently unneeded, but may be
 	needed later */
 void
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 48314e5e785d..4b25ba92180d 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -141,6 +141,7 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16)
 	memset(wpwd,0,129 * 2);
 }
 
+#if 0 /* currently unused */
 /* Does both the NT and LM owfs of a user's password */
 static void
 nt_lm_owf_gen(char *pwd, unsigned char nt_p16[16], unsigned char p16[16])
@@ -168,6 +169,7 @@ nt_lm_owf_gen(char *pwd, unsigned char nt_p16[16], unsigned char p16[16])
 	/* clear out local copy of user's password (just being paranoid). */
 	memset(passwd, '\0', sizeof (passwd));
 }
+#endif
 
 /* Does the NTLMv2 owfs of a user's password */
 #if 0  /* function not needed yet - but will be soon */
@@ -220,6 +222,7 @@ SMBOWFencrypt(unsigned char passwd[16], unsigned char *c8,
 }
 
 /* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */
+#if 0 /* currently unused */
 static void
 NTLMSSPOWFencrypt(unsigned char passwd[8],
 		  unsigned char *ntlmchalresp, unsigned char p24[24])
@@ -232,6 +235,7 @@ NTLMSSPOWFencrypt(unsigned char passwd[8],
 
 	E_P24(p21, ntlmchalresp, p24);
 }
+#endif
 
 /* Does the NT MD4 hash then des encryption. */
 
-- 
cgit v1.2.3


From a3ab41f10e2f5087e515da358680c88dd61d4832 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 28 Sep 2006 20:52:08 +0000
Subject: [CIFS] Fix build break ifdef in wrong place

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/md5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c
index e6a2097d836b..ccebf9b7eb86 100644
--- a/fs/cifs/md5.c
+++ b/fs/cifs/md5.c
@@ -335,7 +335,6 @@ hmac_md5_update(const unsigned char *text, int text_len,
 /***********************************************************************
  finish off hmac_md5 "inner" buffer and generate outer one.
 ***********************************************************************/
-#if 0   /* currently unused */
 void
 hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx)
 {
@@ -353,6 +352,7 @@ hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx)
  single function to calculate an HMAC MD5 digest from data.
  use the microsoft hmacmd5 init method because the key is 16 bytes.
 ************************************************************/
+#if 0 /* currently unused */
 static void
 hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
 	 unsigned char *digest)
-- 
cgit v1.2.3


From bf97d28711e2dc4dc947faa6477cd1b36b91a2da Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 28 Sep 2006 21:34:06 +0000
Subject: [CIFS] CIFS support for /proc/<pid>/mountstats part 1

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 51e888fcef2d..7ecfcbf31e55 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -437,6 +437,14 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags)
 	return;
 }
 
+#ifdef CONFIG_CIFS_STATS2
+static int cifs_show_stats(struct seq_file *s, struct vfsmount *mnt)
+{
+	/* BB FIXME - placeholder: writes nothing to s and returns success */
+	return 0;
+}
+#endif
+
 static int cifs_remount(struct super_block *sb, int *flags, char *data)
 {
 	*flags |= MS_NODIRATIME;
@@ -456,6 +464,9 @@ static struct super_operations cifs_super_ops = {
 	.show_options = cifs_show_options,
 	.umount_begin   = cifs_umount_begin,
 	.remount_fs = cifs_remount,
+#ifdef CONFIG_CIFS_STATS2
+	cifs_show_stats,
+#endif
 };
 
 static int
-- 
cgit v1.2.3


From 25ee4a98c662317a7973f3053567d4ec51857511 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 30 Sep 2006 00:54:23 +0000
Subject: [CIFS] Handle legacy servers which return undefined time zone

Signed-off-by: Guenter Kukkukk <linux@kukkukk.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index dcd7087a1ae8..99718591ea29 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -447,6 +447,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 #ifdef CONFIG_CIFS_WEAK_PW_HASH 
 	} else if((pSMBr->hdr.WordCount == 13)
 			&& (pSMBr->DialectIndex == LANMAN_PROT)) {
+		int tmp, adjust;
 		struct lanman_neg_rsp * rsp = (struct lanman_neg_rsp *)pSMBr;
 
 		if((secFlags & CIFSSEC_MAY_LANMAN) || 
@@ -473,11 +474,36 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 			server->capabilities = CAP_MPX_MODE;
 		}
 		server->timeZone = le16_to_cpu(rsp->ServerTimeZone);
+		tmp = le16_to_cpu(rsp->ServerTimeZone);
+		if (tmp == (int)0xffff) {
+			/* OS/2 often does not set timezone therefore
+			 * we must use server time to calc time zone.
+			 * Could deviate slightly from the right zone. Not easy
+			 * to adjust, since timezones are not always a multiple
+			 * of 60 (sometimes 30 minutes - are there smaller?)
+			 */
+			struct timespec ts, utc;
+			utc = CURRENT_TIME;
+			ts = cnvrtDosUnixTm(le16_to_cpu(rsp->SrvTime.Date),
+						le16_to_cpu(rsp->SrvTime.Time));
+			cFYI(1,("SrvTime: %d sec since 1970 (utc: %d) diff: %d",
+				(int)ts.tv_sec, (int)utc.tv_sec, 
+				(int)(utc.tv_sec - ts.tv_sec)));
+			tmp = (int)(utc.tv_sec - ts.tv_sec);
+			adjust = tmp < 0 ? -29 : 29;
+			tmp = ((tmp + adjust) / 60) * 60;
+			server->timeZone = tmp;
+		} else {
+			server->timeZone = tmp * 60; /* also in seconds */
+		}
+		cFYI(1,("server->timeZone: %d seconds", server->timeZone));
+
 
 		/* BB get server time for time conversions and add
 		code to use it and timezone since this is not UTC */	
 
-		if (rsp->EncryptionKeyLength == cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
+		if (rsp->EncryptionKeyLength == 
+				cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
 			memcpy(server->cryptKey, rsp->EncryptionKey,
 				CIFS_CRYPTO_KEY_SIZE);
 		} else if (server->secMode & SECMODE_PW_ENCRYPT) {
-- 
cgit v1.2.3


From 175ec9e11cf18f8373b32f7a33e75a4cf7ce25e3 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 30 Sep 2006 01:07:38 +0000
Subject: [CIFS] Rename server time zone field

Server time zone is not really a time zone, rather a time adjustment
in seconds.

CC: Guenter Kukkukk <linux@kukkukk.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c   | 2 +-
 fs/cifs/cifsglob.h | 2 +-
 fs/cifs/cifspdu.h  | 5 ++++-
 fs/cifs/cifssmb.c  | 9 ++++-----
 fs/cifs/connect.c  | 9 +++++----
 5 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7ecfcbf31e55..e7641f9a13bb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -465,7 +465,7 @@ static struct super_operations cifs_super_ops = {
 	.umount_begin   = cifs_umount_begin,
 	.remount_fs = cifs_remount,
 #ifdef CONFIG_CIFS_STATS2
-	cifs_show_stats,
+	.show_stats = cifs_show_stats, /* /proc/<pid>/mountstats hook */
 #endif
 };
 
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 441f8d2514fa..98eb5446e8c1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -153,7 +153,7 @@ struct TCP_Server_Info {
 	char sessid[4];		/* unique token id for this session */
 	/* (returned on Negotiate */
 	int capabilities; /* allow selective disabling of caps by smb sess */
-	__u16 timeZone;
+	int timeAdj;  /* Adjust for server time zone, in sec (may be negative) */
 	__u16 CurrentMid;         /* multiplex id - rotating counter */
 	char cryptKey[CIFS_CRYPTO_KEY_SIZE];
 	/* 16th byte of RFC1001 workstation name is always null */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 81df2bf8e75a..e5dd8708d636 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -417,7 +417,10 @@ typedef struct lanman_neg_rsp {
 	__le16 MaxNumberVcs;
 	__le16 RawMode;
 	__le32 SessionKey;
-	__le32 ServerTime;
+	struct {
+		__le16 Time;
+		__le16 Date;
+	} __attribute__((packed)) SrvTime;
 	__le16 ServerTimeZone;
 	__le16 EncryptionKeyLength;
 	__le16 Reserved;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 99718591ea29..6e004587fa48 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -473,7 +473,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 			server->maxRw = 0;/* we do not need to use raw anyway */
 			server->capabilities = CAP_MPX_MODE;
 		}
-		server->timeZone = le16_to_cpu(rsp->ServerTimeZone);
 		tmp = le16_to_cpu(rsp->ServerTimeZone);
 		if (tmp == (int)0xffff) {
 			/* OS/2 often does not set timezone therefore
@@ -492,11 +491,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 			tmp = (int)(utc.tv_sec - ts.tv_sec);
 			adjust = tmp < 0 ? -29 : 29;
 			tmp = ((tmp + adjust) / 60) * 60;
-			server->timeZone = tmp;
+			server->timeAdj = tmp;
 		} else {
-			server->timeZone = tmp * 60; /* also in seconds */
+			server->timeAdj = (__s16)tmp * 60; /* also in seconds */
 		}
-		cFYI(1,("server->timeZone: %d seconds", server->timeZone));
+		cFYI(1,("server->timeAdj: %d seconds", server->timeAdj));
 
 
 		/* BB get server time for time conversions and add
@@ -557,7 +556,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	cFYI(0, ("Max buf = %d", ses->server->maxBuf));
 	GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
 	server->capabilities = le32_to_cpu(pSMBr->Capabilities);
-	server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone);	
+	server->timeAdj = (__s16)le16_to_cpu(pSMBr->ServerTimeZone) * 60;
 	if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
 		memcpy(server->cryptKey, pSMBr->u.EncryptionKey,
 		       CIFS_CRYPTO_KEY_SIZE);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b3268e53ab95..083b2b2c1571 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3320,15 +3320,16 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 		if(linuxExtEnabled == 0)
 			pSesInfo->capabilities &= (~CAP_UNIX);
 	/*	pSesInfo->sequence_number = 0;*/
-		cFYI(1,("Security Mode: 0x%x Capabilities: 0x%x Time Zone: %d",
+		cFYI(1,("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
 			pSesInfo->server->secMode,
 			pSesInfo->server->capabilities,
-			pSesInfo->server->timeZone));
+			pSesInfo->server->timeAdj));
 		if(experimEnabled < 2)
 			rc = CIFS_SessSetup(xid, pSesInfo,
 					    first_time, nls_info);
 		else if (extended_security
-				&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
+				&& (pSesInfo->capabilities 
+					& CAP_EXTENDED_SECURITY)
 				&& (pSesInfo->server->secType == NTLMSSP)) {
 			rc = -EOPNOTSUPP;
 		} else if (extended_security
@@ -3342,7 +3343,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 			if (!rc) {
 				if(ntlmv2_flag) {
 					char * v2_response;
-					cFYI(1,("Can use more secure NTLM version 2 password hash"));
+					cFYI(1,("more secure NTLM ver2 hash"));
 					if(CalcNTLMv2_partial_mac_key(pSesInfo, 
 						nls_info)) {
 						rc = -ENOMEM;
-- 
cgit v1.2.3


From f46d3e11903e452924ef2996aa9aca2aae4427e2 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 30 Sep 2006 01:08:55 +0000
Subject: [CIFS] Fix typo in name of new cifs_show_stats

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index e7641f9a13bb..ca53720fa5b1 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -465,7 +465,7 @@ static struct super_operations cifs_super_ops = {
 	.umount_begin   = cifs_umount_begin,
 	.remount_fs = cifs_remount,
 #ifdef CONFIG_CIFS_STATS2
-	.cifs_show_stats,
+	.show_stats = cifs_show_stats,
 #endif
 };
 
-- 
cgit v1.2.3


From 9ac00b7d96045fa3ce573e0ad5cdc0350ad8e1d2 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 30 Sep 2006 04:13:17 +0000
Subject: [CIFS] Do not send newer QFSInfo to legacy servers which can not
 support it

Fix dialect negotiation to save off when we have negotiated lanman.
This allows us to avoid sending some somewhat newer requests that the server
can not handle and go directly to the older version (infolevel) of the same
call. Make sure we try to negotiate a level which allows us to get the
server OS, which we check so we can detect Win9x vs. other legacy servers
and eventually work around the Win9x DOS time bug (they reverse date/time
fields).

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c   |  8 +++++---
 fs/cifs/cifsglob.h |  9 +++++++--
 fs/cifs/cifspdu.h  |  3 ++-
 fs/cifs/cifssmb.c  |  8 +++++---
 fs/cifs/connect.c  |  1 +
 fs/cifs/sess.c     | 23 ++++++++++++-----------
 6 files changed, 32 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ca53720fa5b1..d6d226addde2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -199,10 +199,12 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
     /* Only need to call the old QFSInfo if failed
     on newer one */
     if(rc)
-	rc = CIFSSMBQFSInfo(xid, pTcon, buf);
+	if((pTcon->ses->flags & CIFS_SES_LANMAN) == 0)
+		rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* not supported by OS2 */
 
-	/* Old Windows servers do not support level 103, retry with level 
-	   one if old server failed the previous call */ 
+	/* Some old Windows servers also do not support level 103, retry with
+	   older level one if old server failed the previous call or we
+	   bypassed it because we detected that this was an older LANMAN sess */
 	if(rc)
 		rc = SMBOldQFSInfo(xid, pTcon, buf);
 	/*     
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 98eb5446e8c1..597afdf4c69c 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -203,9 +203,14 @@ struct cifsSesInfo {
 	char * domainName;
 	char * password;
 };
-/* session flags */
+/* no more than one of the following three session flags may be set */
 #define CIFS_SES_NT4 1
-
+#define CIFS_SES_OS2 2
+#define CIFS_SES_W9X 4
+/* following flag is set for old servers such as OS2 (and Win95?)
+   which do not negotiate NTLM or POSIX dialects, but instead
+   negotiate one of the older LANMAN dialects */
+#define CIFS_SES_LANMAN 8
 /*
  * there is one of these for each connection to a resource on a particular
  * session 
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index e5dd8708d636..50505422dbb4 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -26,7 +26,8 @@
 
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 #define LANMAN_PROT 0
-#define CIFS_PROT   1
+#define LANMAN2_PROT 1
+#define CIFS_PROT   2
 #else
 #define CIFS_PROT   0
 #endif
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6e004587fa48..f2fa05bbcb47 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -46,6 +46,7 @@ static struct {
 } protocols[] = {
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 	{LANMAN_PROT, "\2LM1.2X002"},
+	{LANMAN2_PROT, "\2LANMAN2.1"},
 #endif /* weak password hashing for legacy clients */
 	{CIFS_PROT, "\2NT LM 0.12"}, 
 	{POSIX_PROT, "\2POSIX 2"},
@@ -67,13 +68,13 @@ static struct {
 /* define the number of elements in the cifs dialect array */
 #ifdef CONFIG_CIFS_POSIX
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define CIFS_NUM_PROT 3
+#define CIFS_NUM_PROT 4
 #else
 #define CIFS_NUM_PROT 2
 #endif /* CIFS_WEAK_PW_HASH */
 #else /* not posix */
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define CIFS_NUM_PROT 2
+#define CIFS_NUM_PROT 3
 #else
 #define CIFS_NUM_PROT 1
 #endif /* CONFIG_CIFS_WEAK_PW_HASH */
@@ -446,7 +447,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 		goto neg_err_exit;
 #ifdef CONFIG_CIFS_WEAK_PW_HASH 
 	} else if((pSMBr->hdr.WordCount == 13)
-			&& (pSMBr->DialectIndex == LANMAN_PROT)) {
+			&& ((pSMBr->DialectIndex == LANMAN_PROT)
+				|| (pSMBr->DialectIndex == LANMAN2_PROT))) {
 		int tmp, adjust;
 		struct lanman_neg_rsp * rsp = (struct lanman_neg_rsp *)pSMBr;
 
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 083b2b2c1571..c96f3edf1b9c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3316,6 +3316,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 		first_time = 1;
 	}
 	if (!rc) {
+		pSesInfo->flags = 0;
 		pSesInfo->capabilities = pSesInfo->server->capabilities;
 		if(linuxExtEnabled == 0)
 			pSesInfo->capabilities &= (~CAP_UNIX);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d1705ab8136e..e4c4e466e320 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -268,6 +268,10 @@ static int decode_ascii_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo
 	ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
 	if(ses->serverOS)
 		strncpy(ses->serverOS, bcc_ptr, len);
+	if(strncmp(ses->serverOS, "OS/2",4) == 0) {
+			cFYI(1,("OS/2 server"));
+			ses->flags |= CIFS_SES_OS2;
+	}
 
 	bcc_ptr += len + 1;
 	bleft -= len + 1;
@@ -290,16 +294,11 @@ static int decode_ascii_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo
         if(len > bleft)
                 return rc;
 
-        if(ses->serverDomain)
-                kfree(ses->serverDomain);
-
-        ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
-        if(ses->serverOS)
-                strncpy(ses->serverOS, bcc_ptr, len);
-
-        bcc_ptr += len + 1;
-	bleft -= len + 1;
-
+	/* No domain field in LANMAN case. Domain is
+	   returned by old servers in the SMB negprot response */
+	/* BB For newer servers which do not support Unicode,
+	   but thus do return domain here we could add parsing
+	   for it later, but it is not very important */
 	cFYI(1,("ascii: bytes left %d",bleft));
 
 	return rc;
@@ -366,6 +365,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 	str_area = kmalloc(2000, GFP_KERNEL);
 	bcc_ptr = str_area;
 
+	ses->flags &= ~CIFS_SES_LANMAN;
+
 	if(type == LANMAN) {
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 		char lnm_session_key[CIFS_SESS_KEY_SIZE];
@@ -377,7 +378,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 		/* and copy into bcc */
 
 		calc_lanman_hash(ses, lnm_session_key);
-
+		ses->flags |= CIFS_SES_LANMAN; 
 /* #ifdef CONFIG_CIFS_DEBUG2
 		cifs_dump_mem("cryptkey: ",ses->server->cryptKey,
 			CIFS_SESS_KEY_SIZE);
-- 
cgit v1.2.3


From de7ed55dbb2f2c44be669d56c4adf28cbffb26ce Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 30 Sep 2006 13:25:52 +0000
Subject: [CIFS] Make use of newer QFSInfo dependent on capability bit instead
 of whether we negotiated legacy lanman dialect so we do not keep retrying for
 mount to WindowsME

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c   | 2 +-
 fs/cifs/cifsglob.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d6d226addde2..43364361276e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -199,7 +199,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
     /* Only need to call the old QFSInfo if failed
     on newer one */
     if(rc)
-	if((pTcon->ses->flags & CIFS_SES_LANMAN) == 0)
+	if(pTcon->ses->capabilities & CAP_NT_SMBS)
 		rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* not supported by OS2 */
 
 	/* Some old Windows servers also do not support level 103, retry with
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 597afdf4c69c..74d3ccbb103b 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -153,7 +153,7 @@ struct TCP_Server_Info {
 	char sessid[4];		/* unique token id for this session */
 	/* (returned on Negotiate */
 	int capabilities; /* allow selective disabling of caps by smb sess */
-	__u16 timeAdj;  /* Adjust for difference in server time zone in sec */
+	int timeAdj;  /* Adjust for difference in server time zone in sec */
 	__u16 CurrentMid;         /* multiplex id - rotating counter */
 	char cryptKey[CIFS_CRYPTO_KEY_SIZE];
 	/* 16th byte of RFC1001 workstation name is always null */
-- 
cgit v1.2.3


From 18f75ca0dc0d5b6a2ec15d89d517b3c67e0f1c87 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sun, 1 Oct 2006 03:13:01 +0000
Subject: [CIFS] Allow LANMAN21 support in both POSIX and non-POSIX paths

Signed-off-by: Guenter Kukkukk <linux@kukkukk.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f2fa05bbcb47..75f060328e29 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -59,6 +59,7 @@ static struct {
 } protocols[] = {
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 	{LANMAN_PROT, "\2LM1.2X002"},
+	{LANMAN2_PROT, "\2LANMAN2.1"},
 #endif /* weak password hashing for legacy clients */
 	{CIFS_PROT, "\2NT LM 0.12"}, 
 	{BAD_PROT, "\2"}
-- 
cgit v1.2.3


From 203cf2fc13a5db1fb202c294948fa9cb43bf69fa Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sun, 1 Oct 2006 19:59:41 +0000
Subject: [CIFS] Fix readdir of large directories for backlevel servers

(were not setting all of resume key)

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/readdir.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 71e86c38e632..b0e5db10664c 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -946,6 +946,7 @@ static int cifs_save_resume_key(const char *current_entry,
 		filename = &pFindData->FileName[0];
 		/* one byte length, no name conversion */
 		len = (unsigned int)pFindData->FileNameLength;
+		cifsFile->srch_inf.resume_key = pFindData->ResumeKey;
 	} else {
 		cFYI(1,("Unknown findfirst level %d",level));
 		return -EINVAL;
-- 
cgit v1.2.3


From b815f1e559e7cbdf3e561cf0c7cffc4a4a57a013 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Mon, 2 Oct 2006 05:53:29 +0000
Subject: [CIFS] Allow for 15 minute TZs (e.g. Nepal) and be more explicit
 about not setting time on close

Signed-off-by: Guenter Kukkukk <linux@kukkukk.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifspdu.h |  4 +++-
 fs/cifs/cifssmb.c | 35 ++++++++++++++++++++++-------------
 2 files changed, 25 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 50505422dbb4..6df9dadba647 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -409,6 +409,8 @@ typedef struct negotiate_req {
 
 /* Dialect index is 13 for LANMAN */
 
+#define MIN_TZ_ADJ (15 * 60) /* minimum grid for timezones in seconds */
+
 typedef struct lanman_neg_rsp {
 	struct smb_hdr hdr;	/* wct = 13 */
 	__le16 DialectIndex;
@@ -678,7 +680,7 @@ typedef union smb_com_tree_disconnect {	/* as an altetnative can use flag on
 typedef struct smb_com_close_req {
 	struct smb_hdr hdr;	/* wct = 3 */
 	__u16 FileID;
-	__u32 LastWriteTime;	/* should be zero */
+	__u32 LastWriteTime;	/* should be zero or -1 */
 	__u16 ByteCount;	/* 0 */
 } __attribute__((packed)) CLOSE_REQ;
 
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 75f060328e29..8d30a5c4f244 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -450,7 +450,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	} else if((pSMBr->hdr.WordCount == 13)
 			&& ((pSMBr->DialectIndex == LANMAN_PROT)
 				|| (pSMBr->DialectIndex == LANMAN2_PROT))) {
-		int tmp, adjust;
+		__s16 tmp;
 		struct lanman_neg_rsp * rsp = (struct lanman_neg_rsp *)pSMBr;
 
 		if((secFlags & CIFSSEC_MAY_LANMAN) || 
@@ -476,14 +476,16 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 			server->maxRw = 0;/* we do not need to use raw anyway */
 			server->capabilities = CAP_MPX_MODE;
 		}
-		tmp = le16_to_cpu(rsp->ServerTimeZone);
-		if (tmp == (int)0xffff) {
+		tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone);
+		if (tmp == 0xffff) {
 			/* OS/2 often does not set timezone therefore
 			 * we must use server time to calc time zone.
-			 * Could deviate slightly from the right zone. Not easy
-			 * to adjust, since timezones are not always a multiple
-			 * of 60 (sometimes 30 minutes - are there smaller?)
+			 * Could deviate slightly from the right zone.
+			 * Smallest defined timezone difference is 15 minutes
+			 * (i.e. Nepal).  Rounding up/down is done to match
+			 * this requirement.
 			 */
+			int val, seconds, remain, result;
 			struct timespec ts, utc;
 			utc = CURRENT_TIME;
 			ts = cnvrtDosUnixTm(le16_to_cpu(rsp->SrvTime.Date),
@@ -491,12 +493,18 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 			cFYI(1,("SrvTime: %d sec since 1970 (utc: %d) diff: %d",
 				(int)ts.tv_sec, (int)utc.tv_sec, 
 				(int)(utc.tv_sec - ts.tv_sec)));
-			tmp = (int)(utc.tv_sec - ts.tv_sec);
-			adjust = tmp < 0 ? -29 : 29;
-			tmp = ((tmp + adjust) / 60) * 60;
-			server->timeAdj = tmp;
+			val = (int)(utc.tv_sec - ts.tv_sec);
+			seconds = val < 0 ? -val : val;
+			result = (seconds / IN_TZ_ADJ) * MIN_TZ_ADJ;
+			remain = seconds % MIN_TZ_ADJ;
+			if(remain >= (MIN_TZ_ADJ / 2))
+				result += MIN_TZ_ADJ;
+			if(val < 0)
+				result = - result;
+			server->timeAdj = result;
 		} else {
-			server->timeAdj = tmp * 60; /* also in seconds */
+			server->timeAdj = (int)tmp;
+			server->timeAdj *= 60; /* also in seconds */
 		}
 		cFYI(1,("server->timeAdj: %d seconds", server->timeAdj));
 
@@ -559,7 +567,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	cFYI(0, ("Max buf = %d", ses->server->maxBuf));
 	GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
 	server->capabilities = le32_to_cpu(pSMBr->Capabilities);
-	server->timeAdj = le16_to_cpu(pSMBr->ServerTimeZone) * 60;	
+	server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
+	server->timeAdj *= 60;
 	if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
 		memcpy(server->cryptKey, pSMBr->u.EncryptionKey,
 		       CIFS_CRYPTO_KEY_SIZE);
@@ -1645,7 +1654,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
 	pSMBr = (CLOSE_RSP *)pSMB; /* BB removeme BB */
 
 	pSMB->FileID = (__u16) smb_file_id;
-	pSMB->LastWriteTime = 0;
+	pSMB->LastWriteTime = 0xFFFFFFFF;
 	pSMB->ByteCount = 0;
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-- 
cgit v1.2.3


From 947a50679570ef7a66e3e3107e95943a1cb14d08 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Mon, 2 Oct 2006 05:55:25 +0000
Subject: [CIFS] Fix typo

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 8d30a5c4f244..005fb315477c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -495,7 +495,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 				(int)(utc.tv_sec - ts.tv_sec)));
 			val = (int)(utc.tv_sec - ts.tv_sec);
 			seconds = val < 0 ? -val : val;
-			result = (seconds / IN_TZ_ADJ) * MIN_TZ_ADJ;
+			result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
 			remain = seconds % MIN_TZ_ADJ;
 			if(remain >= (MIN_TZ_ADJ / 2))
 				result += MIN_TZ_ADJ;
-- 
cgit v1.2.3


From 1a70d6529ad9f5978af846440f8a809784d6e813 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Mon, 2 Oct 2006 05:59:18 +0000
Subject: [CIFS] Fix compiler warning with previous patch

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 005fb315477c..79a01d35a783 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -477,7 +477,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 			server->capabilities = CAP_MPX_MODE;
 		}
 		tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone);
-		if (tmp == 0xffff) {
+		if (tmp == -1) {
 			/* OS/2 often does not set timezone therefore
 			 * we must use server time to calc time zone.
 			 * Could deviate slightly from the right zone.
-- 
cgit v1.2.3


From 268f3be177ce93791da38facc34126b5038cd851 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Fri, 6 Oct 2006 21:47:09 +0000
Subject: [CIFS] readdir (ffirst) enablement of accurate timestamps from legacy
 servers

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/netmisc.c | 39 ++++++++++++++++++---------------------
 fs/cifs/readdir.c | 18 +++++++++---------
 2 files changed, 27 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index fa5124d9af19..3d86b31cf2e4 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -918,45 +918,42 @@ __le64 cnvrtDosCifsTm(__u16 date, __u16 time)
 {
 	return cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(date, time)));
 }
+
 struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
 {
-	__u8  dt[2];
-	__u8  tm[2];
 	struct timespec ts;
-	int sec,min, days, month, year;
-	struct timespec removeme; /* BB removeme BB */
-/*	SMB_TIME * st = (SMB_TIME *)&time;*/
+	int sec, min, days, month, year;
+	SMB_TIME * st = (SMB_TIME *)&time;
+	SMB_DATE * sd = (SMB_DATE *)&date;
 
 	cFYI(1,("date %d time %d",date, time));
 
-	dt[0] = date & 0xFF;
-	dt[1] = (date & 0xFF00) >> 8;
-	tm[0] = time & 0xFF;
-	tm[1] = (time & 0xFF00) >> 8;
-
-	sec = tm[0] & 0x1F;
-	sec = 2 * sec;
-	min = ((tm[0] >>5)&0xFF) + ((tm[1] & 0x7)<<3);
-
+	sec = 2 * st->TwoSeconds;
+	min = st->Minutes;
+	if((sec > 59) || (min > 59))
+		cERROR(1,("illegal time min %d sec %d", min, sec));
 	sec += (min * 60);
-	sec += 60 * 60 * ((tm[1] >> 3) &0xFF) /* hours */;
-	days = (dt[0] & 0x1F) - 1;
-	month = ((dt[0] >> 5) & 0xFF) + ((dt[1] & 0x1) <<3);
-	if(month > 12)
-		cERROR(1,("illegal month %d in date", month));
+	sec += 60 * 60 * st->Hours;
+	if(st->Hours > 24)
+		cERROR(1,("illegal hours %d",st->Hours));
+	days = sd->Day;
+	month = sd->Month;
+	if((days > 31) || (month > 12))
+		cERROR(1,("illegal date, month %d day: %d", month, days));
 	month -= 1;
 	days += total_days_of_prev_months[month];
 	days += 3653; /* account for difference in days between 1980 and 1970 */
-	year = (dt[1]>>1) & 0xFF;
+	year = sd->Year;
 	days += year * 365;
 	days += (year/4); /* leap year */
 	/* adjust for leap year where we are still before leap day */
 	days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0);
 	sec += 24 * 60 * 60 * days; 
 
-	removeme = CURRENT_TIME; /* BB removeme BB */
 	ts.tv_sec = sec;
 
+	/* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */
+
 	ts.tv_nsec = 0;
 	return ts;
 } 
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b0e5db10664c..81e7b2e5fb4d 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -139,19 +139,19 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 		FIND_FILE_STANDARD_INFO * pfindData = 
 			(FIND_FILE_STANDARD_INFO *)buf;
 
-/*		ts = cnvrtDosUnixTm(
+		tmp_inode->i_mtime = cnvrtDosUnixTm(
 				le16_to_cpu(pfindData->LastWriteDate),
-				le16_to_cpu(pfindData->LastWriteTime));*/
+				le16_to_cpu(pfindData->LastWriteTime));
+		tmp_inode->i_atime = cnvrtDosUnixTm(
+				le16_to_cpu(pfindData->LastAccessDate),
+				le16_to_cpu(pfindData->LastAccessTime));
+                tmp_inode->i_ctime = cnvrtDosUnixTm(
+                                le16_to_cpu(pfindData->LastWriteDate),
+                                le16_to_cpu(pfindData->LastWriteTime));
+
 		attr = le16_to_cpu(pfindData->Attributes);
 		allocation_size = le32_to_cpu(pfindData->AllocationSize);
 		end_of_file = le32_to_cpu(pfindData->DataSize);
-		/* do not need to use current_fs_time helper function since
-		 time not stored for this case so atime can not "go backwards"
-		 by pulling newer older from disk when inode refrenshed */
-		tmp_inode->i_atime = CURRENT_TIME;
-		/* tmp_inode->i_mtime =  BB FIXME - add dos time handling
-		tmp_inode->i_ctime = 0;   BB FIXME */
-
 	}
 
 	/* Linux can not store file creation time unfortunately so ignore it */
-- 
cgit v1.2.3


From 438dd926260f11ff01fc3441ac6dd4c412d20ea4 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 11 Oct 2006 03:49:30 +0000
Subject: [CIFS] Fix leap year calculation for years after 2100

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/netmisc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 3d86b31cf2e4..32562d199552 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -946,6 +946,15 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
 	year = sd->Year;
 	days += year * 365;
 	days += (year/4); /* leap year */
+	/* generalized leap year calculation is more complex, ie no leap year
+	for years/100 except for years/400, but since the maximum number for DOS
+	 year is 2**7, the last year is 1980+127, which means we need only
+	 consider 2 special case years, ie the years 2000 and 2100, and only
+	 adjust for the lack of leap year for the year 2100, as 2000 was a 
+	 leap year (divisible by 400) */
+	if(year >= 120)  /* the year 2100 */
+		days = days - 1;  /* do not count leap year for the year 2100 */
+
 	/* adjust for leap year where we are still before leap day */
 	days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0);
 	sec += 24 * 60 * 60 * days; 
-- 
cgit v1.2.3


From 41716c7c21b15e7ecf14f0caf1eef3980707fb74 Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Wed, 11 Oct 2006 01:20:37 -0700
Subject: [PATCH] null dereference in fs/jbd/journal.c

Since commit d1807793e1e7e502e3dc047115e9dbc3b50e4534 we dereference a NULL
pointer.  Coverity id #1432.  We set journal to NULL, and use it directly
afterwards.

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/journal.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index c518dd8fe60a..b85c686b60db 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -725,6 +725,7 @@ journal_t * journal_init_dev(struct block_device *bdev,
 			__FUNCTION__);
 		kfree(journal);
 		journal = NULL;
+		goto out;
 	}
 	journal->j_dev = bdev;
 	journal->j_fs_dev = fs_dev;
@@ -735,7 +736,7 @@ journal_t * journal_init_dev(struct block_device *bdev,
 	J_ASSERT(bh != NULL);
 	journal->j_sb_buffer = bh;
 	journal->j_superblock = (journal_superblock_t *)bh->b_data;
-
+out:
 	return journal;
 }
 
-- 
cgit v1.2.3


From 502717f4e112b18d9c37753a32f675bec9f2838b Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Wed, 11 Oct 2006 01:20:46 -0700
Subject: [PATCH] hugetlb: fix linked list corruption in unmap_hugepage_range()

commit fe1668ae5bf0145014c71797febd9ad5670d5d05 causes kernel to oops with
libhugetlbfs test suite.  The problem is that hugetlb pages can be shared
by multiple mappings.  Multiple threads can fight over page->lru in the
unmap path and bad things happen.  We now serialize __unmap_hugepage_range
to avoid concurrent linked list manipulation.  Such serialization is also
needed for shared page table pages in the hugetlb area.  This patch fixes
the bug and also serves as a prepatch for shared page tables.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 5e03b2f67b93..4ee3f006b861 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -293,7 +293,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
 		if (h_vm_pgoff >= h_pgoff)
 			v_offset = 0;
 
-		unmap_hugepage_range(vma,
+		__unmap_hugepage_range(vma,
 				vma->vm_start + v_offset, vma->vm_end);
 	}
 }
-- 
cgit v1.2.3


From ac27a0ec112a089f1a5102bc8dffc79c8c815571 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 11 Oct 2006 01:20:50 -0700
Subject: [PATCH] ext4: initial copy of files from ext3

Start of the ext4 patch series.  See Documentation/filesystems/ext4.txt for
details.

This is a simple copy of the files in fs/ext3 to fs/ext4 and
/usr/include/linux/ext3* to /usr/include/linux/ext4*

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/Makefile         |   12 +
 fs/ext4/acl.c            |  551 ++++++++
 fs/ext4/acl.h            |   81 ++
 fs/ext4/balloc.c         | 1818 ++++++++++++++++++++++++++
 fs/ext4/bitmap.c         |   32 +
 fs/ext4/dir.c            |  518 ++++++++
 fs/ext4/file.c           |  139 ++
 fs/ext4/fsync.c          |   88 ++
 fs/ext4/hash.c           |  152 +++
 fs/ext4/ialloc.c         |  758 +++++++++++
 fs/ext4/inode.c          | 3219 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/ioctl.c          |  307 +++++
 fs/ext4/namei.c          | 2397 ++++++++++++++++++++++++++++++++++
 fs/ext4/namei.h          |    8 +
 fs/ext4/resize.c         | 1042 +++++++++++++++
 fs/ext4/super.c          | 2754 +++++++++++++++++++++++++++++++++++++++
 fs/ext4/symlink.c        |   54 +
 fs/ext4/xattr.c          | 1317 +++++++++++++++++++
 fs/ext4/xattr.h          |  145 +++
 fs/ext4/xattr_security.c |   77 ++
 fs/ext4/xattr_trusted.c  |   62 +
 fs/ext4/xattr_user.c     |   64 +
 22 files changed, 15595 insertions(+)
 create mode 100644 fs/ext4/Makefile
 create mode 100644 fs/ext4/acl.c
 create mode 100644 fs/ext4/acl.h
 create mode 100644 fs/ext4/balloc.c
 create mode 100644 fs/ext4/bitmap.c
 create mode 100644 fs/ext4/dir.c
 create mode 100644 fs/ext4/file.c
 create mode 100644 fs/ext4/fsync.c
 create mode 100644 fs/ext4/hash.c
 create mode 100644 fs/ext4/ialloc.c
 create mode 100644 fs/ext4/inode.c
 create mode 100644 fs/ext4/ioctl.c
 create mode 100644 fs/ext4/namei.c
 create mode 100644 fs/ext4/namei.h
 create mode 100644 fs/ext4/resize.c
 create mode 100644 fs/ext4/super.c
 create mode 100644 fs/ext4/symlink.c
 create mode 100644 fs/ext4/xattr.c
 create mode 100644 fs/ext4/xattr.h
 create mode 100644 fs/ext4/xattr_security.c
 create mode 100644 fs/ext4/xattr_trusted.c
 create mode 100644 fs/ext4/xattr_user.c

(limited to 'fs')

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
new file mode 100644
index 000000000000..704cd44a40c2
--- /dev/null
+++ b/fs/ext4/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the linux ext3-filesystem routines.
+#
+
+obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ext3-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+	   ioctl.o namei.o super.o symlink.o hash.o resize.o
+
+ext3-$(CONFIG_EXT3_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o
+ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+ext3-$(CONFIG_EXT3_FS_SECURITY)	 += xattr_security.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
new file mode 100644
index 000000000000..1e5038d9a01b
--- /dev/null
+++ b/fs/ext4/acl.c
@@ -0,0 +1,551 @@
+/*
+ * linux/fs/ext3/acl.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/ext3_fs.h>
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * Parse an on-disk ACL blob into a newly allocated posix_acl.
+ *
+ * Returns NULL when @value is NULL or the blob holds zero entries,
+ * ERR_PTR(-EINVAL) when the blob is malformed, ERR_PTR(-ENOMEM) when the
+ * allocation fails, and the new ACL otherwise.
+ */
+static struct posix_acl *
+ext3_acl_from_disk(const void *value, size_t size)
+{
+	const char *end = (char *)value + size;
+	const char *pos;
+	struct posix_acl *acl;
+	int nr_entries, i;
+
+	if (!value)
+		return NULL;
+	if (size < sizeof(ext3_acl_header))
+		return ERR_PTR(-EINVAL);
+	if (((ext3_acl_header *)value)->a_version !=
+	    cpu_to_le32(EXT3_ACL_VERSION))
+		return ERR_PTR(-EINVAL);
+
+	nr_entries = ext3_acl_count(size);
+	if (nr_entries < 0)
+		return ERR_PTR(-EINVAL);
+	if (nr_entries == 0)
+		return NULL;
+
+	acl = posix_acl_alloc(nr_entries, GFP_KERNEL);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+
+	/* The entries start right behind the version header. */
+	pos = (const char *)value + sizeof(ext3_acl_header);
+	for (i = 0; i < nr_entries; i++) {
+		const ext3_acl_entry *disk = (const ext3_acl_entry *)pos;
+
+		/* Every entry carries at least the short (tag, perm) part. */
+		if (pos + sizeof(ext3_acl_entry_short) > end)
+			goto fail;
+		acl->a_entries[i].e_tag = le16_to_cpu(disk->e_tag);
+		acl->a_entries[i].e_perm = le16_to_cpu(disk->e_perm);
+
+		switch (acl->a_entries[i].e_tag) {
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			/* Short form: no id stored on disk. */
+			pos += sizeof(ext3_acl_entry_short);
+			acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
+			break;
+
+		case ACL_USER:
+		case ACL_GROUP:
+			/* Long form: the id must fit inside the blob too. */
+			pos += sizeof(ext3_acl_entry);
+			if (pos > end)
+				goto fail;
+			acl->a_entries[i].e_id = le32_to_cpu(disk->e_id);
+			break;
+
+		default:
+			goto fail;
+		}
+	}
+	/* The entries must consume the blob exactly. */
+	if (pos != end)
+		goto fail;
+	return acl;
+
+fail:
+	posix_acl_release(acl);
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Serialize an in-memory posix_acl into the on-disk layout.
+ *
+ * *@size receives the encoded length (it is written before the allocation
+ * is attempted).  Returns a kmalloc'ed buffer, or ERR_PTR(-ENOMEM) when the
+ * allocation fails, or ERR_PTR(-EINVAL) for an unknown entry tag.
+ */
+static void *
+ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
+{
+	ext3_acl_header *hdr;
+	char *out;
+	size_t i;
+
+	*size = ext3_acl_size(acl->a_count);
+	/* Sized as if every entry used the long form. */
+	hdr = kmalloc(sizeof(ext3_acl_header) + acl->a_count *
+			sizeof(ext3_acl_entry), GFP_KERNEL);
+	if (!hdr)
+		return ERR_PTR(-ENOMEM);
+	hdr->a_version = cpu_to_le32(EXT3_ACL_VERSION);
+
+	out = (char *)hdr + sizeof(ext3_acl_header);
+	for (i = 0; i < acl->a_count; i++) {
+		ext3_acl_entry *disk = (ext3_acl_entry *)out;
+
+		disk->e_tag = cpu_to_le16(acl->a_entries[i].e_tag);
+		disk->e_perm = cpu_to_le16(acl->a_entries[i].e_perm);
+
+		switch (acl->a_entries[i].e_tag) {
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			/* Short form: only tag and perm are stored. */
+			out += sizeof(ext3_acl_entry_short);
+			break;
+
+		case ACL_USER:
+		case ACL_GROUP:
+			disk->e_id = cpu_to_le32(acl->a_entries[i].e_id);
+			out += sizeof(ext3_acl_entry);
+			break;
+
+		default:
+			kfree(hdr);
+			return ERR_PTR(-EINVAL);
+		}
+	}
+	return (char *)hdr;
+}
+
+/*
+ * Read a cached ACL slot under inode->i_lock.  Returns
+ * EXT3_ACL_NOT_CACHED when the slot holds that marker, otherwise the
+ * result of posix_acl_dup() on the slot's value.
+ */
+static inline struct posix_acl *
+ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
+{
+	struct posix_acl *cached;
+
+	spin_lock(&inode->i_lock);
+	if (*i_acl == EXT3_ACL_NOT_CACHED)
+		cached = EXT3_ACL_NOT_CACHED;
+	else
+		cached = posix_acl_dup(*i_acl);
+	spin_unlock(&inode->i_lock);
+
+	return cached;
+}
+
+/*
+ * Replace a cached ACL slot under inode->i_lock: release the previous
+ * value unless it was the EXT3_ACL_NOT_CACHED marker, then store the
+ * result of posix_acl_dup(@acl).
+ */
+static inline void
+ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
+	      struct posix_acl *acl)
+{
+	struct posix_acl *old;
+
+	spin_lock(&inode->i_lock);
+	old = *i_acl;
+	if (old != EXT3_ACL_NOT_CACHED)
+		posix_acl_release(old);
+	*i_acl = posix_acl_dup(acl);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Inode operation get_posix_acl().
+ *
+ * Returns the access or default ACL of @inode, selected by @type.  The
+ * per-inode cache slot is consulted first; on a miss the ACL is read from
+ * the xattr and, unless that produced an error, stored back in the slot.
+ * The result is NULL when the POSIX_ACL mount option is off or no ACL is
+ * stored, and an ERR_PTR on failure (-EINVAL for an unknown @type).
+ *
+ * inode->i_mutex: don't care
+ */
+static struct posix_acl *
+ext3_get_acl(struct inode *inode, int type)
+{
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct posix_acl **cache;
+	struct posix_acl *acl;
+	char *buf = NULL;
+	int name_index;
+	int len;
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return NULL;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		cache = &ei->i_acl;
+		name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+
+	case ACL_TYPE_DEFAULT:
+		cache = &ei->i_default_acl;
+		name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	acl = ext3_iget_acl(inode, cache);
+	if (acl != EXT3_ACL_NOT_CACHED)
+		return acl;
+
+	/* NULL/0 buffer: a positive result is used as the size to allocate. */
+	len = ext3_xattr_get(inode, name_index, "", NULL, 0);
+	if (len > 0) {
+		buf = kmalloc(len, GFP_KERNEL);
+		if (!buf)
+			return ERR_PTR(-ENOMEM);
+		len = ext3_xattr_get(inode, name_index, "", buf, len);
+	}
+
+	if (len > 0)
+		acl = ext3_acl_from_disk(buf, len);
+	else if (len == -ENODATA || len == -ENOSYS)
+		acl = NULL;
+	else
+		acl = ERR_PTR(len);
+	kfree(buf);
+
+	if (!IS_ERR(acl))
+		ext3_iset_acl(inode, cache, acl);
+	return acl;
+}
+
+/*
+ * Set the access or default ACL of an inode.
+ *
+ * For ACL_TYPE_ACCESS the inode mode is updated from the ACL first; when
+ * posix_acl_equiv_mode() returns 0 the ACL is dropped and a NULL value is
+ * passed on to the xattr layer.  A default ACL on a non-directory is
+ * rejected with -EACCES (a NULL one is accepted as a no-op).  On success
+ * the matching cache slot is updated.
+ *
+ * Returns 0 or a negative error; symlinks get -EOPNOTSUPP, an unknown
+ * @type gets -EINVAL.
+ *
+ * inode->i_mutex: down unless called from ext3_new_inode
+ */
+static int
+ext3_set_acl(handle_t *handle, struct inode *inode, int type,
+	     struct posix_acl *acl)
+{
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct posix_acl **cache;
+	void *blob = NULL;
+	size_t blob_len = 0;
+	int name_index;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
+		cache = &ei->i_acl;
+		if (acl) {
+			mode_t mode = inode->i_mode;
+
+			error = posix_acl_equiv_mode(acl, &mode);
+			if (error < 0)
+				return error;
+			inode->i_mode = mode;
+			ext3_mark_inode_dirty(handle, inode);
+			if (error == 0)
+				acl = NULL;
+		}
+		break;
+
+	case ACL_TYPE_DEFAULT:
+		name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		cache = &ei->i_default_acl;
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EACCES : 0;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (acl) {
+		blob = ext3_acl_to_disk(acl, &blob_len);
+		if (IS_ERR(blob))
+			return (int)PTR_ERR(blob);
+	}
+
+	error = ext3_xattr_set_handle(handle, inode, name_index, "",
+				      blob, blob_len, 0);
+	kfree(blob);
+
+	if (!error)
+		ext3_iset_acl(inode, cache, acl);
+	return error;
+}
+
+/*
+ * ACL callback handed to generic_permission(): evaluates @mask against the
+ * inode's access ACL.  Returns -EAGAIN when the inode has no access ACL,
+ * the lookup error when fetching it failed, and otherwise the result of
+ * posix_acl_permission().
+ */
+static int
+ext3_check_acl(struct inode *inode, int mask)
+{
+	struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
+	int error;
+
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (!acl)
+		return -EAGAIN;
+
+	error = posix_acl_permission(inode, acl, mask);
+	posix_acl_release(acl);
+	return error;
+}
+
+/*
+ * Permission check entry point: delegates to generic_permission(), passing
+ * ext3_check_acl as the ACL callback.  @nd is not used here.
+ */
+int
+ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	return generic_permission(inode, mask, ext3_check_acl);
+}
+
+/*
+ * Initialize the ACLs of a new inode. Called from ext3_new_inode.
+ *
+ * For non-symlinks the parent's default ACL is looked up (when the parent's
+ * filesystem has POSIX_ACL enabled); without one the process umask is
+ * applied to the new inode's mode.  With one, a new directory inherits it
+ * as its own default ACL, and a masked clone becomes the access ACL when
+ * posix_acl_create_masq() returns a positive value.
+ *
+ * Returns 0 or a negative error.
+ *
+ * dir->i_mutex: down
+ * inode->i_mutex: up (access to inode is still exclusive)
+ */
+int
+ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
+{
+	struct posix_acl *default_acl = NULL;
+	struct posix_acl *clone;
+	mode_t mode;
+	int error = 0;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		if (test_opt(dir->i_sb, POSIX_ACL)) {
+			default_acl = ext3_get_acl(dir, ACL_TYPE_DEFAULT);
+			if (IS_ERR(default_acl))
+				return PTR_ERR(default_acl);
+		}
+		if (!default_acl)
+			inode->i_mode &= ~current->fs->umask;
+	}
+
+	if (!test_opt(inode->i_sb, POSIX_ACL) || !default_acl)
+		goto cleanup;
+
+	if (S_ISDIR(inode->i_mode)) {
+		error = ext3_set_acl(handle, inode, ACL_TYPE_DEFAULT,
+				     default_acl);
+		if (error)
+			goto cleanup;
+	}
+
+	clone = posix_acl_clone(default_acl, GFP_KERNEL);
+	if (!clone) {
+		error = -ENOMEM;
+		goto cleanup;
+	}
+
+	mode = inode->i_mode;
+	error = posix_acl_create_masq(clone, &mode);
+	if (error >= 0) {
+		inode->i_mode = mode;
+		/* A positive result means this is an extended ACL. */
+		if (error > 0)
+			error = ext3_set_acl(handle, inode,
+					     ACL_TYPE_ACCESS, clone);
+	}
+	posix_acl_release(clone);
+
+cleanup:
+	posix_acl_release(default_acl);
+	return error;
+}
+
+/*
+ * Does chmod for an inode that may have an Access Control List. The
+ * inode->i_mode field must be updated to the desired value by the caller
+ * before calling this function.
+ * Returns 0 on success, or a negative error number.  Symlinks yield
+ * -EOPNOTSUPP; an inode without an access ACL, or a filesystem without
+ * the POSIX_ACL option, yields 0 with nothing done.
+ *
+ * We change the ACL rather than storing some ACL entries in the file
+ * mode permission bits (which would be more efficient), because that
+ * would break once additional permissions (like  ACL_APPEND, ACL_DELETE
+ * for directories) are added. There are no more bits available in the
+ * file mode.
+ *
+ * inode->i_mutex: down
+ */
+int
+ext3_acl_chmod(struct inode *inode)
+{
+	struct posix_acl *current_acl, *clone;
+	int retries = 0;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return 0;
+
+	current_acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(current_acl))
+		return PTR_ERR(current_acl);
+	if (!current_acl)
+		return 0;
+
+	clone = posix_acl_clone(current_acl, GFP_KERNEL);
+	posix_acl_release(current_acl);
+	if (!clone)
+		return -ENOMEM;
+
+	error = posix_acl_chmod_masq(clone, inode->i_mode);
+	if (error)
+		goto out;
+
+	/* Store the masked clone; repeat while the allocator says to retry. */
+	do {
+		handle_t *handle;
+
+		handle = ext3_journal_start(inode,
+				EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
+		if (IS_ERR(handle)) {
+			error = PTR_ERR(handle);
+			ext3_std_error(inode->i_sb, error);
+			break;
+		}
+		error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone);
+		ext3_journal_stop(handle);
+	} while (error == -ENOSPC &&
+		 ext3_should_retry_alloc(inode->i_sb, &retries));
+
+out:
+	posix_acl_release(clone);
+	return error;
+}
+
+/*
+ * Extended attribute handlers
+ */
+/*
+ * List callback for the access-ACL xattr name.  Returns 0 when POSIX_ACL
+ * is not enabled on the filesystem; otherwise returns the size of the
+ * name including its terminating NUL, copying it into @list when a buffer
+ * of sufficient length was supplied.  @name and @name_len are unused.
+ */
+static size_t
+ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
+			   const char *name, size_t name_len)
+{
+	const size_t needed = sizeof(POSIX_ACL_XATTR_ACCESS);
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return 0;
+
+	if (list != NULL && list_len >= needed)
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, needed);
+
+	return needed;
+}
+
+/*
+ * List callback for the default-ACL xattr name.  Returns 0 when POSIX_ACL
+ * is not enabled on the filesystem; otherwise returns the size of the
+ * name including its terminating NUL, copying it into @list when a buffer
+ * of sufficient length was supplied.  @name and @name_len are unused.
+ */
+static size_t
+ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
+			    const char *name, size_t name_len)
+{
+	const size_t needed = sizeof(POSIX_ACL_XATTR_DEFAULT);
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return 0;
+
+	if (list != NULL && list_len >= needed)
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, needed);
+
+	return needed;
+}
+
+/*
+ * Common xattr "get" path for both ACL types: fetch the ACL selected by
+ * @type and encode it into @buffer via posix_acl_to_xattr().
+ *
+ * Returns that call's result, -EOPNOTSUPP when POSIX_ACL is disabled,
+ * -ENODATA when the inode has no such ACL, or the lookup error.
+ */
+static int
+ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+{
+	struct posix_acl *acl;
+	int ret;
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return -EOPNOTSUPP;
+
+	acl = ext3_get_acl(inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (!acl)
+		return -ENODATA;
+
+	ret = posix_acl_to_xattr(acl, buffer, size);
+	posix_acl_release(acl);
+	return ret;
+}
+
+/*
+ * xattr "get" handler for the access ACL.  Only the empty name suffix is
+ * accepted; anything else yields -EINVAL.
+ */
+static int
+ext3_xattr_get_acl_access(struct inode *inode, const char *name,
+			  void *buffer, size_t size)
+{
+	if (name[0] != '\0')
+		return -EINVAL;
+
+	return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
+}
+
+/*
+ * xattr "get" handler for the default ACL.  Only the empty name suffix is
+ * accepted; anything else yields -EINVAL.
+ */
+static int
+ext3_xattr_get_acl_default(struct inode *inode, const char *name,
+			   void *buffer, size_t size)
+{
+	if (name[0] != '\0')
+		return -EINVAL;
+
+	return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
+}
+
+/*
+ * Common xattr "set" path for both ACL types.
+ *
+ * Decodes @value (a NULL @value means "no ACL"), validates the result and
+ * stores it through ext3_set_acl() inside its own journal handle, retrying
+ * on -ENOSPC for as long as ext3_should_retry_alloc() allows.
+ *
+ * Returns 0 or a negative error: -EOPNOTSUPP when POSIX_ACL is disabled,
+ * -EPERM when the caller is neither the owner nor CAP_FOWNER capable,
+ * otherwise whatever decoding, validation, journal start or ext3_set_acl()
+ * reported.
+ */
+static int
+ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
+		   size_t size)
+{
+	handle_t *handle;
+	struct posix_acl *acl;
+	int error, retries = 0;
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return -EOPNOTSUPP;
+	if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+		return -EPERM;
+
+	if (value) {
+		acl = posix_acl_from_xattr(value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		else if (acl) {
+			error = posix_acl_valid(acl);
+			if (error)
+				goto release_and_out;
+		}
+	} else
+		acl = NULL;
+
+retry:
+	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
+	if (IS_ERR(handle)) {
+		/*
+		 * Leave through the common exit so the ACL decoded above is
+		 * released; returning directly here would leak it.
+		 */
+		error = PTR_ERR(handle);
+		goto release_and_out;
+	}
+	error = ext3_set_acl(handle, inode, type, acl);
+	ext3_journal_stop(handle);
+	if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
+
+release_and_out:
+	posix_acl_release(acl);
+	return error;
+}
+
+/*
+ * xattr "set" handler for the access ACL.  Only the empty name suffix is
+ * accepted; anything else yields -EINVAL.  @flags is ignored.
+ */
+static int
+ext3_xattr_set_acl_access(struct inode *inode, const char *name,
+			  const void *value, size_t size, int flags)
+{
+	if (name[0] != '\0')
+		return -EINVAL;
+
+	return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+/*
+ * xattr "set" handler for the default ACL.  Only the empty name suffix is
+ * accepted; anything else yields -EINVAL.  @flags is ignored.
+ */
+static int
+ext3_xattr_set_acl_default(struct inode *inode, const char *name,
+			   const void *value, size_t size, int flags)
+{
+	if (name[0] != '\0')
+		return -EINVAL;
+
+	return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+/* xattr handler table for the POSIX_ACL_XATTR_ACCESS prefix (access ACL). */
+struct xattr_handler ext3_xattr_acl_access_handler = {
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.list	= ext3_xattr_list_acl_access,
+	.get	= ext3_xattr_get_acl_access,
+	.set	= ext3_xattr_set_acl_access,
+};
+
+/* xattr handler table for the POSIX_ACL_XATTR_DEFAULT prefix (default ACL). */
+struct xattr_handler ext3_xattr_acl_default_handler = {
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.list	= ext3_xattr_list_acl_default,
+	.get	= ext3_xattr_get_acl_default,
+	.set	= ext3_xattr_set_acl_default,
+};
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
new file mode 100644
index 000000000000..0d1e6279cbfd
--- /dev/null
+++ b/fs/ext4/acl.h
@@ -0,0 +1,81 @@
+/*
+  File: fs/ext3/acl.h
+
+  (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+*/
+
+#include <linux/posix_acl_xattr.h>
+
+#define EXT3_ACL_VERSION	0x0001
+
+/* Long on-disk ACL entry: tag, permissions and a 32-bit id (little-endian). */
+typedef struct {
+	__le16		e_tag;
+	__le16		e_perm;
+	__le32		e_id;
+} ext3_acl_entry;
+
+/* Short on-disk ACL entry: same leading fields as the long form, no id. */
+typedef struct {
+	__le16		e_tag;
+	__le16		e_perm;
+} ext3_acl_entry_short;
+
+/* On-disk ACL header; a_version is compared against EXT3_ACL_VERSION. */
+typedef struct {
+	__le32		a_version;
+} ext3_acl_header;
+
+/*
+ * Size in bytes of the on-disk encoding of an ACL with @count entries:
+ * the header, short entries for up to the first four, and long entries
+ * for any beyond that.
+ */
+static inline size_t ext3_acl_size(int count)
+{
+	size_t bytes = sizeof(ext3_acl_header);
+
+	if (count <= 4)
+		bytes += count * sizeof(ext3_acl_entry_short);
+	else
+		bytes += 4 * sizeof(ext3_acl_entry_short) +
+			 (count - 4) * sizeof(ext3_acl_entry);
+	return bytes;
+}
+
+/*
+ * Inverse of ext3_acl_size(): number of entries encoded in an on-disk ACL
+ * of @size bytes (header included), or -1 when @size does not correspond
+ * to a whole number of entries.  The caller must ensure @size covers at
+ * least the header.
+ */
+static inline int ext3_acl_count(size_t size)
+{
+	ssize_t beyond_short;
+
+	size -= sizeof(ext3_acl_header);
+	beyond_short = size - 4 * sizeof(ext3_acl_entry_short);
+
+	if (beyond_short >= 0) {
+		/* Four short entries followed by long ones. */
+		if (beyond_short % sizeof(ext3_acl_entry))
+			return -1;
+		return beyond_short / sizeof(ext3_acl_entry) + 4;
+	}
+
+	/* Fewer than four entries' worth of bytes: short entries only. */
+	if (size % sizeof(ext3_acl_entry_short))
+		return -1;
+	return size / sizeof(ext3_acl_entry_short);
+}
+
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
+
+/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl
+   if the ACL has not been cached */
+#define EXT3_ACL_NOT_CACHED ((void *)-1)
+
+/* acl.c */
+extern int ext3_permission (struct inode *, int, struct nameidata *);
+extern int ext3_acl_chmod (struct inode *);
+extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
+
+#else  /* CONFIG_EXT3_FS_POSIX_ACL */
+#include <linux/sched.h>
+#define ext3_permission NULL
+
+/* Stub used when CONFIG_EXT3_FS_POSIX_ACL is not defined: always succeeds. */
+static inline int
+ext3_acl_chmod(struct inode *inode)
+{
+	return 0;
+}
+
+/* Stub used when CONFIG_EXT3_FS_POSIX_ACL is not defined: always succeeds. */
+static inline int
+ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
+{
+	return 0;
+}
+#endif  /* CONFIG_EXT3_FS_POSIX_ACL */
+
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
new file mode 100644
index 000000000000..b41a7d7e20f0
--- /dev/null
+++ b/fs/ext4/balloc.c
@@ -0,0 +1,1818 @@
+/*
+ *  linux/fs/ext3/balloc.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
+ *  Big-endian to little-endian byte-swapping/bitmaps by
+ *        David S. Miller (davem@caip.rutgers.edu), 1995
+ */
+
+#include <linux/time.h>
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+
+/*
+ * balloc.c contains the blocks allocation and deallocation routines
+ */
+
+/*
+ * The free blocks are managed by bitmaps.  A file system contains several
+ * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
+ * block for inodes, N blocks for the inode table and data blocks.
+ *
+ * The file system contains group descriptors which are located after the
+ * super block.  Each descriptor contains the number of the bitmap block and
+ * the free blocks count of the group.  The descriptors are loaded in memory
+ * when a file system is mounted (see ext3_read_super).
+ */
+
+
+/*
+ * True when b lies in [first, first + len - 1].  Note that b and first are
+ * each evaluated twice, so arguments must be free of side effects.
+ */
+#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
+
+/**
+ * ext3_get_group_desc() -- look up the descriptor of a block group
+ * @sb:			super block
+ * @block_group:	given block group
+ * @bh:			if non-NULL, receives the buffer head that holds
+ *			the block group descriptor
+ *
+ * The descriptor is located in the in-memory sbi->s_group_desc[] array of
+ * descriptor-block buffers; no I/O is issued here.
+ *
+ * Returns a pointer to the descriptor, or NULL (after reporting through
+ * ext3_error()) when @block_group is out of range or the descriptor block
+ * holding it is not loaded.
+ */
+struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+					     unsigned int block_group,
+					     struct buffer_head ** bh)
+{
+	unsigned long group_desc;
+	unsigned long offset;
+	struct ext3_group_desc * desc;
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+
+	/* block_group is unsigned, hence %u rather than %d below. */
+	if (block_group >= sbi->s_groups_count) {
+		ext3_error (sb, "ext3_get_group_desc",
+			    "block_group >= groups_count - "
+			    "block_group = %u, groups_count = %lu",
+			    block_group, sbi->s_groups_count);
+
+		return NULL;
+	}
+	smp_rmb();
+
+	/* Which descriptor block, and which slot inside that block. */
+	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
+	offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
+	if (!sbi->s_group_desc[group_desc]) {
+		ext3_error (sb, "ext3_get_group_desc",
+			    "Group descriptor not loaded - "
+			    "block_group = %u, group_desc = %lu, desc = %lu",
+			     block_group, group_desc, offset);
+		return NULL;
+	}
+
+	desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data;
+	if (bh)
+		*bh = sbi->s_group_desc[group_desc];
+	return desc + offset;
+}
+
+/**
+ * read_block_bitmap()
+ * @sb:			super block
+ * @block_group:	given block group
+ *
+ * Read the block bitmap of @block_group: the bitmap's block number is
+ * taken from the group descriptor and the block is read with sb_bread().
+ *
+ * Return buffer_head on success or NULL in case of failure (descriptor
+ * lookup failed, or the read failed, which is reported via ext3_error()).
+ */
+static struct buffer_head *
+read_block_bitmap(struct super_block *sb, unsigned int block_group)
+{
+	struct ext3_group_desc * desc;
+	struct buffer_head * bh = NULL;
+
+	desc = ext3_get_group_desc (sb, block_group, NULL);
+	if (!desc)
+		goto error_out;
+	bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
+	/* block_group is unsigned, hence %u rather than %d. */
+	if (!bh)
+		ext3_error (sb, "read_block_bitmap",
+			    "Cannot read block bitmap - "
+			    "block_group = %u, block_bitmap = %u",
+			    block_group, le32_to_cpu(desc->bg_block_bitmap));
+error_out:
+	return bh;
+}
+/*
+ * The reservation window structure operations
+ * --------------------------------------------
+ * Operations include:
+ * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
+ *
+ * We use a red-black tree to represent per-filesystem reservation
+ * windows.
+ *
+ */
+
+/**
+ * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
+ * @root:		root of per-filesystem reservation rb tree
+ * @verbose:		verbose mode
+ * @fn:			function which wishes to dump the reservation map
+ *
+ * If verbose is turned on, it will print every block reservation window
+ * (start, end).  Otherwise, it will only print out the "bad" windows:
+ * those whose start is non-zero and >= end, or that overlap with their
+ * predecessor.  When a bad window is met in non-verbose mode the walk is
+ * restarted in verbose mode; if a bad window was seen by the end of the
+ * walk, BUG() is raised.
+ */
+#if 1
+static void __rsv_window_dump(struct rb_root *root, int verbose,
+			      const char *fn)
+{
+	struct rb_node *n;
+	struct ext3_reserve_window_node *rsv, *prev;
+	int bad;
+
+restart:
+	n = rb_first(root);
+	bad = 0;
+	prev = NULL;
+
+	printk("Block Allocation Reservation Windows Map (%s):\n", fn);
+	while (n) {
+		/* n is an rb_node, so use the rbtree accessor. */
+		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
+		if (verbose)
+			printk("reservation window 0x%p "
+			       "start:  %lu, end:  %lu\n",
+			       rsv, rsv->rsv_start, rsv->rsv_end);
+		if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
+			printk("Bad reservation %p (start >= end)\n",
+			       rsv);
+			bad = 1;
+		}
+		if (prev && prev->rsv_end >= rsv->rsv_start) {
+			printk("Bad reservation %p (prev->end >= start)\n",
+			       rsv);
+			bad = 1;
+		}
+		/* First problem in quiet mode: start over, printing everything. */
+		if (bad && !verbose) {
+			printk("Restarting reservation walk in verbose mode\n");
+			verbose = 1;
+			goto restart;
+		}
+		n = rb_next(n);
+		prev = rsv;
+	}
+	printk("Window map complete.\n");
+	if (bad)
+		BUG();
+}
+/* Convenience wrapper that passes the calling function's name as @fn. */
+#define rsv_window_dump(root, verbose) \
+	__rsv_window_dump((root), (verbose), __FUNCTION__)
+#else
+#define rsv_window_dump(root, verbose) do {} while (0)
+#endif
+
+/**
+ * goal_in_my_reservation()
+ * @rsv:		inode's reservation window
+ * @grp_goal:		given goal block relative to the allocation block group
+ * @group:		the current allocation block group
+ * @sb:			filesystem super block
+ *
+ * Decide whether a group-relative goal block falls inside the file's own
+ * reservation window.
+ *
+ * Returns 0 when the window does not intersect @group at all.  Otherwise
+ * a negative @grp_goal (no specific goal) always yields 1, and a
+ * non-negative one yields 1 only if the goal lies within the window.
+ */
+static int
+goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
+			unsigned int group, struct super_block * sb)
+{
+	ext3_fsblk_t first, last;
+
+	first = ext3_group_first_block_no(sb, group);
+	last = first + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+
+	/* Window lies entirely outside this group. */
+	if (rsv->_rsv_start > last || rsv->_rsv_end < first)
+		return 0;
+
+	/* No specific goal requested. */
+	if (grp_goal < 0)
+		return 1;
+
+	if (grp_goal + first < rsv->_rsv_start)
+		return 0;
+	if (grp_goal + first > rsv->_rsv_end)
+		return 0;
+	return 1;
+}
+
+/**
+ * search_reserve_window()
+ * @root:		root of reservation tree
+ * @goal:		target allocation block
+ *
+ * Find the reserved window which includes the goal, or the previous one
+ * if the goal is not in any window.
+ * Returns NULL if there are no windows or if all windows start after the goal.
+ */
+static struct ext3_reserve_window_node *
+search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
+{
+	struct rb_node *n = root->rb_node;
+	struct ext3_reserve_window_node *rsv;
+
+	if (!n)
+		return NULL;
+
+	do {
+		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
+
+		if (goal < rsv->rsv_start)
+			n = n->rb_left;
+		else if (goal > rsv->rsv_end)
+			n = n->rb_right;
+		else
+			return rsv;
+	} while (n);
+	/*
+	 * We've fallen off the end of the tree: the goal wasn't inside
+	 * any particular node.  OK, the previous node must be to one
+	 * side of the interval containing the goal.  If it's the RHS,
+	 * we need to back up one.
+	 */
+	if (rsv->rsv_start > goal) {
+		n = rb_prev(&rsv->rsv_node);
+		/*
+		 * No predecessor: every window starts after the goal.
+		 * Return NULL explicitly instead of feeding a NULL node to
+		 * rb_entry(), which only yields NULL if rsv_node happens to
+		 * sit at offset 0 of the containing structure.
+		 */
+		if (!n)
+			return NULL;
+		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
+	}
+	return rsv;
+}
+
+/**
+ * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree.
+ * @sb:			super block
+ * @rsv:		reservation window to add
+ *
+ * The tree is keyed by window range; a new window whose start falls inside
+ * an existing window dumps the tree and hits BUG().
+ *
+ * Must be called with rsv_lock held.
+ */
+void ext3_rsv_window_add(struct super_block *sb,
+		    struct ext3_reserve_window_node *rsv)
+{
+	struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root;
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	ext3_fsblk_t start = rsv->rsv_start;
+
+	/* Descend to the empty link where the new node belongs. */
+	while (*link) {
+		struct ext3_reserve_window_node *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct ext3_reserve_window_node,
+			       rsv_node);
+
+		if (start < cur->rsv_start) {
+			link = &parent->rb_left;
+		} else if (start > cur->rsv_end) {
+			link = &parent->rb_right;
+		} else {
+			rsv_window_dump(root, 1);
+			BUG();
+		}
+	}
+
+	rb_link_node(&rsv->rsv_node, parent, link);
+	rb_insert_color(&rsv->rsv_node, root);
+}
+
+/**
+ * rsv_window_remove() -- unlink a window from the reservation rb tree
+ * @sb:			super block
+ * @rsv:		reservation window to remove
+ *
+ * Reset the window to the not-allocated state (start, end and hit count)
+ * and erase it from the filesystem reservation window rb tree. Must be
+ * called with rsv_lock held.
+ */
+static void rsv_window_remove(struct super_block *sb,
+			      struct ext3_reserve_window_node *rsv)
+{
+	struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root;
+
+	rsv->rsv_alloc_hit = 0;
+	rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+	rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+
+	rb_erase(&rsv->rsv_node, root);
+}
+
+/*
+ * rsv_is_empty() -- Check if the reservation window is allocated.
+ * @rsv:		given reservation window to check
+ *
+ * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED,
+ * 0 otherwise.  Only the end block is inspected; the start is not.
+ */
+static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
+{
+	/* a valid reservation end block could not be 0 */
+	return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+}
+
+/**
+ * ext3_init_block_alloc_info()
+ * @inode:		file inode structure
+ *
+ * Allocate a block allocation info structure, initialize its reservation
+ * window to the not-allocated state, and attach it to the ext3 inode.
+ * If the allocation fails, the inode's pointer is set to NULL.
+ *
+ * The reservation window structure is only dynamically allocated
+ * and linked to ext3 inode the first time the open file
+ * needs a new block. So, before every ext3_new_block(s) call, for
+ * regular files, we should check whether the reservation window
+ * structure exists or not. In the latter case, this function is called.
+ * Fail to do so will result in block reservation being turned off for that
+ * open file.
+ *
+ * This function is called from ext3_get_blocks_handle(), also called
+ * when setting the reservation window size through ioctl before the file
+ * is open for write (needs block allocation).
+ *
+ * Needs truncate_mutex protection prior to call this function.
+ */
+void ext3_init_block_alloc_info(struct inode *inode)
+{
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext3_block_alloc_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_NOFS);
+	if (info) {
+		struct ext3_reserve_window_node *win = &info->rsv_window_node;
+
+		win->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+		win->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+
+		/*
+		 * A goal size of zero tells the allocator that block
+		 * reservation is off, which is what a filesystem mounted
+		 * with NORESERVATION asks for.
+		 */
+		if (test_opt(inode->i_sb, RESERVATION))
+			win->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS;
+		else
+			win->rsv_goal_size = 0;
+		win->rsv_alloc_hit = 0;
+
+		info->last_alloc_logical_block = 0;
+		info->last_alloc_physical_block = 0;
+	}
+	ei->i_block_alloc_info = info;
+}
+
+/**
+ * ext3_discard_reservation()
+ * @inode:		inode
+ *
+ * Drop the inode's block reservation window from the filesystem's
+ * reservation tree, if it has one.  Nothing is done when the inode has no
+ * block allocation info or its window is already empty.
+ *
+ * It is being called in three cases:
+ *	ext3_release_file(): last writer close the file
+ *	ext3_clear_inode(): last iput(), when nobody link to this file.
+ *	ext3_truncate(): when the block indirect map is about to change.
+ *
+ */
+void ext3_discard_reservation(struct inode *inode)
+{
+	struct ext3_block_alloc_info *info = EXT3_I(inode)->i_block_alloc_info;
+	spinlock_t *lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;
+	struct ext3_reserve_window_node *win;
+
+	if (!info)
+		return;
+
+	win = &info->rsv_window_node;
+	/* Unlocked peek first; the state is re-checked under the lock. */
+	if (rsv_is_empty(&win->rsv_window))
+		return;
+
+	spin_lock(lock);
+	if (!rsv_is_empty(&win->rsv_window))
+		rsv_window_remove(inode->i_sb, win);
+	spin_unlock(lock);
+}
+
+/**
+ * ext3_free_blocks_sb() -- Free given blocks and update quota
+ * @handle:			handle to this transaction
+ * @sb:				super block
+ * @block:			start physical block to free
+ * @count:			number of blocks to free
+ * @pdquot_freed_blocks:	out: reset to 0 on entry, then incremented by
+ *				the number of blocks whose bitmap bit was
+ *				actually cleared
+ *
+ * A range that crosses a block group boundary is processed one group at a
+ * time (see the do_more loop).  Errors are reported through ext3_error() /
+ * ext3_std_error(); the function itself returns nothing.
+ */
+void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
+			 ext3_fsblk_t block, unsigned long count,
+			 unsigned long *pdquot_freed_blocks)
+{
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *gd_bh;
+	unsigned long block_group;
+	ext3_grpblk_t bit;
+	unsigned long i;
+	unsigned long overflow;
+	struct ext3_group_desc * desc;
+	struct ext3_super_block * es;
+	struct ext3_sb_info *sbi;
+	int err = 0, ret;
+	ext3_grpblk_t group_freed;
+
+	*pdquot_freed_blocks = 0;
+	sbi = EXT3_SB(sb);
+	es = sbi->s_es;
+	/*
+	 * Reject ranges that start before the first data block, wrap
+	 * around, or run past the end of the filesystem.  err is still 0
+	 * on this path, so only ext3_error() reports the problem.
+	 */
+	if (block < le32_to_cpu(es->s_first_data_block) ||
+	    block + count < block ||
+	    block + count > le32_to_cpu(es->s_blocks_count)) {
+		ext3_error (sb, "ext3_free_blocks",
+			    "Freeing blocks not in datazone - "
+			    "block = "E3FSBLK", count = %lu", block, count);
+		goto error_return;
+	}
+
+	ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
+
+do_more:
+	overflow = 0;
+	/* Group number and bit index within that group's bitmap. */
+	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
+		      EXT3_BLOCKS_PER_GROUP(sb);
+	bit = (block - le32_to_cpu(es->s_first_data_block)) %
+		      EXT3_BLOCKS_PER_GROUP(sb);
+	/*
+	 * Check to see if we are freeing blocks across a group
+	 * boundary.
+	 */
+	if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
+		overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
+		count -= overflow;
+	}
+	/* Drop the previous group's bitmap (NULL on the first pass). */
+	brelse(bitmap_bh);
+	bitmap_bh = read_block_bitmap(sb, block_group);
+	if (!bitmap_bh)
+		goto error_return;
+	desc = ext3_get_group_desc (sb, block_group, &gd_bh);
+	if (!desc)
+		goto error_return;
+
+	/*
+	 * Freeing a bitmap block or an inode table block is reported, but
+	 * the function carries on and frees the range anyway.
+	 */
+	if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
+	    in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
+	    in_range (block, le32_to_cpu(desc->bg_inode_table),
+		      sbi->s_itb_per_group) ||
+	    in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
+		      sbi->s_itb_per_group))
+		ext3_error (sb, "ext3_free_blocks",
+			    "Freeing blocks in system zones - "
+			    "Block = "E3FSBLK", count = %lu",
+			    block, count);
+
+	/*
+	 * We are about to start releasing blocks in the bitmap,
+	 * so we need undo access.
+	 */
+	/* @@@ check errors */
+	BUFFER_TRACE(bitmap_bh, "getting undo access");
+	err = ext3_journal_get_undo_access(handle, bitmap_bh);
+	if (err)
+		goto error_return;
+
+	/*
+	 * We are about to modify some metadata.  Call the journal APIs
+	 * to unshare ->b_data if a currently-committing transaction is
+	 * using it
+	 */
+	BUFFER_TRACE(gd_bh, "get_write_access");
+	err = ext3_journal_get_write_access(handle, gd_bh);
+	if (err)
+		goto error_return;
+
+	jbd_lock_bh_state(bitmap_bh);
+
+	for (i = 0, group_freed = 0; i < count; i++) {
+		/*
+		 * An HJ special.  This is expensive...
+		 */
+#ifdef CONFIG_JBD_DEBUG
+		jbd_unlock_bh_state(bitmap_bh);
+		{
+			struct buffer_head *debug_bh;
+			debug_bh = sb_find_get_block(sb, block + i);
+			if (debug_bh) {
+				BUFFER_TRACE(debug_bh, "Deleted!");
+				if (!bh2jh(bitmap_bh)->b_committed_data)
+					BUFFER_TRACE(debug_bh,
+						"No commited data in bitmap");
+				BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
+				__brelse(debug_bh);
+			}
+		}
+		jbd_lock_bh_state(bitmap_bh);
+#endif
+		/* Drop the bh state lock around a voluntary reschedule. */
+		if (need_resched()) {
+			jbd_unlock_bh_state(bitmap_bh);
+			cond_resched();
+			jbd_lock_bh_state(bitmap_bh);
+		}
+		/* @@@ This prevents newly-allocated data from being
+		 * freed and then reallocated within the same
+		 * transaction.
+		 *
+		 * Ideally we would want to allow that to happen, but to
+		 * do so requires making journal_forget() capable of
+		 * revoking the queued write of a data block, which
+		 * implies blocking on the journal lock.  *forget()
+		 * cannot block due to truncate races.
+		 *
+		 * Eventually we can fix this by making journal_forget()
+		 * return a status indicating whether or not it was able
+		 * to revoke the buffer.  On successful revoke, it is
+		 * safe not to set the allocation bit in the committed
+		 * bitmap, because we know that there is no outstanding
+		 * activity on the buffer any more and so it is safe to
+		 * reallocate it.
+		 */
+		BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
+		J_ASSERT_BH(bitmap_bh,
+				bh2jh(bitmap_bh)->b_committed_data != NULL);
+		ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
+				bh2jh(bitmap_bh)->b_committed_data);
+
+		/*
+		 * We clear the bit in the bitmap after setting the committed
+		 * data bit, because this is the reverse order to that which
+		 * the allocator uses.
+		 */
+		BUFFER_TRACE(bitmap_bh, "clear bit");
+		if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+						bit + i, bitmap_bh->b_data)) {
+			/* The error is reported with the bh state lock dropped. */
+			jbd_unlock_bh_state(bitmap_bh);
+			ext3_error(sb, __FUNCTION__,
+				"bit already cleared for block "E3FSBLK,
+				 block + i);
+			jbd_lock_bh_state(bitmap_bh);
+			BUFFER_TRACE(bitmap_bh, "bit already cleared");
+		} else {
+			group_freed++;
+		}
+	}
+	jbd_unlock_bh_state(bitmap_bh);
+
+	/*
+	 * NOTE(review): the group descriptor is credited with group_freed
+	 * (bits actually cleared) while the per-cpu counter below is
+	 * credited with count; the two differ whenever a bit was already
+	 * clear -- confirm this is intended.
+	 */
+	spin_lock(sb_bgl_lock(sbi, block_group));
+	desc->bg_free_blocks_count =
+		cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
+			group_freed);
+	spin_unlock(sb_bgl_lock(sbi, block_group));
+	percpu_counter_mod(&sbi->s_freeblocks_counter, count);
+
+	/* We dirtied the bitmap block */
+	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
+	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
+
+	/* And the group descriptor block */
+	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
+	ret = ext3_journal_dirty_metadata(handle, gd_bh);
+	/* Keep the first error of the two dirty_metadata calls. */
+	if (!err) err = ret;
+	*pdquot_freed_blocks += group_freed;
+
+	/* Continue with the part of the range that lies in the next group. */
+	if (overflow && !err) {
+		block += count;
+		count = overflow;
+		goto do_more;
+	}
+	/* Not reached by the early "goto error_return" paths above. */
+	sb->s_dirt = 1;
+error_return:
+	brelse(bitmap_bh);
+	ext3_std_error(sb, err);
+	return;
+}
+
+/**
+ * ext3_free_blocks() -- Free given blocks and update quota
+ * @handle:		handle for this transaction
+ * @inode:		inode
+ * @block:		start physical block to free
+ * @count:		number of blocks to free
+ *
+ * Quota is credited only for the blocks ext3_free_blocks_sb() reports freed.
+ */
+void ext3_free_blocks(handle_t *handle, struct inode *inode,
+			ext3_fsblk_t block, unsigned long count)
+{
+	struct super_block *sb = inode->i_sb;
+	unsigned long freed;
+
+	if (sb == NULL) {
+		printk ("ext3_free_blocks: nonexistent device");
+		return;
+	}
+	ext3_free_blocks_sb(handle, sb, block, count, &freed);
+	if (freed != 0)
+		DQUOT_FREE_BLOCK(inode, freed);
+}
+
+/**
+ * ext3_test_allocatable()
+ * @nr:			bit (group relative block number) to test
+ * @bh:			bufferhead contains the bitmap of the given block group
+ *
+ * Returns 1 if bit @nr may be allocated, i.e. it is clear both in the
+ * bitmap buffer and, if one exists, in the buffer's "last committed
+ * data" copy; returns 0 otherwise.
+ *
+ * For ext3 allocations, we must not reuse any blocks which are still
+ * allocated in that last committed copy: a delete must not free a block
+ * for reuse until the delete transaction has committed.  Otherwise the
+ * old block could be reallocated as data and overwritten before the
+ * commit (because we force data to disk before commit), leading to
+ * corruption if we crashed between the overwrite and the commit.
+ *
+ * @@@ We may want to make this allocation behaviour conditional on
+ * data-writes at some point, and disable it for metadata allocations or
+ * sync-data inodes.
+ */
+static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh)
+{
+	struct journal_head *jh = bh2jh(bh);
+	int allocatable = 1;
+
+	if (ext3_test_bit(nr, bh->b_data))
+		return 0;
+
+	/* the committed copy is examined with the bh state lock held */
+	jbd_lock_bh_state(bh);
+	if (jh->b_committed_data &&
+	    ext3_test_bit(nr, jh->b_committed_data))
+		allocatable = 0;
+	jbd_unlock_bh_state(bh);
+	return allocatable;
+}
+
+/**
+ * bitmap_search_next_usable_block()
+ * @start:		the starting block (group relative) of the search
+ * @bh:			bufferhead contains the block group bitmap
+ * @maxblocks:		the ending block (group relative) of the reservation
+ *
+ * Search forward alternately through the actual bitmap and its
+ * last-committed copy in the journal until a bit free in both is found.
+ * Returns that bit, or -1 if none exists in [@start, @maxblocks).
+ */
+static ext3_grpblk_t
+bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
+					ext3_grpblk_t maxblocks)
+{
+	ext3_grpblk_t next;
+	struct journal_head *jh = bh2jh(bh);
+
+	while (start < maxblocks) {
+		next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start);
+		if (next >= maxblocks)
+			return -1;
+		if (ext3_test_allocatable(next, bh))
+			return next;
+		jbd_lock_bh_state(bh);
+		if (jh->b_committed_data)
+			start = ext3_find_next_zero_bit(jh->b_committed_data,
+							maxblocks, next);
+		jbd_unlock_bh_state(bh);
+	}
+	return -1;
+}
+
+/**
+ * find_next_usable_block()
+ * @start:		the starting block (group relative) to find next
+ *			allocatable block in bitmap.
+ * @bh:			bufferhead contains the block group bitmap
+ * @maxblocks:		the ending block (group relative) for the search
+ *
+ * Find an allocatable block in a bitmap.  We honor both the bitmap and
+ * its last-committed copy (if that exists), and perform the "most
+ * appropriate allocation" algorithm of looking for a free block near
+ * the initial goal; then for a free byte somewhere in the bitmap; then
+ * for any free bit in the bitmap.  Returns the bit found, or -1.
+ */
+static ext3_grpblk_t
+find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
+			ext3_grpblk_t maxblocks)
+{
+	ext3_grpblk_t here, next;
+	char *p, *r;
+
+	if (start > 0) {
+		/*
+		 * The goal was occupied; search forward for a free
+		 * block up to the next 64-block boundary.
+		 *
+		 * end_goal is more or less random, but it has to be
+		 * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
+		 * next 64-bit boundary is simple..
+		 */
+		ext3_grpblk_t end_goal = (start + 63) & ~63;
+		if (end_goal > maxblocks)
+			end_goal = maxblocks;
+		here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
+		if (here < end_goal && ext3_test_allocatable(here, bh))
+			return here;
+		ext3_debug("Bit not found near goal\n");
+	}
+
+	here = start;
+	if (here < 0)
+		here = 0;
+
+	p = ((char *)bh->b_data) + (here >> 3);
+	r = memscan(p, 0, (maxblocks - here + 7) >> 3);
+	next = (r - ((char *)bh->b_data)) << 3;
+
+	if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
+		return next;
+
+	/*
+	 * The bitmap search --- search forward alternately through the actual
+	 * bitmap and the last-committed copy until we find a bit free in
+	 * both
+	 */
+	here = bitmap_search_next_usable_block(here, bh, maxblocks);
+	return here;
+}
+
+/**
+ * claim_block()
+ * @lock:		spinlock passed to the atomic bitmap bit operations
+ * @block:		the free block (group relative) to allocate
+ * @bh:			the bufferhead contains the block group bitmap
+ *
+ * Try to set the bit for @block in the bitmap; return zero if it was
+ * already set.  Then check that nobody has allocated and then freed the
+ * block since we saw that it was not marked in b_committed_data: if so,
+ * clear the bit again and return zero (failure).  Otherwise return one.
+ */
+static inline int
+claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
+{
+	struct journal_head *jh = bh2jh(bh);
+	int claimed = 1;
+
+	if (ext3_set_bit_atomic(lock, block, bh->b_data))
+		return 0;
+
+	jbd_lock_bh_state(bh);
+	if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) {
+		/* bit is set in the committed copy: back out our claim */
+		ext3_clear_bit_atomic(lock, block, bh->b_data);
+		claimed = 0;
+	}
+	jbd_unlock_bh_state(bh);
+	return claimed;
+}
+
+/**
+ * ext3_try_to_allocate()
+ * @sb:			superblock
+ * @handle:		handle to this transaction
+ * @group:		given allocation block group
+ * @bitmap_bh:		bufferhead holds the block bitmap
+ * @grp_goal:		given target block within the group
+ * @count:		target number of blocks to allocate
+ * @my_rsv:		reservation window
+ *
+ * Attempt to allocate blocks within a given range. Set the range of allocation
+ * first, then find the first free bit(s) from the bitmap (within the range),
+ * and at last, allocate the blocks by claiming the found free bit as allocated.
+ * Returns the first block claimed (group relative) and stores the number of
+ * blocks claimed in *@count, or returns -1 with *@count set to 0 on failure.
+ *
+ * The allocation range is the file's own reservation window if there is
+ * one (clipped to this group); otherwise it starts from the given goal
+ * block and ends at the block group's last block.
+ *
+ * If we failed to allocate the desired block then we may end up crossing to a
+ * new bitmap.  In that case we must release write access to the old one via
+ * ext3_journal_release_buffer(), else we'll run out of credits.
+ */
+static ext3_grpblk_t
+ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
+			struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal,
+			unsigned long *count, struct ext3_reserve_window *my_rsv)
+{
+	ext3_fsblk_t group_first_block;
+	ext3_grpblk_t start, end;
+	unsigned long num = 0;
+
+	/* we do allocation within the reservation window if we have a window */
+	if (my_rsv) {
+		group_first_block = ext3_group_first_block_no(sb, group);
+		if (my_rsv->_rsv_start >= group_first_block)
+			start = my_rsv->_rsv_start - group_first_block;
+		else
+			/* reservation window crosses group boundary */
+			start = 0;
+		end = my_rsv->_rsv_end - group_first_block + 1;
+		if (end > EXT3_BLOCKS_PER_GROUP(sb))
+			/* reservation window crosses group boundary */
+			end = EXT3_BLOCKS_PER_GROUP(sb);
+		if ((start <= grp_goal) && (grp_goal < end))
+			start = grp_goal;
+		else
+			grp_goal = -1;
+	} else {
+		if (grp_goal > 0)
+			start = grp_goal;
+		else
+			start = 0;
+		end = EXT3_BLOCKS_PER_GROUP(sb);
+	}
+
+	BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb));
+
+repeat:
+	if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) {
+		grp_goal = find_next_usable_block(start, bitmap_bh, end);
+		if (grp_goal < 0)
+			goto fail_access;
+		if (!my_rsv) {
+			int i;
+
+			for (i = 0; i < 7 && grp_goal > start &&
+					ext3_test_allocatable(grp_goal - 1,
+								bitmap_bh);
+					i++, grp_goal--)
+				;
+		}
+	}
+	start = grp_goal;
+
+	if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group),
+		grp_goal, bitmap_bh)) {
+		/*
+		 * The block was allocated by another thread, or it was
+		 * allocated and then freed by another thread
+		 */
+		start++;
+		grp_goal++;
+		if (start >= end)
+			goto fail_access;
+		goto repeat;
+	}
+	num++;
+	grp_goal++;
+	while (num < *count && grp_goal < end
+		&& ext3_test_allocatable(grp_goal, bitmap_bh)
+		&& claim_block(sb_bgl_lock(EXT3_SB(sb), group),
+				grp_goal, bitmap_bh)) {
+		num++;
+		grp_goal++;
+	}
+	*count = num;
+	return grp_goal - num;
+fail_access:
+	*count = num;
+	return -1;
+}
+
+/**
+ *	find_next_reservable_window():
+ *		find a reservable space within the given range and book it
+ *		for @my_rsv.  The disk bitmap is not consulted here:
+ *		alloc_new_reservation() checks it afterwards.
+ *
+ *	@search_head: the head of the searching list;
+ *		This is not necessarily the list head of the whole filesystem
+ *
+ *		We have both head and start_block to assist the search
+ *		for the reservable space. The list starts from head,
+ *		but we will shift to the place where start_block is,
+ *		then start from there, when looking for a reservable space.
+ *
+ *	@my_rsv: the reservation window to place; its rsv_goal_size is
+ *		the target new reservation window size
+ *
+ *	@sb: the super block
+ *
+ *	@start_block: the first block we consider to start
+ *			the real search from
+ *
+ *	@last_block:
+ *		the maximum block number that our goal reservable space
+ *		could start from. This is normally the last block in this
+ *		group. The search will end when we found the start of next
+ *		possible reservable space is out of this boundary.
+ *		This could handle the cross boundary reservation window
+ *		request.
+ *
+ *	Returns 0 with @my_rsv moved to the free region found, or -1 if
+ *	@search_head is NULL or no region starts by @last_block.
+ */
+static int find_next_reservable_window(
+				struct ext3_reserve_window_node *search_head,
+				struct ext3_reserve_window_node *my_rsv,
+				struct super_block * sb,
+				ext3_fsblk_t start_block,
+				ext3_fsblk_t last_block)
+{
+	struct rb_node *next;
+	struct ext3_reserve_window_node *rsv, *prev;
+	ext3_fsblk_t cur;
+	int size = my_rsv->rsv_goal_size;
+
+	/* TODO: make the start of the reservation window byte-aligned */
+	/* cur = *start_block & ~7;*/
+	cur = start_block;
+	rsv = search_head;
+	if (!rsv)
+		return -1;
+
+	while (1) {
+		if (cur <= rsv->rsv_end)
+			cur = rsv->rsv_end + 1;
+
+		/* TODO?
+		 * in the case we could not find a reservable space
+		 * that is what is expected, during the re-search, we could
+		 * remember what's the largest reservable space we could have
+		 * and return that one.
+		 *
+		 * For now it will fail if we could not find the reservable
+		 * space with expected-size (or more)...
+		 */
+		if (cur > last_block)
+			return -1;		/* fail */
+
+		prev = rsv;
+		next = rb_next(&rsv->rsv_node);
+		rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node);
+
+		/*
+		 * Reached the last reservation, we can just append to the
+		 * previous one.
+		 */
+		if (!next)
+			break;
+
+		if (cur + size <= rsv->rsv_start) {
+			/*
+			 * Found a reservable space big enough.  We could
+			 * have a reservation across the group boundary here
+			 */
+			break;
+		}
+	}
+	/*
+	 * we come here either :
+	 * when we reach the end of the whole list,
+	 * and there is empty reservable space after last entry in the list.
+	 * append it to the end of the list.
+	 *
+	 * or we found one reservable space in the middle of the list,
+	 * return the reservation window that we could append to.
+	 * succeed.
+	 */
+
+	if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
+		rsv_window_remove(sb, my_rsv);
+
+	/*
+	 * Let's book the whole available window for now.  We will check the
+	 * disk bitmap later and then, if there are free blocks then we adjust
+	 * the window size if it's larger than requested.
+	 * Otherwise, we will remove this node from the tree next time
+	 * call find_next_reservable_window.
+	 */
+	my_rsv->rsv_start = cur;
+	my_rsv->rsv_end = cur + size - 1;
+	my_rsv->rsv_alloc_hit = 0;
+
+	if (prev != my_rsv)
+		ext3_rsv_window_add(sb, my_rsv);
+
+	return 0;
+}
+
+/**
+ *	alloc_new_reservation()--allocate a new reservation window
+ *
+ *		To make a new reservation, we search part of the filesystem
+ *		reservation list (the list that inside the group). We try to
+ *		allocate a new reservation window near the allocation goal,
+ *		or the beginning of the group, if there is no goal.
+ *
+ *		We first find a reservable space after the goal, then from
+ *		there, we check the bitmap for the first free block after
+ *		it. If there is no free block until the end of group, then the
+ *		whole group is full, we failed. Otherwise, check if the free
+ *		block is inside the expected reservable space, if so, we
+ *		succeed.
+ *		If the first free block is outside the reservable space, then
+ *		start from the first free block, we search for next available
+ *		space, and go on.
+ *
+ *	on success (returns 0), a new reservation was found and inserted into
+ *	the list.  It contains at least one free block, and it does not
+ *	overlap with other reservation windows.
+ *
+ *	on failure (returns -1): no reservation window found in this group
+ *
+ *	@my_rsv: the reservation
+ *
+ *	@grp_goal: The goal (group-relative).  It is where the search for a
+ *		free reservable space should start from.
+ *		if we have a grp_goal(grp_goal >= 0), then start from there,
+ *		no grp_goal(grp_goal = -1), we start from the first block
+ *		of the group.
+ *
+ *	@sb: the super block
+ *	@group: the group we are trying to allocate in
+ *	@bitmap_bh: the block group block bitmap
+ *
+ */
+static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
+		ext3_grpblk_t grp_goal, struct super_block *sb,
+		unsigned int group, struct buffer_head *bitmap_bh)
+{
+	struct ext3_reserve_window_node *search_head;
+	ext3_fsblk_t group_first_block, group_end_block, start_block;
+	ext3_grpblk_t first_free_block;
+	struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
+	unsigned long size;
+	int ret;
+	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+
+	group_first_block = ext3_group_first_block_no(sb, group);
+	group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+
+	if (grp_goal < 0)
+		start_block = group_first_block;
+	else
+		start_block = grp_goal + group_first_block;
+
+	size = my_rsv->rsv_goal_size;
+
+	if (!rsv_is_empty(&my_rsv->rsv_window)) {
+		/*
+		 * if the old reservation is cross group boundary
+		 * and if the goal is inside the old reservation window,
+		 * we will come here when we just failed to allocate from
+		 * the first part of the window. We still have another part
+		 * that belongs to the next group. In this case, there is no
+		 * point to discard our window and try to allocate a new one
+		 * in this group(which will fail). we should
+		 * keep the reservation window, just simply move on.
+		 *
+		 * Maybe we could shift the start block of the reservation
+		 * window to the first block of next group.
+		 */
+
+		if ((my_rsv->rsv_start <= group_end_block) &&
+				(my_rsv->rsv_end > group_end_block) &&
+				(start_block >= my_rsv->rsv_start))
+			return -1;
+
+		if ((my_rsv->rsv_alloc_hit >
+		     (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
+			/*
+			 * if the previously allocation hit ratio is
+			 * greater than 1/2, then we double the size of
+			 * the reservation window the next time,
+			 * otherwise we keep the same size window
+			 */
+			size = size * 2;
+			if (size > EXT3_MAX_RESERVE_BLOCKS)
+				size = EXT3_MAX_RESERVE_BLOCKS;
+			my_rsv->rsv_goal_size= size;
+		}
+	}
+
+	spin_lock(rsv_lock);
+	/*
+	 * shift the search start to the window near the goal block
+	 */
+	search_head = search_reserve_window(fs_rsv_root, start_block);
+
+	/*
+	 * find_next_reservable_window() simply finds a reservable window
+	 * inside the given range(start_block, group_end_block).
+	 *
+	 * To make sure the reservation window has a free bit inside it, we
+	 * need to check the bitmap after we found a reservable window.
+	 */
+retry:
+	ret = find_next_reservable_window(search_head, my_rsv, sb,
+						start_block, group_end_block);
+
+	if (ret == -1) {
+		if (!rsv_is_empty(&my_rsv->rsv_window))
+			rsv_window_remove(sb, my_rsv);
+		spin_unlock(rsv_lock);
+		return -1;
+	}
+
+	/*
+	 * On success, find_next_reservable_window() returns the
+	 * reservation window where there is a reservable space after it.
+	 * Before we reserve this reservable space, we need
+	 * to make sure there is at least a free block inside this region.
+	 *
+	 * searching the first free bit on the block bitmap and copy of
+	 * last committed bitmap alternatively, until we found a allocatable
+	 * block. Search start from the start block of the reservable space
+	 * we just found.
+	 */
+	spin_unlock(rsv_lock);
+	first_free_block = bitmap_search_next_usable_block(
+			my_rsv->rsv_start - group_first_block,
+			bitmap_bh, group_end_block - group_first_block + 1);
+
+	if (first_free_block < 0) {
+		/*
+		 * no free block left on the bitmap, no point
+		 * to reserve the space. return failed.
+		 */
+		spin_lock(rsv_lock);
+		if (!rsv_is_empty(&my_rsv->rsv_window))
+			rsv_window_remove(sb, my_rsv);
+		spin_unlock(rsv_lock);
+		return -1;		/* failed */
+	}
+
+	start_block = first_free_block + group_first_block;
+	/*
+	 * check if the first free block is within the free space we
+	 * just reserved; rsv_end is the last block of the window (inclusive)
+	 */
+	if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
+		return 0;		/* success */
+	/*
+	 * if the first free bit we found is out of the reservable space
+	 * continue search for next reservable space,
+	 * start from where the free block is,
+	 * we also shift the list head to where we stopped last time
+	 */
+	search_head = my_rsv;
+	spin_lock(rsv_lock);
+	goto retry;
+}
+
+/**
+ * try_to_extend_reservation()
+ * @my_rsv:		given reservation window
+ * @sb:			super block
+ * @size:		number of blocks to extend the window end by
+ *
+ * Attempt to grow the reservation window so that it can hold the
+ * required number of blocks.
+ *
+ * ext3_try_to_allocate() only ever allocates inside the reservation
+ * window, so a multiple blocks allocation has to stop at the end of a
+ * window that is too small.  To make this more efficient, before
+ * ext3_new_blocks() tries to allocate blocks we push the end of the
+ * window out by up to @size blocks, stopping just short of the next
+ * window in the tree.  This is best effort: nothing happens if the
+ * reservation lock cannot be taken immediately.
+ */
+static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
+			struct super_block *sb, int size)
+{
+	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+	struct ext3_reserve_window_node *following;
+	struct rb_node *node;
+
+	/* best effort only: give up rather than wait for the lock */
+	if (!spin_trylock(rsv_lock))
+		return;
+
+	node = rb_next(&my_rsv->rsv_node);
+	if (node) {
+		following = list_entry(node, struct ext3_reserve_window_node, rsv_node);
+		/* grow by @size, but never into the following window */
+		if ((following->rsv_start - my_rsv->rsv_end - 1) >= size)
+			my_rsv->rsv_end += size;
+		else
+			my_rsv->rsv_end = following->rsv_start - 1;
+	} else {
+		my_rsv->rsv_end += size;
+	}
+	spin_unlock(rsv_lock);
+}
+
+/**
+ * ext3_try_to_allocate_with_rsv()
+ * @sb:			superblock
+ * @handle:		handle to this transaction
+ * @group:		given allocation block group
+ * @bitmap_bh:		bufferhead holds the block bitmap
+ * @grp_goal:		given target block within the group
+ * @my_rsv:		reservation window
+ * @count:		target number of blocks to allocate
+ * @errp:		pointer to store the error code
+ *
+ * This is the main function used to allocate a new block and its reservation
+ * window.
+ *
+ * Each time when a new block allocation is needed, first try to allocate from
+ * its own reservation.  If it does not have a reservation window, instead of
+ * looking for a free bit on bitmap first, then look up the reservation list to
+ * see if it is inside somebody else's reservation window, we try to allocate a
+ * reservation window for it starting from the goal first. Then do the block
+ * allocation within the reservation window.
+ *
+ * This will avoid keeping on searching the reservation list again and again
+ * when somebody is looking for a free block (without reservation), and
+ * there are lots of free blocks, but they are all being reserved.
+ *
+ * We use a red-black tree for the per-filesystem reservation list.
+ * Returns the group relative block allocated (count in @count), or -1.
+ */
+static ext3_grpblk_t
+ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
+			unsigned int group, struct buffer_head *bitmap_bh,
+			ext3_grpblk_t grp_goal,
+			struct ext3_reserve_window_node * my_rsv,
+			unsigned long *count, int *errp)
+{
+	ext3_fsblk_t group_first_block, group_last_block;
+	ext3_grpblk_t ret = 0;
+	int fatal;
+	unsigned long num = *count;
+
+	*errp = 0;
+
+	/*
+	 * Make sure we use undo access for the bitmap, because it is critical
+	 * that we do the frozen_data COW on bitmap buffers in all cases even
+	 * if the buffer is in BJ_Forget state in the committing transaction.
+	 */
+	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
+	if (fatal) {
+		*errp = fatal;
+		return -1;
+	}
+
+	/*
+	 * we don't deal with reservation when
+	 * filesystem is mounted without reservation
+	 * or the file is not a regular file
+	 * or last attempt to allocate a block with reservation turned on failed
+	 */
+	if (my_rsv == NULL ) {
+		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+						grp_goal, count, NULL);
+		goto out;
+	}
+	/*
+	 * grp_goal is a group relative block number (if there is a goal),
+	 * 0 <= grp_goal < EXT3_BLOCKS_PER_GROUP(sb), or negative for no goal;
+	 * group_first_block is this group's first filesystem wide block number
+	 */
+	group_first_block = ext3_group_first_block_no(sb, group);
+	group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+
+	/*
+	 * Basically we will allocate a new block from inode's reservation
+	 * window.  We need to allocate a new reservation window, if:
+	 * a) inode does not have a reservation window; or
+	 * b) last attempt to allocate a block from existing reservation
+	 *    failed; or
+	 * c) we come here with a goal that is outside the reservation window
+	 *
+	 * Otherwise (goal inside the window, or no goal but a window exists)
+	 * we allocate from the existing window directly, first trying to
+	 * extend it when fewer than *count blocks remain between the goal
+	 * and the end of the window.
+	 */
+	while (1) {
+		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
+			!goal_in_my_reservation(&my_rsv->rsv_window,
+						grp_goal, group, sb)) {
+			if (my_rsv->rsv_goal_size < *count)
+				my_rsv->rsv_goal_size = *count;
+			ret = alloc_new_reservation(my_rsv, grp_goal, sb,
+							group, bitmap_bh);
+			if (ret < 0)
+				break;			/* failed */
+
+			if (!goal_in_my_reservation(&my_rsv->rsv_window,
+							grp_goal, group, sb))
+				grp_goal = -1;
+		} else if (grp_goal >= 0) {
+			/* rsv_end is filesystem wide, grp_goal is not */
+			unsigned long curr = my_rsv->rsv_end -
+					(group_first_block + grp_goal) + 1;
+			if (curr < *count)
+				try_to_extend_reservation(my_rsv, sb,
+							*count - curr);
+		}
+
+		if ((my_rsv->rsv_start > group_last_block) ||
+				(my_rsv->rsv_end < group_first_block)) {
+			rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1);
+			BUG();
+		}
+		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+					   grp_goal, &num, &my_rsv->rsv_window);
+		if (ret >= 0) {
+			my_rsv->rsv_alloc_hit += num;
+			*count = num;
+			break;				/* succeed */
+		}
+		num = *count;
+	}
+out:
+	if (ret >= 0) {
+		BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
+					"bitmap block");
+		fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
+		if (fatal) {
+			*errp = fatal;
+			return -1;
+		}
+		return ret;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
+	ext3_journal_release_buffer(handle, bitmap_bh);
+	return ret;
+}
+
+/**
+ * ext3_has_free_blocks()
+ * @sbi:		in-core super block structure.
+ *
+ * Returns 1 if the caller may allocate a block: either free blocks exceed
+ * the reserved count, or the caller is entitled to use the reserve.
+ */
+static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
+{
+	ext3_fsblk_t avail, reserved;
+
+	avail = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+	reserved = le32_to_cpu(sbi->s_es->s_r_blocks_count);
+	if (avail >= reserved + 1)
+		return 1;	/* space left beyond the reserved blocks */
+	if (capable(CAP_SYS_RESOURCE) || sbi->s_resuid == current->fsuid)
+		return 1;	/* caller may dip into the reserve */
+	return sbi->s_resgid != 0 && in_group_p (sbi->s_resgid);
+}
+
+/**
+ * ext3_should_retry_alloc()
+ * @sb:			super block
+ * @retries:		number of attempts that have been made
+ *
+ * Called when ENOSPC is returned.  If it is profitable to retry the
+ * operation, wait for the current or committing transaction to complete
+ * and return the result of journal_force_commit_nested().
+ *
+ * Returns 0 if no free blocks are available or if *@retries, which is
+ * incremented by each such check, was already greater than three.
+ */
+int ext3_should_retry_alloc(struct super_block *sb, int *retries)
+{
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+
+	if (!ext3_has_free_blocks(sbi) || (*retries)++ > 3)
+		return 0;
+	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
+	return journal_force_commit_nested(sbi->s_journal);
+}
+
+/**
+ * ext3_new_blocks() -- core block(s) allocation function
+ * @handle:		handle to this transaction
+ * @inode:		file inode
+ * @goal:		given target block(filesystem wide)
+ * @count:		target number of blocks to allocate
+ * @errp:		error code
+ *
+ * ext3_new_blocks uses a goal block to assist allocation.  It tries to
+ * allocate block(s) from the block group contains the goal block first. If that
+ * fails, it will try to allocate block(s) from other block groups without
+ * any specific goal block.
+ *
+ */
+ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
+			ext3_fsblk_t goal, unsigned long *count, int *errp)
+{
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *gdp_bh;
+	int group_no;
+	int goal_group;
+	ext3_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
+	ext3_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block*/
+	ext3_fsblk_t ret_block;		/* filesyetem-wide allocated block */
+	int bgi;			/* blockgroup iteration index */
+	int fatal = 0, err;
+	int performed_allocation = 0;
+	ext3_grpblk_t free_blocks;	/* number of free blocks in a group */
+	struct super_block *sb;
+	struct ext3_group_desc *gdp;
+	struct ext3_super_block *es;
+	struct ext3_sb_info *sbi;
+	struct ext3_reserve_window_node *my_rsv = NULL;
+	struct ext3_block_alloc_info *block_i;
+	unsigned short windowsz = 0;
+#ifdef EXT3FS_DEBUG
+	static int goal_hits, goal_attempts;
+#endif
+	unsigned long ngroups;
+	unsigned long num = *count;
+
+	*errp = -ENOSPC;
+	sb = inode->i_sb;
+	if (!sb) {
+		printk("ext3_new_block: nonexistent device");
+		return 0;
+	}
+
+	/*
+	 * Check quota for allocation of this block.
+	 */
+	if (DQUOT_ALLOC_BLOCK(inode, num)) {
+		*errp = -EDQUOT;
+		return 0;
+	}
+
+	sbi = EXT3_SB(sb);
+	es = EXT3_SB(sb)->s_es;
+	ext3_debug("goal=%lu.\n", goal);
+	/*
+	 * Allocate a block from reservation only when
+	 * filesystem is mounted with reservation(default,-o reservation), and
+	 * it's a regular file, and
+	 * the desired window size is greater than 0 (One could use ioctl
+	 * command EXT3_IOC_SETRSVSZ to set the window size to 0 to turn off
+	 * reservation on that particular file)
+	 */
+	block_i = EXT3_I(inode)->i_block_alloc_info;
+	if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
+		my_rsv = &block_i->rsv_window_node;
+
+	if (!ext3_has_free_blocks(sbi)) {
+		*errp = -ENOSPC;
+		goto out;
+	}
+
+	/*
+	 * First, test whether the goal block is free.
+	 */
+	if (goal < le32_to_cpu(es->s_first_data_block) ||
+	    goal >= le32_to_cpu(es->s_blocks_count))
+		goal = le32_to_cpu(es->s_first_data_block);
+	group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
+			EXT3_BLOCKS_PER_GROUP(sb);
+	goal_group = group_no;
+retry_alloc:
+	gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
+	if (!gdp)
+		goto io_error;
+
+	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+	/*
+	 * if there is not enough free blocks to make a new resevation
+	 * turn off reservation for this allocation
+	 */
+	if (my_rsv && (free_blocks < windowsz)
+		&& (rsv_is_empty(&my_rsv->rsv_window)))
+		my_rsv = NULL;
+
+	if (free_blocks > 0) {
+		grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
+				EXT3_BLOCKS_PER_GROUP(sb));
+		bitmap_bh = read_block_bitmap(sb, group_no);
+		if (!bitmap_bh)
+			goto io_error;
+		grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
+					group_no, bitmap_bh, grp_target_blk,
+					my_rsv,	&num, &fatal);
+		if (fatal)
+			goto out;
+		if (grp_alloc_blk >= 0)
+			goto allocated;
+	}
+
+	ngroups = EXT3_SB(sb)->s_groups_count;
+	smp_rmb();
+
+	/*
+	 * Now search the rest of the groups.  We assume that
+	 * i and gdp correctly point to the last group visited.
+	 */
+	for (bgi = 0; bgi < ngroups; bgi++) {
+		group_no++;
+		if (group_no >= ngroups)
+			group_no = 0;
+		gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
+		if (!gdp) {
+			*errp = -EIO;
+			goto out;
+		}
+		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+		/*
+		 * skip this group if the number of
+		 * free blocks is less than half of the reservation
+		 * window size.
+		 */
+		if (free_blocks <= (windowsz/2))
+			continue;
+
+		brelse(bitmap_bh);
+		bitmap_bh = read_block_bitmap(sb, group_no);
+		if (!bitmap_bh)
+			goto io_error;
+		/*
+		 * try to allocate block(s) from this group, without a goal(-1).
+		 */
+		grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
+					group_no, bitmap_bh, -1, my_rsv,
+					&num, &fatal);
+		if (fatal)
+			goto out;
+		if (grp_alloc_blk >= 0)
+			goto allocated;
+	}
+	/*
+	 * We may end up a bogus ealier ENOSPC error due to
+	 * filesystem is "full" of reservations, but
+	 * there maybe indeed free blocks avaliable on disk
+	 * In this case, we just forget about the reservations
+	 * just do block allocation as without reservations.
+	 */
+	if (my_rsv) {
+		my_rsv = NULL;
+		group_no = goal_group;
+		goto retry_alloc;
+	}
+	/* No space left on the device */
+	*errp = -ENOSPC;
+	goto out;
+
+allocated:
+
+	ext3_debug("using block group %d(%d)\n",
+			group_no, gdp->bg_free_blocks_count);
+
+	BUFFER_TRACE(gdp_bh, "get_write_access");
+	fatal = ext3_journal_get_write_access(handle, gdp_bh);
+	if (fatal)
+		goto out;
+
+	ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no);
+
+	if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||
+	    in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||
+	    in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),
+		      EXT3_SB(sb)->s_itb_per_group) ||
+	    in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
+		      EXT3_SB(sb)->s_itb_per_group))
+		ext3_error(sb, "ext3_new_block",
+			    "Allocating block in system zone - "
+			    "blocks from "E3FSBLK", length %lu",
+			     ret_block, num);
+
+	performed_allocation = 1;
+
+#ifdef CONFIG_JBD_DEBUG
+	{
+		struct buffer_head *debug_bh;
+
+		/* Record bitmap buffer state in the newly allocated block */
+		debug_bh = sb_find_get_block(sb, ret_block);
+		if (debug_bh) {
+			BUFFER_TRACE(debug_bh, "state when allocated");
+			BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
+			brelse(debug_bh);
+		}
+	}
+	jbd_lock_bh_state(bitmap_bh);
+	spin_lock(sb_bgl_lock(sbi, group_no));
+	if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
+		int i;
+
+		for (i = 0; i < num; i++) {
+			if (ext3_test_bit(grp_alloc_blk+i,
+					bh2jh(bitmap_bh)->b_committed_data)) {
+				printk("%s: block was unexpectedly set in "
+					"b_committed_data\n", __FUNCTION__);
+			}
+		}
+	}
+	ext3_debug("found bit %d\n", grp_alloc_blk);
+	spin_unlock(sb_bgl_lock(sbi, group_no));
+	jbd_unlock_bh_state(bitmap_bh);
+#endif
+
+	if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
+		ext3_error(sb, "ext3_new_block",
+			    "block("E3FSBLK") >= blocks count(%d) - "
+			    "block_group = %d, es == %p ", ret_block,
+			le32_to_cpu(es->s_blocks_count), group_no, es);
+		goto out;
+	}
+
+	/*
+	 * It is up to the caller to add the new buffer to a journal
+	 * list of some description.  We don't know in advance whether
+	 * the caller wants to use it as metadata or data.
+	 */
+	ext3_debug("allocating block %lu. Goal hits %d of %d.\n",
+			ret_block, goal_hits, goal_attempts);
+
+	spin_lock(sb_bgl_lock(sbi, group_no));
+	gdp->bg_free_blocks_count =
+			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
+	spin_unlock(sb_bgl_lock(sbi, group_no));
+	percpu_counter_mod(&sbi->s_freeblocks_counter, -num);
+
+	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
+	err = ext3_journal_dirty_metadata(handle, gdp_bh);
+	if (!fatal)
+		fatal = err;
+
+	sb->s_dirt = 1;
+	if (fatal)
+		goto out;
+
+	*errp = 0;
+	brelse(bitmap_bh);
+	DQUOT_FREE_BLOCK(inode, *count-num);
+	*count = num;
+	return ret_block;
+
+io_error:
+	*errp = -EIO;
+out:
+	if (fatal) {
+		*errp = fatal;
+		ext3_std_error(sb, fatal);
+	}
+	/*
+	 * Undo the block allocation
+	 */
+	if (!performed_allocation)
+		DQUOT_FREE_BLOCK(inode, *count);
+	brelse(bitmap_bh);
+	return 0;
+}
+
+ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
+			ext3_fsblk_t goal, int *errp)
+{
+	unsigned long count = 1;	/* ask ext3_new_blocks() for one block */
+
+	return ext3_new_blocks(handle, inode, goal, &count, errp);
+}
+
+/**
+ * ext3_count_free_blocks() -- count filesystem free blocks
+ * @sb:		superblock
+ *
+ * Returns the summed descriptor free counts (EXT3FS_DEBUG: bitmap total).
+ */
+ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
+{
+	ext3_fsblk_t desc_count;
+	struct ext3_group_desc *gdp;
+	int i;
+	unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
+#ifdef EXT3FS_DEBUG
+	struct ext3_super_block *es;
+	ext3_fsblk_t bitmap_count;
+	unsigned long x;	/* free bits counted in the current bitmap */
+	struct buffer_head *bitmap_bh = NULL;
+
+	es = EXT3_SB(sb)->s_es;
+	desc_count = 0;
+	bitmap_count = 0;
+	gdp = NULL;
+
+	smp_rmb();
+	for (i = 0; i < ngroups; i++) {
+		gdp = ext3_get_group_desc(sb, i, NULL);
+		if (!gdp)
+			continue;	/* no descriptor: group is left out */
+		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
+		brelse(bitmap_bh);
+		bitmap_bh = read_block_bitmap(sb, i);
+		if (bitmap_bh == NULL)
+			continue;	/* counted in desc_count only */
+
+		x = ext3_count_free(bitmap_bh, sb->s_blocksize);
+		printk("group %d: stored = %d, counted = %lu\n",
+			i, le16_to_cpu(gdp->bg_free_blocks_count), x);
+		bitmap_count += x;
+	}
+	brelse(bitmap_bh);
+	printk("ext3_count_free_blocks: stored = "E3FSBLK
+		", computed = "E3FSBLK", "E3FSBLK"\n",
+	       le32_to_cpu(es->s_free_blocks_count),
+		desc_count, bitmap_count);
+	return bitmap_count;
+#else
+	desc_count = 0;
+	smp_rmb();
+	for (i = 0; i < ngroups; i++) {
+		gdp = ext3_get_group_desc(sb, i, NULL);
+		if (!gdp)
+			continue;	/* no descriptor: group is left out */
+		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
+	}
+
+	return desc_count;
+#endif
+}
+
+static inline int
+block_in_use(ext3_fsblk_t block, struct super_block *sb, unsigned char *map)
+{
+	/* @block's offset from the first data block, modulo the group size */
+	__u32 first = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
+	return ext3_test_bit((block - first) % EXT3_BLOCKS_PER_GROUP(sb), map);
+}
+
+static inline int test_root(int a, int b)
+{
+	/* Is a == b^k for some k >= 1 (callers pass b = 3, 5, 7)?  Divide a
+	 * down rather than multiplying a power up, so int cannot overflow. */
+	while (a > b && a % b == 0)
+		a /= b;
+	return a == b;
+}
+
+static int ext3_group_sparse(int group)
+{
+	if (group <= 1)
+		return 1;	/* groups up to 1 always qualify */
+	if ((group & 1) == 0)
+		return 0;	/* larger even groups never do */
+	return test_root(group, 3) || test_root(group, 5) ||
+		test_root(group, 7);
+}
+
+/**
+ *	ext3_bg_has_super - number of blocks used by the superblock in group
+ *	@sb: superblock for filesystem
+ *	@group: group number to check
+ *
+ *	Returns 1 when @group carries a superblock copy (primary or backup)
+ *	and 0 when it does not; no other value is produced today.
+ */
+int ext3_bg_has_super(struct super_block *sb, int group)
+{
+	if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
+				EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER))
+		return 1;
+	/* sparse_super: only the groups ext3_group_sparse() accepts */
+	return ext3_group_sparse(group) ? 1 : 0;
+}
+
+static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group)
+{
+	/* a meta group spans EXT3_DESC_PER_BLOCK(sb) consecutive groups */
+	unsigned long span = EXT3_DESC_PER_BLOCK(sb);
+	unsigned long first = (group / span) * span;
+	unsigned long last = first + span - 1;
+
+	/* 1 for its first, second and last member, 0 for the others */
+	return (group == first || group == first + 1 || group == last) ? 1 : 0;
+}
+
+static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group)
+{
+	if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
+				EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) ||
+			ext3_group_sparse(group))
+		return EXT3_SB(sb)->s_gdb_count;
+	return 0;	/* sparse_super on and this group was filtered out */
+}
+
+/**
+ *	ext3_bg_num_gdb - number of blocks used by the group table in group
+ *	@sb: superblock for filesystem
+ *	@group: group number to check
+ *
+ *	Picks the meta_bg rule when that feature is on and @group's meta
+ *	group is at or past s_first_meta_bg, the classic rule otherwise,
+ *	and returns the descriptor block count that rule gives for @group.
+ */
+unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
+{
+	unsigned long meta_start =
+			le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg);
+	unsigned long meta_idx = group / EXT3_DESC_PER_BLOCK(sb);
+	int use_meta;
+
+	use_meta = EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG)
+			&& meta_idx >= meta_start;
+	if (use_meta)
+		return ext3_bg_num_gdb_meta(sb, group);
+	return ext3_bg_num_gdb_nometa(sb, group);
+}
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
new file mode 100644
index 000000000000..b9176eed98d1
--- /dev/null
+++ b/fs/ext4/bitmap.c
@@ -0,0 +1,32 @@
+/*
+ *  linux/fs/ext3/bitmap.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ */
+
+#include <linux/buffer_head.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+
+#ifdef EXT3FS_DEBUG
+
+static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
+/* Count the zero bits in the first @numchars bytes of @map, per nibble. */
+unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
+{
+	unsigned long nfree = 0;
+	unsigned int pos;
+
+	if (map == NULL)
+		return 0;
+	for (pos = 0; pos < numchars; pos++)
+		nfree += nibblemap[map->b_data[pos] & 0xf] +
+			nibblemap[(map->b_data[pos] >> 4) & 0xf];
+	return nfree;
+}
+
+#endif  /*  EXT3FS_DEBUG  */
+
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
new file mode 100644
index 000000000000..d0b54f30b914
--- /dev/null
+++ b/fs/ext4/dir.c
@@ -0,0 +1,518 @@
+/*
+ *  linux/fs/ext3/dir.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  from
+ *
+ *  linux/fs/minix/dir.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  ext3 directory handling functions
+ *
+ *  Big-endian to little-endian byte-swapping/bitmaps by
+ *        David S. Miller (davem@caip.rutgers.edu), 1995
+ *
+ * Hash Tree Directory indexing (c) 2001  Daniel Phillips
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/buffer_head.h>
+#include <linux/smp_lock.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
+
+static unsigned char ext3_filetype_table[] = {
+	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
+};
+
+static int ext3_readdir(struct file *, void *, filldir_t);
+static int ext3_dx_readdir(struct file * filp,
+			   void * dirent, filldir_t filldir);
+static int ext3_release_dir (struct inode * inode,
+				struct file * filp);
+
+const struct file_operations ext3_dir_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.readdir	= ext3_readdir,		/* we take BKL. needed?*/
+	.ioctl		= ext3_ioctl,		/* BKL held */
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext3_compat_ioctl,
+#endif
+	.fsync		= ext3_sync_file,	/* BKL held */
+#ifdef CONFIG_EXT3_INDEX
+	.release	= ext3_release_dir,
+#endif
+};
+
+
+static unsigned char get_dtype(struct super_block *sb, int filetype)
+{
+	/* the table is consulted only with the filetype feature and a known code */
+	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) &&
+	    filetype < EXT3_FT_MAX)
+		return ext3_filetype_table[filetype];
+	return DT_UNKNOWN;
+}
+
+/* Sanity-check @de: 1 if it looks valid, 0 (after ext3_error()) if not. */
+int ext3_check_dir_entry (const char * function, struct inode * dir,
+			  struct ext3_dir_entry_2 * de,
+			  struct buffer_head * bh,
+			  unsigned long offset)
+{
+	const char * error_msg = NULL;	/* first failed check, if any */
+	const int rlen = le16_to_cpu(de->rec_len);
+
+	if (rlen < EXT3_DIR_REC_LEN(1))
+		error_msg = "rec_len is smaller than minimal";
+	else if (rlen % 4 != 0)
+		error_msg = "rec_len % 4 != 0";
+	else if (rlen < EXT3_DIR_REC_LEN(de->name_len))
+		error_msg = "rec_len is too small for name_len";
+	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+		error_msg = "directory entry across blocks";
+	else if (le32_to_cpu(de->inode) >
+			le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
+		error_msg = "inode out of bounds";
+
+	if (error_msg != NULL)
+		ext3_error (dir->i_sb, function,
+			"bad entry in directory #%lu: %s - "
+			"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
+			dir->i_ino, error_msg, offset,
+			(unsigned long) le32_to_cpu(de->inode),
+			rlen, de->name_len);
+	return error_msg == NULL ? 1 : 0;
+}
+
+static int ext3_readdir(struct file * filp,
+			 void * dirent, filldir_t filldir)
+{
+	int error = 0;		/* last filldir() result; non-zero ends the scan */
+	unsigned long offset;	/* byte offset inside the current block */
+	int i, stored;		/* stored: entries accepted by filldir so far */
+	struct ext3_dir_entry_2 *de;
+	struct super_block *sb;
+	int err;
+	struct inode *inode = filp->f_dentry->d_inode;
+	int ret = 0;
+
+	sb = inode->i_sb;
+
+#ifdef CONFIG_EXT3_INDEX
+	if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
+				    EXT3_FEATURE_COMPAT_DIR_INDEX) &&
+	    ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
+	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
+		err = ext3_dx_readdir(filp, dirent, filldir);
+		if (err != ERR_BAD_DX_DIR) {
+			ret = err;
+			goto out;
+		}
+		/*
+		 * We don't set the inode dirty flag since it's not
+		 * critical that it get flushed back to the disk.
+		 */
+		EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
+	}
+#endif
+	stored = 0;
+	offset = filp->f_pos & (sb->s_blocksize - 1);
+
+	while (!error && !stored && filp->f_pos < inode->i_size) {
+		unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb);
+		struct buffer_head map_bh;
+		struct buffer_head *bh = NULL;
+
+		map_bh.b_state = 0;
+		err = ext3_get_blocks_handle(NULL, inode, blk, 1,
+						&map_bh, 0, 0);
+		if (err > 0) {
+			page_cache_readahead(sb->s_bdev->bd_inode->i_mapping,
+				&filp->f_ra,
+				filp,
+				map_bh.b_blocknr >>
+					(PAGE_CACHE_SHIFT - inode->i_blkbits),
+				1);
+			bh = ext3_bread(NULL, inode, blk, 0, &err);
+		}
+
+		/*
+		 * We ignore I/O errors on directories so users have a chance
+		 * of recovering data when there's a bad sector
+		 */
+		if (!bh) {
+			ext3_error (sb, "ext3_readdir",
+				"directory #%lu contains a hole at offset %lu",
+				inode->i_ino, (unsigned long)filp->f_pos);
+			filp->f_pos += sb->s_blocksize - offset;
+			continue;	/* NOTE(review): offset is not reset to 0 on this path - confirm */
+		}
+
+revalidate:
+		/* If the dir block has changed since the last call to
+		 * readdir(2), then we might be pointing to an invalid
+		 * dirent right now.  Scan from the start of the block
+		 * to make sure. */
+		if (filp->f_version != inode->i_version) {
+			for (i = 0; i < sb->s_blocksize && i < offset; ) {
+				de = (struct ext3_dir_entry_2 *)
+					(bh->b_data + i);
+				/* It's too expensive to do a full
+				 * dirent test each time round this
+				 * loop, but we do have to test at
+				 * least that it is non-zero.  A
+				 * failure will be detected in the
+				 * dirent test below. */
+				if (le16_to_cpu(de->rec_len) <
+						EXT3_DIR_REC_LEN(1))
+					break;
+				i += le16_to_cpu(de->rec_len);
+			}
+			offset = i;
+			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
+				| offset;
+			filp->f_version = inode->i_version;
+		}
+
+		while (!error && filp->f_pos < inode->i_size
+		       && offset < sb->s_blocksize) {
+			de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
+			if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
+						   bh, offset)) {
+				/* On error, skip the f_pos to the
+                                   next block. */
+				filp->f_pos = (filp->f_pos |
+						(sb->s_blocksize - 1)) + 1;
+				brelse (bh);
+				ret = stored;	/* report what was delivered before the bad entry */
+				goto out;
+			}
+			offset += le16_to_cpu(de->rec_len);
+			if (le32_to_cpu(de->inode)) {
+				/* We might block in the next section
+				 * if the data destination is
+				 * currently swapped out.  So, use a
+				 * version stamp to detect whether or
+				 * not the directory has been modified
+				 * during the copy operation.
+				 */
+				unsigned long version = filp->f_version;
+
+				error = filldir(dirent, de->name,
+						de->name_len,
+						filp->f_pos,
+						le32_to_cpu(de->inode),
+						get_dtype(sb, de->file_type));
+				if (error)
+					break;
+				if (version != filp->f_version)
+					goto revalidate;
+				stored ++;
+			}
+			filp->f_pos += le16_to_cpu(de->rec_len);
+		}
+		offset = 0;	/* the next block is read from its start */
+		brelse (bh);
+	}
+out:
+	return ret;
+}
+
+#ifdef CONFIG_EXT3_INDEX
+/*
+ * These functions convert from the major/minor hash to an f_pos
+ * value.
+ *
+ * Currently we only use major hash number.  This is unfortunate, but
+ * on 32-bit machines, the same VFS interface is used for lseek and
+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of
+ * lseek/telldir/seekdir will blow out spectacularly, and from within
+ * the ext2 low-level routine, we don't know if we're being called by
+ * a 64-bit version of the system call or the 32-bit version of the
+ * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
+ * cookie.  Sigh.
+ */
+#define hash2pos(major, minor)	((major) >> 1)	/* minor is not encoded */
+#define pos2maj_hash(pos)	(((pos) << 1) & 0xffffffff)
+#define pos2min_hash(pos)	(0)	/* f_pos carries no minor hash */
+
+/*
+ * This structure holds the nodes of the red-black tree used to store
+ * the directory entry in hash order.
+ */
+struct fname {
+	__u32		hash;
+	__u32		minor_hash;
+	struct rb_node	rb_hash;
+	struct fname	*next;
+	__u32		inode;
+	__u8		name_len;
+	__u8		file_type;
+	char		name[0];
+};
+
+/*
+ * This function implements a non-recursive way of freeing all of the
+ * nodes in the red-black tree.
+ */
+static void free_rb_tree_fname(struct rb_root *root)
+{
+	struct rb_node	*n = root->rb_node;
+	struct rb_node	*parent;
+	struct fname	*fname;
+
+	while (n) {
+		/* Do the node's children first */
+		if ((n)->rb_left) {
+			n = n->rb_left;
+			continue;
+		}
+		if (n->rb_right) {
+			n = n->rb_right;
+			continue;
+		}
+		/*
+		 * The node has no children; free it, and then zero
+		 * out parent's link to it.  Finally go to the
+		 * beginning of the loop and try to free the parent
+		 * node.
+		 */
+		parent = rb_parent(n);	/* read before n's fname is freed */
+		fname = rb_entry(n, struct fname, rb_hash);
+		while (fname) {	/* frees the node's fname and its ->next chain */
+			struct fname * old = fname;
+			fname = fname->next;
+			kfree (old);
+		}
+		if (!parent)
+			root->rb_node = NULL;
+		else if (parent->rb_left == n)
+			parent->rb_left = NULL;
+		else if (parent->rb_right == n)
+			parent->rb_right = NULL;
+		n = parent;
+	}
+	root->rb_node = NULL;
+}
+
+
+static struct dir_private_info *create_dir_info(loff_t pos)
+{
+	struct dir_private_info *info = kmalloc(sizeof(*info), GFP_KERNEL);
+
+	if (info == NULL)
+		return NULL;
+	/* empty tree, positioned at the hash that @pos encodes */
+	info->curr_hash = pos2maj_hash(pos);
+	info->curr_minor_hash = pos2min_hash(pos);
+	info->next_hash = 0;
+	info->last_pos = 0;
+	info->root.rb_node = NULL;
+	info->curr_node = NULL;
+	info->extra_fname = NULL;
+	return info;
+}
+
+void ext3_htree_free_dir_info(struct dir_private_info *p)
+{
+	free_rb_tree_fname(&p->root);	/* every cached fname first */
+	kfree(p);			/* then the info structure itself */
+}
+
+/* Given a directory entry, enter a copy of it into the fname rb tree,
+ * keyed by (hash, minor_hash).  Returns 0, or -ENOMEM if allocation fails.
+ */
+int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
+			     __u32 minor_hash,
+			     struct ext3_dir_entry_2 *dirent)
+{
+	struct rb_node **p, *parent = NULL;
+	struct fname * fname, *new_fn;
+	struct dir_private_info *info;
+	int len;
+
+	info = (struct dir_private_info *) dir_file->private_data;
+	p = &info->root.rb_node;
+
+	/* Create and allocate the fname structure */
+	len = sizeof(struct fname) + dirent->name_len + 1;
+	new_fn = kzalloc(len, GFP_KERNEL);
+	if (!new_fn)
+		return -ENOMEM;
+	new_fn->hash = hash;
+	new_fn->minor_hash = minor_hash;
+	new_fn->inode = le32_to_cpu(dirent->inode);
+	new_fn->name_len = dirent->name_len;
+	new_fn->file_type = dirent->file_type;
+	memcpy(new_fn->name, dirent->name, dirent->name_len);
+	new_fn->name[dirent->name_len] = 0;	/* the +1 in len is for this NUL */
+
+	while (*p) {
+		parent = *p;
+		fname = rb_entry(parent, struct fname, rb_hash);
+
+		/*
+		 * If the hash and minor hash match up, then we put
+		 * them on a linked list.  This rarely happens...
+		 */
+		if ((new_fn->hash == fname->hash) &&
+		    (new_fn->minor_hash == fname->minor_hash)) {
+			new_fn->next = fname->next;
+			fname->next = new_fn;
+			return 0;	/* chained behind the node, not linked into the tree */
+		}
+
+		if (new_fn->hash < fname->hash)
+			p = &(*p)->rb_left;
+		else if (new_fn->hash > fname->hash)
+			p = &(*p)->rb_right;
+		else if (new_fn->minor_hash < fname->minor_hash)
+			p = &(*p)->rb_left;
+		else /* if (new_fn->minor_hash > fname->minor_hash) */
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&new_fn->rb_hash, parent, p);
+	rb_insert_color(&new_fn->rb_hash, &info->root);
+	return 0;
+}
+
+
+
+/*
+ * This is a helper function for ext3_dx_readdir.  It calls filldir
+ * for all entries on the fname linked list.  (Normally there is only
+ * one entry on the linked list, unless there are 62 bit hash collisions.)
+ */
+static int call_filldir(struct file * filp, void * dirent,
+			filldir_t filldir, struct fname *fname)
+{
+	struct dir_private_info *info = filp->private_data;
+	loff_t	curr_pos;	/* same position is reported for the whole chain */
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct super_block * sb;
+	int error;
+
+	sb = inode->i_sb;
+
+	if (!fname) {
+		printk("call_filldir: called with null fname?!?\n");
+		return 0;
+	}
+	curr_pos = hash2pos(fname->hash, fname->minor_hash);
+	while (fname) {
+		error = filldir(dirent, fname->name,
+				fname->name_len, curr_pos,
+				fname->inode,
+				get_dtype(sb, fname->file_type));
+		if (error) {
+			filp->f_pos = curr_pos;
+			info->extra_fname = fname->next;	/* NOTE(review): records the entry after the rejected one */
+			return error;
+		}
+		fname = fname->next;
+	}
+	return 0;
+}
+
+static int ext3_dx_readdir(struct file * filp,
+			 void * dirent, filldir_t filldir)
+{
+	struct dir_private_info *info = filp->private_data;
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct fname *fname;
+	int	ret;
+
+	if (!info) {	/* first call on this open file: create the cache */
+		info = create_dir_info(filp->f_pos);
+		if (!info)
+			return -ENOMEM;
+		filp->private_data = info;
+	}
+
+	if (filp->f_pos == EXT3_HTREE_EOF)
+		return 0;	/* EOF */
+
+	/* Some one has messed with f_pos; reset the world */
+	if (info->last_pos != filp->f_pos) {
+		free_rb_tree_fname(&info->root);
+		info->curr_node = NULL;
+		info->extra_fname = NULL;
+		info->curr_hash = pos2maj_hash(filp->f_pos);
+		info->curr_minor_hash = pos2min_hash(filp->f_pos);
+	}
+
+	/*
+	 * If there are any leftover names on the hash collision
+	 * chain, return them first.
+	 */
+	if (info->extra_fname &&
+	    call_filldir(filp, dirent, filldir, info->extra_fname))
+		goto finished;	/* filldir stopped again: just record f_pos */
+
+	if (!info->curr_node)
+		info->curr_node = rb_first(&info->root);
+
+	while (1) {
+		/*
+		 * Fill the rbtree if we have no more entries,
+		 * or the inode has changed since we last read in the
+		 * cached entries.
+		 */
+		if ((!info->curr_node) ||
+		    (filp->f_version != inode->i_version)) {
+			info->curr_node = NULL;
+			free_rb_tree_fname(&info->root);
+			filp->f_version = inode->i_version;
+			ret = ext3_htree_fill_tree(filp, info->curr_hash,
+						   info->curr_minor_hash,
+						   &info->next_hash);
+			if (ret < 0)
+				return ret;	/* last_pos is left untouched here */
+			if (ret == 0) {
+				filp->f_pos = EXT3_HTREE_EOF;
+				break;
+			}
+			info->curr_node = rb_first(&info->root);
+		}
+
+		fname = rb_entry(info->curr_node, struct fname, rb_hash);
+		info->curr_hash = fname->hash;
+		info->curr_minor_hash = fname->minor_hash;
+		if (call_filldir(filp, dirent, filldir, fname))
+			break;	/* curr_node not advanced: retried on the next call */
+
+		info->curr_node = rb_next(info->curr_node);
+		if (!info->curr_node) {
+			if (info->next_hash == ~0) {
+				filp->f_pos = EXT3_HTREE_EOF;
+				break;
+			}
+			info->curr_hash = info->next_hash;
+			info->curr_minor_hash = 0;
+		}
+	}
+finished:
+	info->last_pos = filp->f_pos;	/* lets the next call detect a seek */
+	return 0;
+}
+
+static int ext3_release_dir (struct inode * inode, struct file * filp)
+{
+	/* drop the readdir cache hanging off this open file, if any */
+	if (filp->private_data != NULL)
+		ext3_htree_free_dir_info(filp->private_data);
+	return 0;
+}
+
+#endif
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
new file mode 100644
index 000000000000..e96c388047e0
--- /dev/null
+++ b/fs/ext4/file.c
@@ -0,0 +1,139 @@
+/*
+ *  linux/fs/ext3/file.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  from
+ *
+ *  linux/fs/minix/file.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  ext3 fs regular file handling primitives
+ *
+ *  64-bit file support on 64-bit platforms by Jakub Jelinek
+ *	(jj@sunsite.ms.mff.cuni.cz)
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * Called when an inode is released. Note that this is different
+ * from ext3_file_open: open gets called at every open, but release
+ * gets called only when /all/ the files are closed.
+ */
+static int ext3_release_file (struct inode * inode, struct file * filp)
+{
+	int last_writer = (filp->f_mode & FMODE_WRITE) &&
+			atomic_read(&inode->i_writecount) == 1;
+
+	/* the final writer going away gives up the block reservation */
+	if (last_writer) {
+		mutex_lock(&EXT3_I(inode)->truncate_mutex);
+		ext3_discard_reservation(inode);
+		mutex_unlock(&EXT3_I(inode)->truncate_mutex);
+	}
+	if (is_dx(inode) && filp->private_data)
+		ext3_htree_free_dir_info(filp->private_data);
+	return 0;
+}
+
+static ssize_t
+ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_dentry->d_inode;
+	ssize_t ret;	/* result of generic_file_aio_write() */
+	int err;	/* result of the forced commit, if one is done */
+
+	ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+
+	/*
+	 * Skip flushing if there was an error, or if nothing was written.
+	 */
+	if (ret <= 0)
+		return ret;
+
+	/*
+	 * If the inode is IS_SYNC, or is O_SYNC and we are doing data
+	 * journalling then we need to make sure that we force the transaction
+	 * to disk to keep all metadata uptodate synchronously.
+	 */
+	if (file->f_flags & O_SYNC) {
+		/*
+		 * If we are non-data-journaled, then the dirty data has
+		 * already been flushed to backing store by generic_osync_inode,
+		 * and the inode has been flushed too if there have been any
+		 * modifications other than mere timestamp updates.
+		 *
+		 * Open question --- do we care about flushing timestamps too
+		 * if the inode is IS_SYNC?
+		 */
+		if (!ext3_should_journal_data(inode))
+			return ret;
+
+		goto force_commit;
+	}
+
+	/*
+	 * So we know that there has been no forced data flush.  If the inode
+	 * is marked IS_SYNC, we need to force one ourselves.
+	 */
+	if (!IS_SYNC(inode))
+		return ret;
+
+	/*
+	 * Open question #2 --- should we force data to disk here too?  If we
+	 * don't, the only impact is that data=writeback filesystems won't
+	 * flush data to disk automatically on IS_SYNC, only metadata (but
+	 * historically, that is what ext2 has done.)
+	 */
+
+force_commit:
+	err = ext3_force_commit(inode->i_sb);
+	if (err)
+		return err;	/* a failed commit replaces the write result */
+	return ret;
+}
+
+const struct file_operations ext3_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= generic_file_aio_read,
+	.aio_write	= ext3_file_write,
+	.ioctl		= ext3_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext3_compat_ioctl,
+#endif
+	.mmap		= generic_file_mmap,
+	.open		= generic_file_open,
+	.release	= ext3_release_file,
+	.fsync		= ext3_sync_file,
+	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
+};
+
+struct inode_operations ext3_file_inode_operations = {
+	.truncate	= ext3_truncate,
+	.setattr	= ext3_setattr,
+#ifdef CONFIG_EXT3_FS_XATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= ext3_listxattr,
+	.removexattr	= generic_removexattr,
+#endif
+	.permission	= ext3_permission,
+};
+
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
new file mode 100644
index 000000000000..dd1fd3c0fc05
--- /dev/null
+++ b/fs/ext4/fsync.c
@@ -0,0 +1,88 @@
+/*
+ *  linux/fs/ext3/fsync.c
+ *
+ *  Copyright (C) 1993  Stephen Tweedie (sct@redhat.com)
+ *  from
+ *  Copyright (C) 1992  Remy Card (card@masi.ibp.fr)
+ *                      Laboratoire MASI - Institut Blaise Pascal
+ *                      Universite Pierre et Marie Curie (Paris VI)
+ *  from
+ *  linux/fs/minix/truncate.c   Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  ext3fs fsync primitive
+ *
+ *  Big-endian to little-endian byte-swapping/bitmaps by
+ *        David S. Miller (davem@caip.rutgers.edu), 1995
+ *
+ *  Removed unnecessary code duplication for little endian machines
+ *  and excessive __inline__s.
+ *        Andi Kleen, 1997
+ *
+ * Major simplifications and cleanup - we only need to do the metadata, because
+ * we can depend on generic_block_fdatasync() to sync the data blocks.
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/writeback.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+
+/*
+ * akpm: A new design for ext3_sync_file().
+ *
+ * This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
+ * There cannot be a transaction open by this task.
+ * Another task could have dirtied this inode.  Its data can be in any
+ * state in the journalling system.
+ *
+ * What we do is just kick off a commit and wait on it.  This will snapshot the
+ * inode to disk.
+ */
+
+int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
+{	/* @file and @datasync are not consulted in this body */
+	struct inode *inode = dentry->d_inode;
+	int ret = 0;	/* stays 0 when neither branch below has work to do */
+
+	J_ASSERT(ext3_journal_current_handle() == 0);
+
+	/*
+	 * data=writeback:
+	 *  The caller's filemap_fdatawrite()/wait will sync the data.
+	 *  sync_inode() will sync the metadata
+	 *
+	 * data=ordered:
+	 *  The caller's filemap_fdatawrite() will write the data and
+	 *  sync_inode() will write the inode if it is dirty.  Then the caller's
+	 *  filemap_fdatawait() will wait on the pages.
+	 *
+	 * data=journal:
+	 *  filemap_fdatawrite won't do anything (the buffers are clean).
+	 *  ext3_force_commit will write the file data into the journal and
+	 *  will wait on that.
+	 *  filemap_fdatawait() will encounter a ton of newly-dirtied pages
+	 *  (they were dirtied by commit).  But that's OK - the blocks are
+	 *  safe in-journal, which is all fsync() needs to ensure.
+	 */
+	if (ext3_should_journal_data(inode)) {
+		ret = ext3_force_commit(inode->i_sb);
+		goto out;
+	}
+
+	/*
+	 * The VFS has written the file data.  If the inode is unaltered
+	 * then we need not start a commit.
+	 */
+	if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
+		struct writeback_control wbc = {
+			.sync_mode = WB_SYNC_ALL,
+			.nr_to_write = 0, /* sys_fsync did this */
+		};
+		ret = sync_inode(inode, &wbc);
+	}
+out:
+	return ret;
+}
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
new file mode 100644
index 000000000000..deeb27b5ba83
--- /dev/null
+++ b/fs/ext4/hash.c
@@ -0,0 +1,152 @@
+/*
+ *  linux/fs/ext3/hash.c
+ *
+ * Copyright (C) 2002 by Theodore Ts'o
+ *
+ * This file is released under the GPL v2.
+ *
+ * This file may be redistributed under the terms of the GNU Public
+ * License.
+ */
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/sched.h>
+#include <linux/ext3_fs.h>
+#include <linux/cryptohash.h>
+
+#define DELTA 0x9E3779B9
+
+static void TEA_transform(__u32 buf[4], __u32 const in[])
+{
+	__u32	x = buf[0], y = buf[1];
+	__u32	sum = 0;
+	int	round;
+
+	/* 16 rounds mixing in[0..3] into the first two words of buf */
+	for (round = 0; round < 16; round++) {
+		sum += DELTA;
+		x += ((y << 4) + in[0]) ^ (y + sum) ^ ((y >> 5) + in[1]);
+		y += ((x << 4) + in[2]) ^ (x + sum) ^ ((x >> 5) + in[3]);
+	}
+
+	buf[0] += x;
+	buf[1] += y;
+}
+
+
+/* The old legacy hash (NOTE: plain char *name makes bytes >= 0x80 sign-dependent) */
+static __u32 dx_hack_hash (const char *name, int len)
+{
+	__u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
+	while (len--) {
+		__u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
+
+		if (hash & 0x80000000) hash -= 0x7fffffff;
+		hash1 = hash0;
+		hash0 = hash;
+	}
+	return (hash0 << 1);	/* so the low bit of the result is always 0 */
+}
+
+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
+{
+	__u32	pad, val;
+	int	i;
+
+	pad = (__u32)len | ((__u32)len << 8);
+	pad |= pad << 16;	/* for len < 256: len repeated in all 4 bytes */
+
+	val = pad;
+	if (len > num*4)
+		len = num * 4;	/* consume at most num words' worth of bytes */
+	for (i=0; i < len; i++) {
+		if ((i % 4) == 0)
+			val = pad;
+		val = msg[i] + (val << 8);	/* msg[i] is plain char: sign-extends where char is signed */
+		if ((i % 4) == 3) {
+			*buf++ = val;
+			val = pad;
+			num--;
+		}
+	}
+	if (--num >= 0)
+		*buf++ = val;	/* partially filled word, or pad if none */
+	while (--num >= 0)
+		*buf++ = pad;	/* remaining output words get pure padding */
+}
+
+/*
+ * Computes the hash of a filename into hinfo->hash and hinfo->minor_hash
+ * and returns 0; an unknown hinfo->hash_version yields -1 with hash 0.
+ * If len is 0 and name is NULL, this can be used to test whether or not
+ * a hash version is supported.
+ * The seed is a 4 longword (32 bits) "secret" which can be used to
+ * uniquify a hash.  If the seed is NULL or all zeros, the built-in
+ * default seed is used.
+ *
+ * A particular hash version specifies whether or not the seed is
+ * represented, and whether or not the returned hash is 32 bits or 64
+ * bits.  32 bit hashes will return 0 for the minor hash.
+ */
+int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
+{
+	__u32	hash;
+	__u32	minor_hash = 0;
+	const char	*p;
+	int		i;
+	__u32		in[8], buf[4];
+
+	/* Initialize the default seed for the hash checksum functions */
+	buf[0] = 0x67452301;
+	buf[1] = 0xefcdab89;
+	buf[2] = 0x98badcfe;
+	buf[3] = 0x10325476;
+
+	/* Check to see if the seed is all zero's */
+	if (hinfo->seed) {
+		for (i=0; i < 4; i++) {
+			if (hinfo->seed[i])
+				break;
+		}
+		if (i < 4)	/* at least one non-zero word: use the caller's seed */
+			memcpy(buf, hinfo->seed, sizeof(buf));
+	}
+
+	switch (hinfo->hash_version) {
+	case DX_HASH_LEGACY:
+		hash = dx_hack_hash(name, len);
+		break;
+	case DX_HASH_HALF_MD4:
+		p = name;
+		while (len > 0) {	/* 32 name bytes per round */
+			str2hashbuf(p, len, in, 8);
+			half_md4_transform(buf, in);
+			len -= 32;
+			p += 32;
+		}
+		minor_hash = buf[2];
+		hash = buf[1];
+		break;
+	case DX_HASH_TEA:
+		p = name;
+		while (len > 0) {	/* 16 name bytes per round */
+			str2hashbuf(p, len, in, 4);
+			TEA_transform(buf, in);
+			len -= 16;
+			p += 16;
+		}
+		hash = buf[0];
+		minor_hash = buf[1];
+		break;
+	default:
+		hinfo->hash = 0;
+		return -1;
+	}
+	hash = hash & ~1;	/* the low bit is always cleared */
+	if (hash == (EXT3_HTREE_EOF << 1))	/* steer clear of the EOF position */
+		hash = (EXT3_HTREE_EOF-1) << 1;
+	hinfo->hash = hash;
+	hinfo->minor_hash = minor_hash;
+	return 0;
+}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
new file mode 100644
index 000000000000..e45dbd651736
--- /dev/null
+++ b/fs/ext4/ialloc.c
@@ -0,0 +1,758 @@
+/*
+ *  linux/fs/ext3/ialloc.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  BSD ufs-inspired inode and directory allocation by
+ *  Stephen Tweedie (sct@redhat.com), 1993
+ *  Big-endian to little-endian byte-swapping/bitmaps by
+ *        David S. Miller (davem@caip.rutgers.edu), 1995
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/random.h>
+#include <linux/bitops.h>
+
+#include <asm/byteorder.h>
+
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * ialloc.c contains the inodes allocation and deallocation routines
+ */
+
+/*
+ * The free inodes are managed by bitmaps.  A file system contains several
+ * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
+ * block for inodes, N blocks for the inode table and data blocks.
+ *
+ * The file system contains group descriptors which are located after the
+ * super block.  Each descriptor contains the number of the bitmap block and
+ * the free blocks count in the block.
+ */
+
+
+/*
+ * Read the inode allocation bitmap block of the given block_group; the
+ * block number is taken from that group's descriptor.
+ *
+ * Return buffer_head of bitmap on success or NULL.
+ */
+static struct buffer_head *
+read_inode_bitmap(struct super_block * sb, unsigned long block_group)
+{
+	struct ext3_group_desc *gd = ext3_get_group_desc(sb, block_group, NULL);
+	struct buffer_head *bitmap;
+
+	/* Without a descriptor there is no bitmap block number to read */
+	if (!gd)
+		return NULL;
+
+	bitmap = sb_bread(sb, le32_to_cpu(gd->bg_inode_bitmap));
+	if (bitmap)
+		return bitmap;
+	ext3_error(sb, "read_inode_bitmap",
+		    "Cannot read inode bitmap - "
+		    "block_group = %lu, inode_bitmap = %u",
+		    block_group, le32_to_cpu(gd->bg_inode_bitmap));
+	return NULL;
+}
+
+/*
+ * Release an on-disk inode under @handle: drop quota and xattrs, clear its
+ * inode-bitmap bit and update the group and per-cpu free/dir counters.
+ *
+ * NOTE! When we get the inode, we're the only people that have access
+ * to it, and as such there are no race conditions we have to worry
+ * about. The inode is not on the hash-lists, and it cannot be reached
+ * through the filesystem because the directory entry has been deleted
+ * earlier.
+ *
+ * HOWEVER: we must make sure that we get no aliases, which means that
+ * we have to call "clear_inode()" _before_ we mark the inode not in use
+ * in the inode bitmaps. Otherwise a newly created file might use the
+ * same inode number (not actually the same pointer though), and then
+ * we'd have two inodes sharing the same inode number and space on disk.
+ */
+void ext3_free_inode (handle_t *handle, struct inode * inode)
+{
+	struct super_block * sb = inode->i_sb;
+	int is_directory;
+	unsigned long ino;
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *bh2;
+	unsigned long block_group;
+	unsigned long bit;
+	struct ext3_group_desc * gdp;
+	struct ext3_super_block * es;
+	struct ext3_sb_info *sbi;
+	int fatal = 0, err;
+
+	if (atomic_read(&inode->i_count) > 1) {
+		printk ("ext3_free_inode: inode has count=%d\n",
+					atomic_read(&inode->i_count));
+		return;
+	}
+	if (inode->i_nlink) {
+		printk ("ext3_free_inode: inode has nlink=%d\n",
+			inode->i_nlink);
+		return;
+	}
+	if (!sb) {
+		printk("ext3_free_inode: inode on nonexistent device\n");
+		return;
+	}
+	sbi = EXT3_SB(sb);
+
+	ino = inode->i_ino;
+	ext3_debug ("freeing inode %lu\n", ino);
+
+	/*
+	 * Note: we must free any quota before locking the superblock,
+	 * as writing the quota to disk may need the lock as well.
+	 */
+	DQUOT_INIT(inode);
+	ext3_xattr_delete_inode(handle, inode);
+	DQUOT_FREE_INODE(inode);
+	DQUOT_DROP(inode);
+
+	is_directory = S_ISDIR(inode->i_mode);
+
+	/* Do this BEFORE marking the inode not in use or returning an error */
+	clear_inode (inode);
+
+	es = EXT3_SB(sb)->s_es;
+	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
+		ext3_error (sb, "ext3_free_inode",
+			    "reserved or nonexistent inode %lu", ino);
+		goto error_return;
+	}
+	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
+	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
+	bitmap_bh = read_inode_bitmap(sb, block_group);
+	if (!bitmap_bh)
+		goto error_return;
+
+	BUFFER_TRACE(bitmap_bh, "get_write_access");
+	fatal = ext3_journal_get_write_access(handle, bitmap_bh);
+	if (fatal)
+		goto error_return;
+
+	/* Ok, now we can actually update the inode bitmaps.. */
+	if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+					bit, bitmap_bh->b_data))
+		ext3_error (sb, "ext3_free_inode",
+			      "bit already cleared for inode %lu", ino);
+	else {
+		gdp = ext3_get_group_desc (sb, block_group, &bh2);
+		if (gdp) {
+			BUFFER_TRACE(bh2, "get_write_access");
+			fatal = ext3_journal_get_write_access(handle, bh2);
+			if (fatal) goto error_return;
+			spin_lock(sb_bgl_lock(sbi, block_group));
+			gdp->bg_free_inodes_count = cpu_to_le16(
+				le16_to_cpu(gdp->bg_free_inodes_count) + 1);
+			if (is_directory)
+				gdp->bg_used_dirs_count = cpu_to_le16(
+				  le16_to_cpu(gdp->bg_used_dirs_count) - 1);
+			spin_unlock(sb_bgl_lock(sbi, block_group));
+			percpu_counter_inc(&sbi->s_freeinodes_counter);
+			if (is_directory)
+				percpu_counter_dec(&sbi->s_dirs_counter);
+			BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
+			err = ext3_journal_dirty_metadata(handle, bh2);
+			if (!fatal) fatal = err;
+		} else {
+			/* bh2 cannot be trusted here: skip the descriptor */
+			fatal = -EIO;
+		}
+	}
+	BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
+	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
+	if (!fatal)
+		fatal = err;
+	sb->s_dirt = 1;
+error_return:
+	brelse(bitmap_bh);
+	ext3_std_error(sb, fatal);
+}
+
+/*
+ * Old-style (OLDALLOC) group selection for a new directory.
+ *
+ * Scans every block group and, among those whose free inode count is
+ * non-zero and at least the filesystem-wide average, picks the one with
+ * the most free blocks (the first such group wins ties).  @parent is
+ * not consulted.
+ *
+ * Returns the chosen group number, or -1 if no group qualifies.
+ */
+static int find_group_dir(struct super_block *sb, struct inode *parent)
+{
+	struct ext3_group_desc *cur, *winner = NULL;
+	struct buffer_head *bh;
+	int ngroups = EXT3_SB(sb)->s_groups_count;
+	unsigned int total_free, avg_free;
+	int g, chosen = -1;
+
+	total_free = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
+	avg_free = total_free / ngroups;
+	for (g = 0; g < ngroups; g++) {
+		cur = ext3_get_group_desc (sb, g, &bh);
+		/* Skip groups with no descriptor, no or too few free inodes */
+		if (!cur || !cur->bg_free_inodes_count ||
+		    le16_to_cpu(cur->bg_free_inodes_count) < avg_free)
+			continue;
+		/* Only a group with strictly more free blocks takes over */
+		if (winner &&
+		    le16_to_cpu(cur->bg_free_blocks_count) <=
+		    le16_to_cpu(winner->bg_free_blocks_count))
+			continue;
+		chosen = g;
+		winner = cur;
+	}
+	return chosen;
+}
+
+/*
+ * Orlov's allocator for directories.
+ *
+ * We always try to spread first-level directories.
+ *
+ * If there are blockgroups with both free inodes and free blocks counts
+ * not worse than average we return one with smallest directory count.
+ * Otherwise we simply return a random group.
+ *
+ * For all other directories the rules are as follows:
+ *
+ * It's OK to put directory into a group unless
+ * it has too many directories already (max_dirs) or
+ * it has too few free inodes left (min_inodes) or
+ * it has too few free blocks left (min_blocks) or
+ * it's already running too large debt (max_debt).
+ * Parent's group is preferred, if it doesn't satisfy these
+ * conditions we search cyclically through the rest. If none
+ * of the groups look good we just look for a group with more
+ * free inodes than average (starting at parent's group).
+ *
+ * Debt is incremented each time we allocate a directory and decremented
+ * when we allocate an inode, within 0--255.
+ */
+
+#define INODE_COST 64
+#define BLOCK_COST 256
+
+static int find_group_orlov(struct super_block *sb, struct inode *parent)
+{
+	int parent_group = EXT3_I(parent)->i_block_group;
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext3_super_block *es = sbi->s_es;
+	int ngroups = sbi->s_groups_count;
+	int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
+	unsigned int freei, avefreei;
+	ext3_fsblk_t freeb, avefreeb;
+	ext3_fsblk_t blocks_per_dir;
+	unsigned int ndirs;
+	int max_debt, max_dirs, min_inodes;
+	ext3_grpblk_t min_blocks;
+	int group = -1, i;
+	struct ext3_group_desc *desc;
+	struct buffer_head *bh;
+
+	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
+	avefreei = freei / ngroups;
+	freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+	avefreeb = freeb / ngroups;
+	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
+
+	if ((parent == sb->s_root->d_inode) ||
+	    (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) {
+		int best_ndir = inodes_per_group;
+		int best_group = -1;
+
+		get_random_bytes(&group, sizeof(group));
+		parent_group = (unsigned)group % ngroups;
+		for (i = 0; i < ngroups; i++) {
+			group = (parent_group + i) % ngroups;
+			desc = ext3_get_group_desc (sb, group, &bh);
+			if (!desc || !desc->bg_free_inodes_count)
+				continue;
+			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
+				continue;
+			if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
+				continue;
+			if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
+				continue;
+			best_group = group;
+			best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
+		}
+		if (best_group >= 0)
+			return best_group;
+		goto fallback;
+	}
+
+	/* The dirs counter read above may be 0: never divide by zero */
+	blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) /
+				(ndirs ? ndirs : 1);
+	max_dirs = ndirs / ngroups + inodes_per_group / 16;
+	min_inodes = avefreei - inodes_per_group / 4;
+	min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
+	max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
+	if (max_debt * INODE_COST > inodes_per_group)
+		max_debt = inodes_per_group / INODE_COST;
+	if (max_debt > 255)
+		max_debt = 255;
+	if (max_debt == 0)
+		max_debt = 1;
+	/* NOTE(review): max_debt is not consulted by the scans below */
+	for (i = 0; i < ngroups; i++) {
+		group = (parent_group + i) % ngroups;
+		desc = ext3_get_group_desc (sb, group, &bh);
+		if (!desc || !desc->bg_free_inodes_count)
+			continue;
+		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
+			continue;
+		if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
+			continue;
+		if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
+			continue;
+		return group;
+	}
+
+fallback:
+	for (i = 0; i < ngroups; i++) {
+		group = (parent_group + i) % ngroups;
+		desc = ext3_get_group_desc (sb, group, &bh);
+		if (!desc || !desc->bg_free_inodes_count)
+			continue;
+		if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
+			return group;
+	}
+
+	if (avefreei) {
+		/*
+		 * The free-inodes counter is approximate, and for really small
+		 * filesystems the above test can fail to find any blockgroups
+		 */
+		avefreei = 0;
+		goto fallback;
+	}
+
+	return -1;
+}
+
+static int find_group_other(struct super_block *sb, struct inode *parent)
+{
+	const int home = EXT3_I(parent)->i_block_group;
+	const int ngroups = EXT3_SB(sb)->s_groups_count;
+	struct ext3_group_desc *gd;
+	struct buffer_head *bh;
+	int candidate, step, tries;
+
+	/*
+	 * Choose a group for a non-directory inode, or return -1.  The
+	 * parent directory's own group is preferred if it has room.
+	 */
+	gd = ext3_get_group_desc (sb, home, &bh);
+	if (gd && le16_to_cpu(gd->bg_free_inodes_count) &&
+			le16_to_cpu(gd->bg_free_blocks_count))
+		return home;
+
+	/*
+	 * The inode has to go into a different blockgroup from its parent.
+	 * Files in a common directory should still all land in the same
+	 * blockgroup, while files of another directory that merely shares
+	 * a blockgroup with our parent should land somewhere else.
+	 *
+	 * So mix the directory's i_ino into the starting point of the
+	 * probe sequence.
+	 */
+	candidate = (home + parent->i_ino) % ngroups;
+
+	/*
+	 * Probe at doubling strides (1, 2, 4, ...) for a group with a free
+	 * inode and some free blocks.
+	 */
+	for (step = 1; step < ngroups; step <<= 1) {
+		candidate += step;
+		if (candidate >= ngroups)
+			candidate -= ngroups;
+		gd = ext3_get_group_desc (sb, candidate, &bh);
+		if (gd && le16_to_cpu(gd->bg_free_inodes_count) &&
+				le16_to_cpu(gd->bg_free_blocks_count))
+			return candidate;
+	}
+
+	/*
+	 * That failed: scan linearly from the group after the parent's,
+	 * accepting a free inode even in a group with no free blocks.
+	 */
+	candidate = home;
+	for (tries = ngroups; tries > 0; tries--) {
+		if (++candidate >= ngroups)
+			candidate = 0;
+		gd = ext3_get_group_desc (sb, candidate, &bh);
+		if (gd && le16_to_cpu(gd->bg_free_inodes_count))
+			return candidate;
+	}
+
+	return -1;
+}
+
+/*
+ * Allocate a new inode in the filesystem of directory @dir.
+ *
+ * A block group is chosen by find_group_dir()/find_group_orlov() for
+ * directories (depending on the OLDALLOC mount option) or by
+ * find_group_other() otherwise; a free bit is then claimed in that
+ * group's inode bitmap, moving on to the following groups if needed.
+ *
+ * Returns the initialised in-core inode, or an ERR_PTR() on failure.
+ */
+struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
+{
+	struct super_block *sb;
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *bh2;
+	int group;
+	unsigned long ino = 0;
+	struct inode * inode;
+	struct ext3_group_desc * gdp = NULL;
+	struct ext3_super_block * es;
+	struct ext3_inode_info *ei;
+	struct ext3_sb_info *sbi;
+	int err = 0;
+	struct inode *ret;
+	int i;
+
+	/* Cannot create files in a deleted directory */
+	if (!dir || !dir->i_nlink)
+		return ERR_PTR(-EPERM);
+
+	sb = dir->i_sb;
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+	ei = EXT3_I(inode);
+
+	sbi = EXT3_SB(sb);
+	es = sbi->s_es;
+	if (S_ISDIR(mode)) {
+		if (test_opt (sb, OLDALLOC))
+			group = find_group_dir(sb, dir);
+		else
+			group = find_group_orlov(sb, dir);
+	} else
+		group = find_group_other(sb, dir);
+
+	err = -ENOSPC;
+	if (group == -1)
+		goto out;
+
+	for (i = 0; i < sbi->s_groups_count; i++) {
+		err = -EIO;
+
+		gdp = ext3_get_group_desc(sb, group, &bh2);
+		if (!gdp)
+			goto fail;
+
+		brelse(bitmap_bh);	/* drop the previous group's bitmap */
+		bitmap_bh = read_inode_bitmap(sb, group);
+		if (!bitmap_bh)
+			goto fail;
+
+		ino = 0;
+
+repeat_in_this_group:
+		ino = ext3_find_next_zero_bit((unsigned long *)
+				bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino);
+		if (ino < EXT3_INODES_PER_GROUP(sb)) {
+
+			BUFFER_TRACE(bitmap_bh, "get_write_access");
+			err = ext3_journal_get_write_access(handle, bitmap_bh);
+			if (err)
+				goto fail;
+
+			if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
+						ino, bitmap_bh->b_data)) {
+				/* we won it */
+				BUFFER_TRACE(bitmap_bh,
+					"call ext3_journal_dirty_metadata");
+				err = ext3_journal_dirty_metadata(handle,
+								bitmap_bh);
+				if (err)
+					goto fail;
+				goto got;
+			}
+			/* we lost it */
+			journal_release_buffer(handle, bitmap_bh);
+
+			if (++ino < EXT3_INODES_PER_GROUP(sb))
+				goto repeat_in_this_group;
+		}
+
+		/*
+		 * This case is possible in concurrent environment.  It is very
+		 * rare.  We cannot repeat the find_group_xxx() call because
+		 * that will simply return the same blockgroup, because the
+		 * group descriptor metadata has not yet been updated.
+		 * So we just go onto the next blockgroup.
+		 */
+		if (++group == sbi->s_groups_count)
+			group = 0;
+	}
+	err = -ENOSPC;
+	goto out;
+
+got:
+	ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
+	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
+		ext3_error (sb, "ext3_new_inode",
+			    "reserved inode or inode > inodes count - "
+			    "block_group = %d, inode=%lu", group, ino);
+		err = -EIO;
+		goto fail;
+	}
+
+	BUFFER_TRACE(bh2, "get_write_access");
+	err = ext3_journal_get_write_access(handle, bh2);
+	if (err) goto fail;
+	spin_lock(sb_bgl_lock(sbi, group));
+	gdp->bg_free_inodes_count =
+		cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
+	if (S_ISDIR(mode)) {
+		gdp->bg_used_dirs_count =
+			cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
+	}
+	spin_unlock(sb_bgl_lock(sbi, group));
+	BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
+	err = ext3_journal_dirty_metadata(handle, bh2);
+	if (err) goto fail;
+
+	percpu_counter_dec(&sbi->s_freeinodes_counter);
+	if (S_ISDIR(mode))
+		percpu_counter_inc(&sbi->s_dirs_counter);
+	sb->s_dirt = 1;
+
+	inode->i_uid = current->fsuid;
+	if (test_opt (sb, GRPID))
+		inode->i_gid = dir->i_gid;
+	else if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			mode |= S_ISGID;
+	} else
+		inode->i_gid = current->fsgid;
+	inode->i_mode = mode;
+
+	inode->i_ino = ino;
+	/* The new inode starts out with a block count of zero */
+	inode->i_blocks = 0;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+
+	memset(ei->i_data, 0, sizeof(ei->i_data));
+	ei->i_dir_start_lookup = 0;
+	ei->i_disksize = 0;
+
+	ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL;
+	if (S_ISLNK(mode))
+		ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
+	/* dirsync only applies to directories */
+	if (!S_ISDIR(mode))
+		ei->i_flags &= ~EXT3_DIRSYNC_FL;
+#ifdef EXT3_FRAGMENTS
+	ei->i_faddr = 0;
+	ei->i_frag_no = 0;
+	ei->i_frag_size = 0;
+#endif
+	ei->i_file_acl = 0;
+	ei->i_dir_acl = 0;
+	ei->i_dtime = 0;
+	ei->i_block_alloc_info = NULL;
+	ei->i_block_group = group;
+
+	ext3_set_inode_flags(inode);
+	if (IS_DIRSYNC(inode))
+		handle->h_sync = 1;
+	insert_inode_hash(inode);
+	spin_lock(&sbi->s_next_gen_lock);
+	inode->i_generation = sbi->s_next_generation++;
+	spin_unlock(&sbi->s_next_gen_lock);
+
+	ei->i_state = EXT3_STATE_NEW;
+	ei->i_extra_isize =
+		(EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
+		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
+
+	ret = inode;
+	if(DQUOT_ALLOC_INODE(inode)) {
+		err = -EDQUOT;
+		goto fail_drop;
+	}
+
+	err = ext3_init_acl(handle, inode, dir);
+	if (err)
+		goto fail_free_drop;
+
+	err = ext3_init_security(handle,inode, dir);
+	if (err)
+		goto fail_free_drop;
+
+	err = ext3_mark_inode_dirty(handle, inode);
+	if (err) {
+		ext3_std_error(sb, err);
+		goto fail_free_drop;
+	}
+
+	ext3_debug("allocating inode %lu\n", inode->i_ino);
+	goto really_out;
+fail:
+	ext3_std_error(sb, err);
+out:
+	iput(inode);
+	ret = ERR_PTR(err);
+really_out:
+	brelse(bitmap_bh);
+	return ret;
+
+fail_free_drop:
+	DQUOT_FREE_INODE(inode);
+
+fail_drop:
+	DQUOT_DROP(inode);
+	inode->i_flags |= S_NOQUOTA;
+	inode->i_nlink = 0;
+	iput(inode);
+	brelse(bitmap_bh);
+	return ERR_PTR(err);
+}
+
+/* Load orphan inode @ino from disk; returns NULL unless it validates */
+struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
+{
+	unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
+	unsigned long block_group;
+	int bit;
+	struct buffer_head *bitmap_bh = NULL;
+	struct inode *inode = NULL;
+
+	/* Error cases - e2fsck has already cleaned up for us.  ino 0 is
+	 * refused too: (ino - 1) below would wrap around for it. */
+	if (!ino || ino > max_ino) {
+		ext3_warning(sb, __FUNCTION__,
+			     "bad orphan ino %lu!  e2fsck was run?", ino);
+		goto out;
+	}
+	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
+	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
+	bitmap_bh = read_inode_bitmap(sb, block_group);
+	if (!bitmap_bh) {
+		ext3_warning(sb, __FUNCTION__,
+			     "inode bitmap error for orphan %lu", ino);
+		goto out;
+	}
+
+	/* Having the inode bit set should be a 100% indicator that this
+	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
+	 * inodes that were being truncated, so we can't check i_nlink==0.
+	 */
+	if (!ext3_test_bit(bit, bitmap_bh->b_data) ||
+			!(inode = iget(sb, ino)) || is_bad_inode(inode) ||
+			NEXT_ORPHAN(inode) > max_ino) {
+		ext3_warning(sb, __FUNCTION__,
+			     "bad orphan inode %lu!  e2fsck was run?", ino);
+		printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
+		       bit, (unsigned long long)bitmap_bh->b_blocknr,
+		       ext3_test_bit(bit, bitmap_bh->b_data));
+		printk(KERN_NOTICE "inode=%p\n", inode);
+		if (inode) {
+			printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
+			       is_bad_inode(inode));
+			printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
+			       NEXT_ORPHAN(inode));
+			printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+		}
+		/* Avoid freeing blocks if we got a bad deleted inode */
+		if (inode && inode->i_nlink == 0)
+			inode->i_blocks = 0;
+		iput(inode);
+		inode = NULL;
+	}
+out:
+	brelse(bitmap_bh);
+	return inode;
+}
+
+unsigned long ext3_count_free_inodes (struct super_block * sb)
+{
+	unsigned long desc_count = 0;
+	struct ext3_group_desc *gdp;
+	int group;
+#ifdef EXT3FS_DEBUG
+	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+	unsigned long bitmap_count = 0, counted;
+	struct buffer_head *bitmap_bh = NULL;
+
+	/*
+	 * Debug build: besides summing the descriptor counts, run
+	 * ext3_count_free() over each inode bitmap and log both results.
+	 */
+	for (group = 0; group < EXT3_SB(sb)->s_groups_count; group++) {
+		gdp = ext3_get_group_desc (sb, group, NULL);
+		if (!gdp)
+			continue;
+		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
+		brelse(bitmap_bh);
+		bitmap_bh = read_inode_bitmap(sb, group);
+		if (!bitmap_bh)
+			continue;
+
+		counted = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8);
+		printk("group %d: stored = %d, counted = %lu\n",
+			group, le16_to_cpu(gdp->bg_free_inodes_count), counted);
+		bitmap_count += counted;
+	}
+	brelse(bitmap_bh);
+	printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n",
+		le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
+	return desc_count;
+#else
+	/* Total the free inode counts recorded in the group descriptors */
+	for (group = 0; group < EXT3_SB(sb)->s_groups_count; group++) {
+		gdp = ext3_get_group_desc (sb, group, NULL);
+		if (!gdp)
+			continue;
+		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
+		cond_resched();
+	}
+	return desc_count;
+#endif
+}
+
+/* Sum the groups' used-dir counts.  Called at mount-time, sb is locked */
+unsigned long ext3_count_dirs (struct super_block * sb)
+{
+	struct ext3_group_desc *gd;
+	unsigned long ndirs = 0;
+	int grp;
+
+	for (grp = 0; grp < EXT3_SB(sb)->s_groups_count; grp++) {
+		gd = ext3_get_group_desc (sb, grp, NULL);
+		if (gd)
+			ndirs += le16_to_cpu(gd->bg_used_dirs_count);
+	}
+	return ndirs;
+}
+
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
new file mode 100644
index 000000000000..03ba5bcab186
--- /dev/null
+++ b/fs/ext4/inode.c
@@ -0,0 +1,3219 @@
+/*
+ *  linux/fs/ext3/inode.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  from
+ *
+ *  linux/fs/minix/inode.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Goal-directed block allocation by Stephen Tweedie
+ *	(sct@redhat.com), 1993, 1998
+ *  Big-endian to little-endian byte-swapping/bitmaps by
+ *        David S. Miller (davem@caip.rutgers.edu), 1995
+ *  64-bit file support on 64-bit platforms by Jakub Jelinek
+ *	(jj@sunsite.ms.mff.cuni.cz)
+ *
+ *  Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/ext3_jbd.h>
+#include <linux/jbd.h>
+#include <linux/smp_lock.h>
+#include <linux/highuid.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/string.h>
+#include <linux/buffer_head.h>
+#include <linux/writeback.h>
+#include <linux/mpage.h>
+#include <linux/uio.h>
+#include <linux/bio.h>
+#include "xattr.h"
+#include "acl.h"
+
+static int ext3_writepage_trans_blocks(struct inode *inode);
+
+/* Fast symlink test: a symlink with no i_blocks beyond its i_file_acl block */
+static int ext3_inode_is_fast_symlink(struct inode *inode)
+{
+	int ea_blocks = 0;
+
+	if (EXT3_I(inode)->i_file_acl)
+		ea_blocks = inode->i_sb->s_blocksize >> 9;
+
+	return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
+}
+
+/*
+ * Forget block @blocknr (buffer @bh, possibly NULL) that is being freed.
+ * Journaled data and metadata (eg. indirect blocks) are revoked, except
+ * in data=journal mode where a plain journal forget is always used.
+ *
+ * "bh" may be NULL: a metadata block may have been freed from memory
+ * but there may still be a record of it in the journal, and that record
+ * still needs to be revoked.  Returns 0 or the journal layer's error.
+ */
+int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
+			struct buffer_head *bh, ext3_fsblk_t blocknr)
+{
+	int rc;
+
+	might_sleep();
+
+	BUFFER_TRACE(bh, "enter");
+
+	jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
+		  "data mode %lx\n",
+		  bh, is_metadata, inode->i_mode,
+		  test_opt(inode->i_sb, DATA_FLAGS));
+
+	/*
+	 * Revoke only outside full data journaling (there is no need to
+	 * there, and a V1 superblock won't support it), and then only for
+	 * metadata or for data that is itself journaled.
+	 */
+	if (test_opt(inode->i_sb, DATA_FLAGS) != EXT3_MOUNT_JOURNAL_DATA &&
+	    (is_metadata || ext3_should_journal_data(inode))) {
+		BUFFER_TRACE(bh, "call ext3_journal_revoke");
+		rc = ext3_journal_revoke(handle, blocknr, bh);
+		if (rc)
+			ext3_abort(inode->i_sb, __FUNCTION__,
+				   "error %d when attempting revoke", rc);
+		BUFFER_TRACE(bh, "exit");
+		return rc;
+	}
+
+	/*
+	 * data=journal, or an un-journaled data block: a plain journal
+	 * forget suffices, and only when the buffer is actually present.
+	 */
+	if (!bh)
+		return 0;
+	BUFFER_TRACE(bh, "call journal_forget");
+	return ext3_journal_forget(handle, bh);
+}
+
+/*
+ * Work out how many blocks to reserve in the journal for the next chunk
+ * of a truncate transaction.
+ */
+static unsigned long blocks_for_truncate(struct inode *inode)
+{
+	/* Scale i_blocks down by (s_blocksize_bits - 9) bits */
+	unsigned long nblocks =
+		inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
+
+	/* Give ourselves just enough room to cope with inodes in which
+	 * i_blocks is corrupt: we've seen disk corruptions in the past
+	 * which resulted in random data in an inode which looked enough
+	 * like a regular file for ext3 to try to delete it.  Things
+	 * will go a bit crazy if that happens, but at least we should
+	 * try not to panic the whole kernel. */
+	if (nblocks < 2)
+		nblocks = 2;
+
+	/* But we need to bound the transaction so we don't overflow the
+	 * journal. */
+	if (nblocks > EXT3_MAX_TRANS_DATA)
+		nblocks = EXT3_MAX_TRANS_DATA;
+
+	return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + nblocks;
+}
+
+/*
+ * Truncate transactions can be complex and absolutely huge.  So we need to
+ * be able to restart the transaction at a convenient checkpoint to make
+ * sure we don't overflow the journal.
+ *
+ * start_transaction gets us a new handle for a truncate transaction,
+ * and extend_transaction tries to extend the existing one a bit.  If
+ * extend fails, we need to propagate the failure up and restart the
+ * transaction in the top-level truncate loop. --sct
+ */
+static handle_t *start_transaction(struct inode *inode)
+{
+	handle_t *h;
+
+	h = ext3_journal_start(inode, blocks_for_truncate(inode));
+	/* Report a failed start; the ERR_PTR still goes back to the caller */
+	if (IS_ERR(h))
+		ext3_std_error(inode->i_sb, PTR_ERR(h));
+
+	return h;
+}
+
+/*
+ * Try to extend this transaction for the purposes of truncation.
+ *
+ * Returns 0 if we managed to create more room.  If we can't create more
+ * room, and the transaction must be restarted we return 1.
+ */
+static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
+{
+	/* Plenty of credits left on the handle: nothing to do */
+	if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS)
+		return 0;
+	/* A zero return from ext3_journal_extend() means it made room */
+	return ext3_journal_extend(handle, blocks_for_truncate(inode)) != 0;
+}
+
+/* Restart the transaction behind *handle with a fresh truncate budget.
+ * This does a commit, so before we call here everything must be
+ * consistently dirtied against this transaction. */
+static int ext3_journal_test_restart(handle_t *handle, struct inode *inode)
+{
+	const unsigned long credits = blocks_for_truncate(inode);
+
+	jbd_debug(2, "restarting handle %p\n", handle);
+	return ext3_journal_restart(handle, credits);
+}
+
+/* Called at the last iput() if i_nlink is zero.  Truncates the file,
+ * removes its orphan record, stamps i_dtime and frees the on-disk inode;
+ * if no transaction can be started the in-core inode is just cleared. */
+void ext3_delete_inode (struct inode * inode)
+{
+	handle_t *handle;
+
+	truncate_inode_pages(&inode->i_data, 0);
+
+	if (is_bad_inode(inode))
+		goto no_delete;
+
+	handle = start_transaction(inode);
+	if (IS_ERR(handle)) {
+		/*
+		 * If we're going to skip the normal cleanup, we still need to
+		 * make sure that the in-core orphan linked list is properly
+		 * cleaned up.
+		 */
+		ext3_orphan_del(NULL, inode);
+		goto no_delete;
+	}
+
+	if (IS_SYNC(inode))
+		handle->h_sync = 1;
+	inode->i_size = 0;
+	if (inode->i_blocks)
+		ext3_truncate(inode);
+	/*
+	 * Kill off the orphan record which ext3_truncate created.
+	 * AKPM: I think this can be inside the above `if'.
+	 * Note that ext3_orphan_del() has to be able to cope with the
+	 * deletion of a non-existent orphan - this is because we don't
+	 * know if ext3_truncate() actually created an orphan record.
+	 * (Well, we could do this if we need to, but heck - it works)
+	 */
+	ext3_orphan_del(handle, inode);
+	EXT3_I(inode)->i_dtime	= get_seconds();
+
+	/*
+	 * One subtle ordering requirement: if anything has gone wrong
+	 * (transaction abort, IO errors, whatever), then we can still
+	 * do these next steps (the fs will already have been marked as
+	 * having errors), but we can't free the inode if the mark_dirty
+	 * fails.
+	 */
+	if (ext3_mark_inode_dirty(handle, inode))
+		/* If that failed, just do the required in-core inode clear. */
+		clear_inode(inode);
+	else
+		ext3_free_inode(handle, inode);
+	ext3_journal_stop(handle);
+	return;
+no_delete:
+	clear_inode(inode);	/* We must guarantee clearing of inode... */
+}
+
+typedef struct {
+	__le32	*p;		/* address the block number was read from */
+	__le32	key;		/* block number as sampled from *p */
+	struct buffer_head *bh;	/* buffer hosting *p (see ext3_get_branch) */
+} Indirect;
+
+static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
+{
+	/* Record where the pointer lives, its current value and its buffer */
+	*p = (Indirect){ .p = v, .key = *v, .bh = bh };
+}
+
+static int verify_chain(Indirect *from, Indirect *to)
+{
+	for (; from <= to && from->key == *from->p; from++)
+		;	/* advance past every entry that is still unchanged */
+	return from > to;
+}
+
+/**
+ *	ext3_block_to_path - parse the block number into array of offsets
+ *	@inode: inode in question (we are only interested in its superblock)
+ *	@i_block: block number to be parsed
+ *	@offsets: array to store the offsets in
+ *      @boundary: set this non-zero if the referred-to block is likely to be
+ *             followed (on disk) by an indirect block.
+ *
+ *	To store the locations of file's data ext3 uses a data structure common
+ *	for UNIX filesystems - tree of pointers anchored in the inode, with
+ *	data blocks at leaves and indirect blocks in intermediate nodes.
+ *	This function translates the block number into path in that tree -
+ *	return value is the path length and @offsets[n] is the offset of
+ *	pointer to (n+1)th node in the nth one. If @i_block is out of range
+ *	(negative or too large) warning is printed and zero returned.
+ *
+ *	Note: function doesn't find node addresses, so no IO is needed. All
+ *	we need to know is the capacity of indirect blocks (taken from the
+ *	inode->i_sb).
+ */
+
+/*
+ * Portability note: the last comparison (check that we fit into triple
+ * indirect block) is spelled differently, because otherwise on an
+ * architecture with 32-bit longs and 8Kb pages we might get into trouble
+ * if our filesystem had 8Kb blocks. We might use long long, but that would
+ * kill us on x86. Oh, well, at least the sign propagation does not matter -
+ * i_block would have to be negative in the very beginning, so we would not
+ * get there at all.
+ */
+
+static int ext3_block_to_path(struct inode *inode,
+			long i_block, int offsets[4], int *boundary)
+{
+	int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+	int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
+	const long direct_blocks = EXT3_NDIR_BLOCKS,
+		indirect_blocks = ptrs,
+		double_blocks = (1 << (ptrs_bits * 2));
+	int n = 0;	/* path length so far; stays 0 when out of range */
+	int final = 0;	/* number of slots in the node holding the last offset */
+
+	if (i_block < 0) {
+		ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0");
+	} else if (i_block < direct_blocks) {
+		offsets[n++] = i_block;	/* direct slot */
+		final = direct_blocks;
+	} else if ( (i_block -= direct_blocks) < indirect_blocks) {
+		offsets[n++] = EXT3_IND_BLOCK;	/* single indirect */
+		offsets[n++] = i_block;	/* i_block is now relative to this level */
+		final = ptrs;
+	} else if ((i_block -= indirect_blocks) < double_blocks) {
+		offsets[n++] = EXT3_DIND_BLOCK;	/* double indirect */
+		offsets[n++] = i_block >> ptrs_bits;
+		offsets[n++] = i_block & (ptrs - 1);
+		final = ptrs;
+	} else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
+		offsets[n++] = EXT3_TIND_BLOCK;	/* triple indirect */
+		offsets[n++] = i_block >> (ptrs_bits * 2);
+		offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
+		offsets[n++] = i_block & (ptrs - 1);
+		final = ptrs;
+	} else {
+		ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big");
+	}
+	if (boundary)	/* slots remaining after this one in its node */
+		*boundary = final - 1 - (i_block & (ptrs - 1));
+	return n;
+}
+
+/**
+ *	ext3_get_branch - read the chain of indirect blocks leading to data
+ *	@inode: inode in question
+ *	@depth: depth of the chain (1 - direct pointer, etc.)
+ *	@offsets: offsets of pointers in inode/indirect blocks
+ *	@chain: place to store the result
+ *	@err: here we store the error value
+ *
+ *	Function fills the array of triples <key, p, bh> and returns %NULL
+ *	if everything went OK or the pointer to the last filled triple
+ *	(incomplete one) otherwise. Upon the return chain[i].key contains
+ *	the number of (i+1)-th block in the chain (as it is stored in memory,
+ *	i.e. little-endian 32-bit), chain[i].p contains the address of that
+ *	number (it points into struct inode for i==0 and into the bh->b_data
+ *	for i>0) and chain[i].bh points to the buffer_head of i-th indirect
+ *	block for i>0 and NULL for i==0. In other words, it holds the block
+ *	numbers of the chain, addresses they were taken from (and where we can
+ *	verify that chain did not change) and buffer_heads hosting these
+ *	numbers.
+ *
+ *	Function stops when it stumbles upon zero pointer (absent block)
+ *		(pointer to last triple returned, *@err == 0)
+ *	or when it gets an IO error reading an indirect block
+ *		(ditto, *@err == -EIO)
+ *	or when it notices that chain had been changed while it was reading
+ *		(ditto, *@err == -EAGAIN)
+ *	or when it reads all @depth-1 indirect blocks successfully and finds
+ *	the whole chain, all way to the data (returns %NULL, *err == 0).
+ */
+static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets,
+				 Indirect chain[4], int *err)
+{
+	struct super_block *sb = inode->i_sb;
+	Indirect *p = chain;	/* last triple filled in so far */
+	struct buffer_head *bh;
+
+	*err = 0;
+	/* i_data is not going away, no lock needed */
+	/* chain[0] refers to a slot inside the inode, so it has no buffer_head */
+	add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets);
+	if (!p->key)
+		goto no_block;
+	/* one iteration per indirect level: depth - 1 blocks are read */
+	while (--depth) {
+		bh = sb_bread(sb, le32_to_cpu(p->key));
+		if (!bh)
+			goto failure;
+		/* Reader: pointers */
+		/*
+		 * Re-check the part of the chain gathered so far before
+		 * trusting the block just read (verify_chain() is defined
+		 * outside this block).
+		 */
+		if (!verify_chain(chain, p))
+			goto changed;
+		add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
+		/* Reader: end */
+		if (!p->key)
+			goto no_block;
+	}
+	/* every level was present: the whole chain is in chain[] */
+	return NULL;
+
+changed:
+	/* bh was read but never entered into the chain, so release it here */
+	brelse(bh);
+	*err = -EAGAIN;
+	goto no_block;
+failure:
+	*err = -EIO;
+no_block:
+	/* p is the last triple that was filled in */
+	return p;
+}
+
+/**
+ *	ext3_find_near - find a place for allocation with sufficient locality
+ *	@inode: owner
+ *	@ind: descriptor of indirect block.
+ *
+ *	This function returns the preferred place for block allocation.
+ *	It is used when heuristic for sequential allocation fails.
+ *	Rules are:
+ *	  + if there is a block to the left of our position - allocate near it.
+ *	  + if pointer will live in indirect block - allocate near that block.
+ *	  + if pointer will live in inode - allocate in the same
+ *	    cylinder group.
+ *
+ * In the latter case we colour the starting block by the callers PID to
+ * prevent it from clashing with concurrent allocations for a different inode
+ * in the same block group.   The PID is used here so that functionally related
+ * files will be close-by on-disk.
+ *
+ *	Caller must make sure that @ind is valid and will stay that way.
+ */
+static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
+{
+	struct ext3_inode_info *info = EXT3_I(inode);
+	__le32 *first_slot;
+	__le32 *slot;
+	ext3_fsblk_t group_base;
+	ext3_grpblk_t shade;
+
+	/* The pointer array is either the indirect block or the inode's i_data */
+	if (ind->bh)
+		first_slot = (__le32*) ind->bh->b_data;
+	else
+		first_slot = info->i_data;
+
+	/* Walk backwards from the slot just before ours, looking for a block */
+	slot = ind->p;
+	while (--slot >= first_slot) {
+		if (*slot != 0)
+			return le32_to_cpu(*slot);
+	}
+
+	/* Nothing to the left: fall back to the indirect block's own location */
+	if (ind->bh)
+		return ind->bh->b_blocknr;
+
+	/*
+	 * The pointer lives in the inode itself.  Stay in the inode's block
+	 * group, offset by one of 16 slices chosen from the caller's PID.
+	 */
+	group_base = ext3_group_first_block_no(inode->i_sb,
+						info->i_block_group);
+	shade = (current->pid % 16) *
+			(EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+	return group_base + shade;
+}
+
+/**
+ *	ext3_find_goal - find a preferred place for allocation.
+ *	@inode: owner
+ *	@block:  block we want
+ *	@chain:  chain of indirect blocks
+ *	@partial: pointer to the last triple within a chain
+ *
+ *	This function finds the preferred place for block allocation and
+ *	returns it: the block right after the last allocated physical block
+ *	when @block continues a sequential run, ext3_find_near() otherwise.
+ */
+
+static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
+		Indirect chain[4], Indirect *partial)
+{
+	struct ext3_block_alloc_info *resv =
+				EXT3_I(inode)->i_block_alloc_info;
+	int sequential;
+
+	/*
+	 * Sequential heuristic: allocation info exists, @block is the
+	 * logical successor of the last allocation, and a physical block
+	 * was recorded for that allocation.
+	 */
+	sequential = resv &&
+		(block == resv->last_alloc_logical_block + 1) &&
+		(resv->last_alloc_physical_block != 0);
+
+	/* Otherwise settle for reasonable locality */
+	if (!sequential)
+		return ext3_find_near(inode, partial);
+
+	return resv->last_alloc_physical_block + 1;
+}
+
+/**
+ *	ext3_blks_to_allocate: Look up the block map and count the number
+ *	of direct blocks need to be allocated for the given branch.
+ *
+ *	@branch: chain of indirect blocks
+ *	@k: number of blocks need for indirect blocks
+ *	@blks: number of data blocks to be mapped.
+ *	@blocks_to_boundary:  the offset in the indirect block
+ *
+ *	return the total number of blocks to be allocate, including the
+ *	direct and indirect blocks.
+ */
+static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
+		int blocks_to_boundary)
+{
+	unsigned long nr;
+
+	/*
+	 * Indirect blocks still have to be allocated, so nothing below them
+	 * exists yet: take as many blocks as requested, capped at the end of
+	 * the indirect block (no cross-boundary allocation for now).
+	 */
+	if (k > 0) {
+		nr = blocks_to_boundary + 1;
+		if (blks < nr)
+			nr = blks;
+		return nr;
+	}
+
+	/*
+	 * The pointer array already exists: the first block is always
+	 * counted, then keep counting while the following slots are empty,
+	 * up to the request size and the boundary.
+	 */
+	for (nr = 1;
+	     nr < blks && nr <= blocks_to_boundary &&
+		le32_to_cpu(*(branch[0].p + nr)) == 0;
+	     nr++)
+		;
+	return nr;
+}
+
+/**
+ *	ext3_alloc_blocks: multiple allocate blocks needed for a branch
+ *	@indirect_blks: the number of blocks need to allocate for indirect
+ *			blocks
+ *
+ *	@new_blocks: on return it will store the new block numbers for
+ *	the indirect blocks(if needed) and the first direct block,
+ *	@blks:	on return it will store the total number of allocated
+ *		direct blocks
+ */
+static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
+			ext3_fsblk_t goal, int indirect_blks, int blks,
+			ext3_fsblk_t new_blocks[4], int *err)
+{
+	int target, i;
+	unsigned long count = 0;
+	int index = 0;	/* number of indirect block numbers recorded so far */
+	ext3_fsblk_t current_block = 0;
+	int ret = 0;
+
+	/*
+	 * Here we try to allocate the requested multiple blocks at once,
+	 * on a best-effort basis.
+	 * To build a branch, we should allocate blocks for
+	 * the indirect blocks(if not allocated yet), and at least
+	 * the first direct block of this branch.  That's the
+	 * minimum number of blocks need to allocate(required)
+	 */
+	target = blks + indirect_blks;
+
+	while (1) {
+		count = target;
+		/* allocating blocks for indirect blocks and direct blocks */
+		/*
+		 * count is passed by address; presumably ext3_new_blocks()
+		 * updates it to the number of blocks actually obtained,
+		 * starting at the returned block -- confirm against its
+		 * definition.
+		 */
+		current_block = ext3_new_blocks(handle,inode,goal,&count,err);
+		if (*err)
+			goto failed_out;
+
+		target -= count;
+		/* allocate blocks for indirect blocks */
+		while (index < indirect_blks && count) {
+			new_blocks[index++] = current_block++;
+			count--;
+		}
+
+		/*
+		 * Stop once something is left over for data blocks;
+		 * otherwise go round again for the remaining target.
+		 */
+		if (count > 0)
+			break;
+	}
+
+	/* save the new block number for the first direct block */
+	new_blocks[index] = current_block;
+
+	/* total number of blocks allocated for direct blocks */
+	ret = count;
+	*err = 0;
+	return ret;
+failed_out:
+	/* give back the indirect blocks recorded so far; ret is still 0 */
+	for (i = 0; i <index; i++)
+		ext3_free_blocks(handle, inode, new_blocks[i], 1);
+	return ret;
+}
+
+/**
+ *	ext3_alloc_branch - allocate and set up a chain of blocks.
+ *	@inode: owner
+ *	@indirect_blks: number of allocated indirect blocks
+ *	@blks: number of allocated direct blocks
+ *	@offsets: offsets (in the blocks) to store the pointers to next.
+ *	@branch: place to store the chain in.
+ *
+ *	This function allocates blocks, zeroes out all but the last one,
+ *	links them into chain and (if we are synchronous) writes them to disk.
+ *	In other words, it prepares a branch that can be spliced onto the
+ *	inode. It stores the information about that chain in the branch[], in
+ *	the same format as ext3_get_branch() would do. We are calling it after
+ *	we had read the existing part of chain and partial points to the last
+ *	triple of that (one with zero ->key). Upon the exit we have the same
+ *	picture as after the successful ext3_get_block(), except that in one
+ *	place chain is disconnected - *branch->p is still zero (we did not
+ *	set the last link), but branch->key contains the number that should
+ *	be placed into *branch->p to fill that gap.
+ *
+ *	If allocation fails we free all blocks we've allocated (and forget
+ *	their buffer_heads) and return the error value from the failed
+ *	ext3_alloc_blocks() (normally -ENOSPC). Otherwise we set the chain
+ *	as described above and return 0.
+ */
+static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
+			int indirect_blks, int *blks, ext3_fsblk_t goal,
+			int *offsets, Indirect *branch)
+{
+	int blocksize = inode->i_sb->s_blocksize;
+	int i, n = 0;
+	int err = 0;
+	struct buffer_head *bh;
+	int num;	/* number of data blocks actually allocated */
+	ext3_fsblk_t new_blocks[4];
+	ext3_fsblk_t current_block;
+
+	num = ext3_alloc_blocks(handle, inode, goal, indirect_blks,
+				*blks, new_blocks, &err);
+	if (err)
+		return err;
+
+	/* The missing link: the caller stores this key into *branch[0].p */
+	branch[0].key = cpu_to_le32(new_blocks[0]);
+	/*
+	 * metadata blocks and data blocks are allocated.
+	 */
+	for (n = 1; n <= indirect_blks;  n++) {
+		/*
+		 * Get buffer_head for parent block, zero it out
+		 * and set the pointer to new one, then send
+		 * parent to disk.
+		 */
+		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+		if (!bh) {
+			/*
+			 * No buffer exists for this level, so the forget
+			 * loop below must stop one level earlier.
+			 */
+			err = -EIO;
+			n--;
+			goto failed;
+		}
+		branch[n].bh = bh;
+		lock_buffer(bh);
+		BUFFER_TRACE(bh, "call get_create_access");
+		err = ext3_journal_get_create_access(handle, bh);
+		if (err) {
+			/*
+			 * Do not brelse(bh) here: branch[n].bh is handed to
+			 * ext3_journal_forget() in the failed: loop, which
+			 * drops the reference.  Releasing it here as well
+			 * would put the buffer twice.
+			 */
+			unlock_buffer(bh);
+			goto failed;
+		}
+
+		memset(bh->b_data, 0, blocksize);
+		branch[n].p = (__le32 *) bh->b_data + offsets[n];
+		branch[n].key = cpu_to_le32(new_blocks[n]);
+		*branch[n].p = branch[n].key;
+		if ( n == indirect_blks) {
+			current_block = new_blocks[n];
+			/*
+			 * End of chain, update the last new metablock of
+			 * the chain to point to the new allocated
+			 * data blocks numbers
+			 */
+			for (i=1; i < num; i++)
+				*(branch[n].p + i) = cpu_to_le32(++current_block);
+		}
+		BUFFER_TRACE(bh, "marking uptodate");
+		set_buffer_uptodate(bh);
+		unlock_buffer(bh);
+
+		BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+		err = ext3_journal_dirty_metadata(handle, bh);
+		if (err)
+			goto failed;
+	}
+	/* report how many data blocks the branch really covers */
+	*blks = num;
+	return err;
+failed:
+	/* Allocation failed, free what we already allocated */
+	for (i = 1; i <= n ; i++) {
+		BUFFER_TRACE(branch[i].bh, "call journal_forget");
+		ext3_journal_forget(handle, branch[i].bh);
+	}
+	for (i = 0; i <indirect_blks; i++)
+		ext3_free_blocks(handle, inode, new_blocks[i], 1);
+
+	/* i == indirect_blks here: new_blocks[i] is the first data block */
+	ext3_free_blocks(handle, inode, new_blocks[i], num);
+
+	return err;
+}
+
+/**
+ * ext3_splice_branch - splice the allocated branch onto inode.
+ * @inode: owner
+ * @block: (logical) number of block we are adding
+ * @chain: chain of indirect blocks (with a missing link - see
+ *	ext3_alloc_branch)
+ * @where: location of missing link
+ * @num:   number of indirect blocks we are adding
+ * @blks:  number of direct blocks we are adding
+ *
+ * This function fills the missing link and does all housekeeping needed in
+ * inode (->i_blocks, etc.). In case of success we end up with the full
+ * chain to new block and return 0.
+ */
+static int ext3_splice_branch(handle_t *handle, struct inode *inode,
+			long block, Indirect *where, int num, int blks)
+{
+	int i;
+	int err = 0;
+	struct ext3_block_alloc_info *block_i;
+	ext3_fsblk_t current_block;
+
+	block_i = EXT3_I(inode)->i_block_alloc_info;
+	/*
+	 * If we're splicing into a [td]indirect block (as opposed to the
+	 * inode) then we need to get write access to the [td]indirect block
+	 * before the splice.
+	 */
+	if (where->bh) {
+		BUFFER_TRACE(where->bh, "get_write_access");
+		err = ext3_journal_get_write_access(handle, where->bh);
+		if (err)
+			goto err_out;
+	}
+	/* That's it */
+
+	/* Fill in the missing link: the new branch becomes reachable here */
+	*where->p = where->key;
+
+	/*
+	 * Update the host buffer_head or inode to point to the additional
+	 * just-allocated direct blocks.  Only done when no indirect block
+	 * was added (num == 0), i.e. where->p itself is the data pointer.
+	 */
+	if (num == 0 && blks > 1) {
+		current_block = le32_to_cpu(where->key) + 1;
+		for (i = 1; i < blks; i++)
+			*(where->p + i ) = cpu_to_le32(current_block++);
+	}
+
+	/*
+	 * update the most recently allocated logical & physical block
+	 * in i_block_alloc_info, to assist find the proper goal block for next
+	 * allocation
+	 */
+	if (block_i) {
+		block_i->last_alloc_logical_block = block + blks - 1;
+		/* where[num].key is the first data block of the new branch */
+		block_i->last_alloc_physical_block =
+				le32_to_cpu(where[num].key) + blks - 1;
+	}
+
+	/* We are done with atomic stuff, now do the rest of housekeeping */
+
+	inode->i_ctime = CURRENT_TIME_SEC;
+	ext3_mark_inode_dirty(handle, inode);
+
+	/* had we spliced it onto indirect block? */
+	if (where->bh) {
+		/*
+		 * If we spliced it onto an indirect block, we haven't
+		 * altered the inode.  Note however that if it is being spliced
+		 * onto an indirect block at the very end of the file (the
+		 * file is growing) then we *will* alter the inode to reflect
+		 * the new i_size.  But that is not done here - it is done in
+		 * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode.
+		 */
+		jbd_debug(5, "splicing indirect only\n");
+		BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata");
+		err = ext3_journal_dirty_metadata(handle, where->bh);
+		if (err)
+			goto err_out;
+	} else {
+		/*
+		 * OK, we spliced it into the inode itself on a direct block.
+		 * Inode was dirtied above.
+		 */
+		jbd_debug(5, "splicing direct\n");
+	}
+	return err;
+
+err_out:
+	/*
+	 * Undo the allocation: forget each new indirect buffer and free the
+	 * block it lives in (named by the previous level's key), then free
+	 * the run of data blocks.
+	 */
+	for (i = 1; i <= num; i++) {
+		BUFFER_TRACE(where[i].bh, "call journal_forget");
+		ext3_journal_forget(handle, where[i].bh);
+		ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
+	}
+	ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
+
+	return err;
+}
+
+/*
+ * Allocation strategy is simple: if we have to allocate something, we will
+ * have to go the whole way to leaf. So let's do it before attaching anything
+ * to tree, set linkage between the newborn blocks, write them if sync is
+ * required, recheck the path, free and repeat if check fails, otherwise
+ * set the last missing link (that will protect us from any truncate-generated
+ * removals - all blocks on the path are immune now) and possibly force the
+ * write on the parent block.
+ * That has a nice additional property: no special recovery from the failed
+ * allocations is needed - we simply release blocks and do not touch anything
+ * reachable from inode.
+ *
+ * `handle' can be NULL if create == 0.
+ *
+ * The BKL may not be held on entry here.  Be sure to take it early.
+ * return > 0, # of blocks mapped or allocated.
+ * return = 0, if plain lookup failed.
+ * return < 0, error case.
+ */
+int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
+		sector_t iblock, unsigned long maxblocks,
+		struct buffer_head *bh_result,
+		int create, int extend_disksize)
+{
+	int err = -EIO;
+	int offsets[4];
+	Indirect chain[4];
+	Indirect *partial;
+	ext3_fsblk_t goal;
+	int indirect_blks;
+	int blocks_to_boundary = 0;
+	int depth;
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	int count = 0;	/* blocks mapped or allocated; the success return value */
+	ext3_fsblk_t first_block = 0;
+
+
+	J_ASSERT(handle != NULL || create == 0);
+	depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
+
+	/* iblock out of range: err is still -EIO */
+	if (depth == 0)
+		goto out;
+
+	partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+
+	/* Simplest case - block found, no allocation needed */
+	if (!partial) {
+		first_block = le32_to_cpu(chain[depth - 1].key);
+		clear_buffer_new(bh_result);
+		count++;
+		/*map more blocks*/
+		while (count < maxblocks && count <= blocks_to_boundary) {
+			ext3_fsblk_t blk;
+
+			/*
+			 * partial is NULL in this branch, so it cannot name
+			 * the end of the chain; verify against the real last
+			 * element so the whole chain is re-checked.
+			 */
+			if (!verify_chain(chain, chain + depth - 1)) {
+				/*
+				 * Indirect block might be removed by
+				 * truncate while we were reading it.
+				 * Handling of that case: forget what we've
+				 * got now. Flag the err as EAGAIN, so it
+				 * will reread.
+				 */
+				err = -EAGAIN;
+				count = 0;
+				/*
+				 * The whole chain was read, so point partial
+				 * at its end: the release loops below walk
+				 * down from partial and would otherwise skip
+				 * every buffer_head we hold.
+				 */
+				partial = chain + depth - 1;
+				break;
+			}
+			blk = le32_to_cpu(*(chain[depth-1].p + count));
+
+			/* extend the mapping only over physically contiguous blocks */
+			if (blk == first_block + count)
+				count++;
+			else
+				break;
+		}
+		if (err != -EAGAIN)
+			goto got_it;
+	}
+
+	/* Next simple case - plain lookup or failed read of indirect block */
+	if (!create || err == -EIO)
+		goto cleanup;
+
+	mutex_lock(&ei->truncate_mutex);
+
+	/*
+	 * If the indirect block is missing while we are reading
+	 * the chain(ext3_get_branch() returns -EAGAIN err), or
+	 * if the chain has been changed after we grab the semaphore,
+	 * (either because another process truncated this branch, or
+	 * another get_block allocated this branch) re-grab the chain to see if
+	 * the request block has been allocated or not.
+	 *
+	 * Since we already block the truncate/other get_block
+	 * at this point, we will have the current copy of the chain when we
+	 * splice the branch into the tree.
+	 */
+	if (err == -EAGAIN || !verify_chain(chain, partial)) {
+		while (partial > chain) {
+			brelse(partial->bh);
+			partial--;
+		}
+		partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+		if (!partial) {
+			/* somebody else allocated it meanwhile: map one block */
+			count++;
+			mutex_unlock(&ei->truncate_mutex);
+			if (err)
+				goto cleanup;
+			clear_buffer_new(bh_result);
+			goto got_it;
+		}
+	}
+
+	/*
+	 * Okay, we need to do block allocation.  Lazily initialize the block
+	 * allocation info here if necessary
+	*/
+	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
+		ext3_init_block_alloc_info(inode);
+
+	goal = ext3_find_goal(inode, iblock, chain, partial);
+
+	/* the number of blocks need to allocate for [d,t]indirect blocks */
+	indirect_blks = (chain + depth) - partial - 1;
+
+	/*
+	 * Next look up the indirect map to count the total number of
+	 * direct blocks to allocate for this branch.
+	 */
+	count = ext3_blks_to_allocate(partial, indirect_blks,
+					maxblocks, blocks_to_boundary);
+	/*
+	 * Block out ext3_truncate while we alter the tree
+	 */
+	err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
+				offsets + (partial - chain), partial);
+
+	/*
+	 * The ext3_splice_branch call will free and forget any buffers
+	 * on the new chain if there is a failure, but that risks using
+	 * up transaction credits, especially for bitmaps where the
+	 * credits cannot be returned.  Can we handle this somehow?  We
+	 * may need to return -EAGAIN upwards in the worst case.  --sct
+	 */
+	if (!err)
+		err = ext3_splice_branch(handle, inode, iblock,
+					partial, indirect_blks, count);
+	/*
+	 * i_disksize growing is protected by truncate_mutex.  Don't forget to
+	 * protect it if you're about to implement concurrent
+	 * ext3_get_block() -bzzz
+	*/
+	if (!err && extend_disksize && inode->i_size > ei->i_disksize)
+		ei->i_disksize = inode->i_size;
+	mutex_unlock(&ei->truncate_mutex);
+	if (err)
+		goto cleanup;
+
+	set_buffer_new(bh_result);
+got_it:
+	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+	/* the mapping runs up to the end of its pointer array */
+	if (count > blocks_to_boundary)
+		set_buffer_boundary(bh_result);
+	err = count;
+	/* Clean up and exit */
+	partial = chain + depth - 1;	/* the whole chain */
+cleanup:
+	/* release every indirect buffer from partial down to chain[1] */
+	while (partial > chain) {
+		BUFFER_TRACE(partial->bh, "call brelse");
+		brelse(partial->bh);
+		partial--;
+	}
+	BUFFER_TRACE(bh_result, "returned");
+out:
+	return err;
+}
+
+/* Buffer credits requested when (re)starting or extending a handle below */
+#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
+
+/*
+ * get_block callback: map (and, if @create, allocate) up to
+ * bh_result->b_size worth of blocks starting at @iblock, using the
+ * task's current journal handle.  Returns 0 on success, with
+ * bh_result->b_size set to the size actually mapped, or a negative error.
+ */
+static int ext3_get_block(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create)
+{
+	handle_t *handle = journal_current_handle();
+	int ret = 0;
+	/* the caller states the wanted extent via b_size */
+	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+
+	if (!create)
+		goto get_block;		/* A read */
+
+	if (max_blocks == 1)
+		goto get_block;		/* A single block get */
+
+	/* Multi-block create from here on; handle is dereferenced below */
+	if (handle->h_transaction->t_state == T_LOCKED) {
+		/*
+		 * Huge direct-io writes can hold off commits for long
+		 * periods of time.  Let this commit run.
+		 */
+		ext3_journal_stop(handle);
+		handle = ext3_journal_start(inode, DIO_CREDITS);
+		if (IS_ERR(handle))
+			ret = PTR_ERR(handle);
+		goto get_block;
+	}
+
+	if (handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) {
+		/*
+		 * Getting low on buffer credits...
+		 */
+		ret = ext3_journal_extend(handle, DIO_CREDITS);
+		if (ret > 0) {
+			/*
+			 * Couldn't extend the transaction.  Start a new one.
+			 */
+			ret = ext3_journal_restart(handle, DIO_CREDITS);
+		}
+	}
+
+get_block:
+	/* a non-zero ret here is a journal error from above; skip the mapping */
+	if (ret == 0) {
+		ret = ext3_get_blocks_handle(handle, inode, iblock,
+					max_blocks, bh_result, create, 0);
+		if (ret > 0) {
+			/* positive return is a block count: turn it into bytes */
+			bh_result->b_size = (ret << inode->i_blkbits);
+			ret = 0;
+		}
+	}
+	return ret;
+}
+
+/*
+ * `handle' can be NULL if create is zero
+ */
+/*
+ * Map logical block @block of @inode and return a buffer_head for it,
+ * or NULL.  *@errp receives 0 or a negative error; a NULL return with
+ * *@errp == 0 means the block is not mapped.
+ */
+struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
+				long block, int create, int *errp)
+{
+	/* on-stack stand-in that only carries the mapping result and flags */
+	struct buffer_head dummy;
+	int fatal = 0, err;
+
+	J_ASSERT(handle != NULL || create == 0);
+
+	dummy.b_state = 0;
+	dummy.b_blocknr = -1000;
+	buffer_trace_init(&dummy.b_history);
+	/* exactly one block is requested, with extend_disksize set */
+	err = ext3_get_blocks_handle(handle, inode, block, 1,
+					&dummy, create, 1);
+	/*
+	 * ext3_get_blocks_handle() returns number of blocks
+	 * mapped. 0 in case of a HOLE.
+	 */
+	if (err > 0) {
+		/* only one block was asked for, so more than one is unexpected */
+		if (err > 1)
+			WARN_ON(1);
+		err = 0;
+	}
+	*errp = err;
+	if (!err && buffer_mapped(&dummy)) {
+		struct buffer_head *bh;
+		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+		if (!bh) {
+			*errp = -EIO;
+			goto err;
+		}
+		if (buffer_new(&dummy)) {
+			J_ASSERT(create != 0);
+			J_ASSERT(handle != 0);
+
+			/*
+			 * Now that we do not always journal data, we should
+			 * keep in mind whether this should always journal the
+			 * new buffer as metadata.  For now, regular file
+			 * writes use ext3_get_block instead, so it's not a
+			 * problem.
+			 */
+			lock_buffer(bh);
+			BUFFER_TRACE(bh, "call get_create_access");
+			fatal = ext3_journal_get_create_access(handle, bh);
+			/* freshly allocated block: present it as zero-filled */
+			if (!fatal && !buffer_uptodate(bh)) {
+				memset(bh->b_data,0,inode->i_sb->s_blocksize);
+				set_buffer_uptodate(bh);
+			}
+			unlock_buffer(bh);
+			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+			/* called even if create access failed; the first error wins */
+			err = ext3_journal_dirty_metadata(handle, bh);
+			if (!fatal)
+				fatal = err;
+		} else {
+			BUFFER_TRACE(bh, "not a new buffer");
+		}
+		if (fatal) {
+			*errp = fatal;
+			brelse(bh);
+			bh = NULL;
+		}
+		return bh;
+	}
+err:
+	return NULL;
+}
+
+/*
+ * Like ext3_getblk(), but also make sure the buffer contents are valid:
+ * if the buffer is not up to date it is read in synchronously.  Returns
+ * NULL with *@err == -EIO when the read does not leave it up to date.
+ */
+struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
+			       int block, int create, int *err)
+{
+	struct buffer_head *buf = ext3_getblk(handle, inode, block, create, err);
+
+	/* Nothing mapped (or error), or contents already valid: hand it back */
+	if (buf == NULL || buffer_uptodate(buf))
+		return buf;
+
+	ll_rw_block(READ_META, 1, &buf);
+	wait_on_buffer(buf);
+	if (!buffer_uptodate(buf)) {
+		/* the read failed: drop our reference and report it */
+		put_bh(buf);
+		*err = -EIO;
+		return NULL;
+	}
+	return buf;
+}
+
+/*
+ * Call @fn on every buffer of the ring starting at @head whose byte range
+ * overlaps [@from, @to).  Buffers outside the range are not passed to @fn;
+ * if @partial is non-NULL it is set to 1 when such a buffer is not up to
+ * date.  The walk stops at the first non-zero return from @fn, which
+ * becomes the result; otherwise 0 is returned.
+ */
+static int walk_page_buffers(	handle_t *handle,
+				struct buffer_head *head,
+				unsigned from,
+				unsigned to,
+				int *partial,
+				int (*fn)(	handle_t *handle,
+						struct buffer_head *bh))
+{
+	struct buffer_head *cur = head;
+	unsigned blocksize = head->b_size;
+	unsigned start = 0;
+	int ret = 0;
+
+	/* start == 0 lets the first visit to head through; the second ends the walk */
+	while (ret == 0 && (cur != head || start == 0)) {
+		/* fetch the successor before @fn gets to touch the buffer */
+		struct buffer_head *following = cur->b_this_page;
+		unsigned end = start + blocksize;
+
+		if (end > from && start < to) {
+			ret = (*fn)(handle, cur);
+		} else if (partial && !buffer_uptodate(cur)) {
+			*partial = 1;
+		}
+
+		start = end;
+		cur = following;
+	}
+	return ret;
+}
+
+/*
+ * To preserve ordering, it is essential that the hole instantiation and
+ * the data write be encapsulated in a single transaction.  We cannot
+ * close off a transaction and start a new one between the ext3_get_block()
+ * and the commit_write().  So doing the journal_start at the start of
+ * prepare_write() is the right place.
+ *
+ * Also, this function can nest inside ext3_writepage() ->
+ * block_write_full_page(). In that case, we *know* that ext3_writepage()
+ * has generated enough buffer credits to do the whole page.  So we won't
+ * block on the journal in that case, which is good, because the caller may
+ * be PF_MEMALLOC.
+ *
+ * By accident, ext3 can be reentered when a transaction is open via
+ * quota file writes.  If we were to commit the transaction while thus
+ * reentered, there can be a deadlock - we would be holding a quota
+ * lock, and the commit would never complete if another thread had a
+ * transaction open and was blocking on the quota lock - a ranking
+ * violation.
+ *
+ * So what we do is to rely on the fact that journal_stop/journal_start
+ * will _not_ run commit under these circumstances because handle->h_ref
+ * is elevated.  We'll still have enough credits for the tiny quotafile
+ * write.
+ */
+static int do_journal_get_write_access(handle_t *handle,
+					struct buffer_head *bh)
+{
+	/* Only mapped buffers that have not been freed need journal access */
+	if (buffer_mapped(bh) && !buffer_freed(bh))
+		return ext3_journal_get_write_access(handle, bh);
+	return 0;
+}
+
+/*
+ * prepare_write: start a journal handle, then map/allocate the blocks
+ * backing [@from, @to) of @page.  On success the handle is left running;
+ * the *_commit_write() functions in this file stop the current handle.
+ * Returns 0 or a negative error.
+ */
+static int ext3_prepare_write(struct file *file, struct page *page,
+			      unsigned from, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
+	handle_t *handle;
+	int retries = 0;
+
+retry:
+	handle = ext3_journal_start(inode, needed_blocks);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
+	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
+		ret = nobh_prepare_write(page, from, to, ext3_get_block);
+	else
+		ret = block_prepare_write(page, from, to, ext3_get_block);
+	if (ret)
+		goto prepare_write_failed;
+
+	/* in data-journalling mode each buffer in the range needs write access */
+	if (ext3_should_journal_data(inode)) {
+		ret = walk_page_buffers(handle, page_buffers(page),
+				from, to, NULL, do_journal_get_write_access);
+	}
+prepare_write_failed:
+	/* reached on success too; the handle is stopped only on failure */
+	if (ret)
+		ext3_journal_stop(handle);
+	/* out of space: retry from scratch if ext3_should_retry_alloc() agrees */
+	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
+out:
+	return ret;
+}
+
+/*
+ * Wrapper around journal_dirty_data() that aborts the handle when the
+ * call fails.  Returns journal_dirty_data()'s result unchanged.
+ */
+int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
+{
+	int rc = journal_dirty_data(handle, bh);
+
+	if (rc == 0)
+		return 0;
+
+	ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__,
+					bh, handle,rc);
+	return rc;
+}
+
+/* For commit_write() in data=journal mode */
+static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
+{
+	/* Mapped, not-freed buffers become up to date and journalled metadata */
+	if (buffer_mapped(bh) && !buffer_freed(bh)) {
+		set_buffer_uptodate(bh);
+		return ext3_journal_dirty_metadata(handle, bh);
+	}
+	return 0;
+}
+
+/*
+ * We need to pick up the new inode size which generic_commit_write gave us
+ * `file' can be NULL - eg, when called from page_symlink().
+ *
+ * ext3 never places buffers on inode->i_mapping->private_list.  metadata
+ * buffers are managed internally.
+ */
+static int ext3_ordered_commit_write(struct file *file, struct page *page,
+			     unsigned from, unsigned to)
+{
+	handle_t *handle = ext3_journal_current_handle();
+	struct inode *inode = page->mapping->host;
+	int rc, stop_rc;
+
+	/* Hand every buffer in the written range to ext3_journal_dirty_data() */
+	rc = walk_page_buffers(handle, page_buffers(page),
+		from, to, NULL, ext3_journal_dirty_data);
+
+	if (!rc) {
+		/*
+		 * generic_commit_write() runs mark_inode_dirty() when i_size
+		 * changes, so raise i_disksize first and let that call carry
+		 * it along.
+		 */
+		struct ext3_inode_info *ei = EXT3_I(inode);
+		loff_t end_pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+		if (ei->i_disksize < end_pos)
+			ei->i_disksize = end_pos;
+		rc = generic_commit_write(file, page, from, to);
+	}
+
+	/* The handle is always stopped; the first error is the one reported */
+	stop_rc = ext3_journal_stop(handle);
+	return rc ? rc : stop_rc;
+}
+
+/*
+ * commit_write for writeback mode: raise i_disksize to cover the write,
+ * finish the write through the nobh or generic helper, then stop the
+ * current journal handle.
+ */
+static int ext3_writeback_commit_write(struct file *file, struct page *page,
+			     unsigned from, unsigned to)
+{
+	handle_t *handle = ext3_journal_current_handle();
+	struct inode *inode = page->mapping->host;
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	loff_t end_pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	int rc, stop_rc;
+
+	if (ei->i_disksize < end_pos)
+		ei->i_disksize = end_pos;
+
+	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
+		rc = nobh_commit_write(file, page, from, to);
+	else
+		rc = generic_commit_write(file, page, from, to);
+
+	/* The handle is always stopped; the first error is the one reported */
+	stop_rc = ext3_journal_stop(handle);
+	return rc ? rc : stop_rc;
+}
+
+static int ext3_journalled_commit_write(struct file *file,
+			struct page *page, unsigned from, unsigned to)
+{
+	handle_t *handle = ext3_journal_current_handle();
+	struct inode *inode = page->mapping->host;
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	int not_all_uptodate = 0;
+	int rc, rc2;
+	loff_t end_pos;
+
+	/*
+	 * This path does by hand what generic_commit_write() would do.
+	 */
+	end_pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+	/* Journal each written buffer; note any untouched stale buffer */
+	rc = walk_page_buffers(handle, page_buffers(page), from,
+				to, &not_all_uptodate, commit_write_fn);
+	if (!not_all_uptodate)
+		SetPageUptodate(page);
+
+	/* Grow the in-core size if the write went past it */
+	if (inode->i_size < end_pos)
+		i_size_write(inode, end_pos);
+	ei->i_state |= EXT3_STATE_JDATA;
+
+	/* Let i_disksize follow i_size and push the inode to the journal */
+	if (ei->i_disksize < inode->i_size) {
+		ei->i_disksize = inode->i_size;
+		rc2 = ext3_mark_inode_dirty(handle, inode);
+		if (rc == 0)
+			rc = rc2;
+	}
+
+	/* The handle is always stopped; the first error is the one reported */
+	rc2 = ext3_journal_stop(handle);
+	if (rc == 0)
+		rc = rc2;
+	return rc;
+}
+
+/*
+ * bmap() is special.  It gets used by applications such as lilo and by
+ * the swapper to find the on-disk block of a specific piece of data.
+ *
+ * Naturally, this is dangerous if the block concerned is still in the
+ * journal.  If somebody makes a swapfile on an ext3 data-journaling
+ * filesystem and enables swap, then they may get a nasty shock when the
+ * data getting swapped to that swapfile suddenly gets overwritten by
+ * the original zero's written out previously to the journal and
+ * awaiting writeback in the kernel's buffer cache.
+ *
+ * So, if we see any bmap calls here on a modified, data-journaled file,
+ * take extra steps to flush any blocks which might be in the cache.
+ */
+static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
+{
+	struct inode *inode = mapping->host;
+	struct ext3_inode_info *ei = EXT3_I(inode);
+
+	if (ei->i_state & EXT3_STATE_JDATA) {
+		journal_t *jnl;
+		int flush_rc;
+
+		/*
+		 * Very heavyweight, but bmap on a dirty file should be
+		 * extremely rare: we only expect it when lilo or swapon is
+		 * run on a freshly made file.
+		 *
+		 * (bmap requires CAP_SYS_RAWIO, so this is not a DoS that
+		 * an unprivileged user can trigger at will.)
+		 *
+		 * NB. EXT3_STATE_JDATA is only ever set on regular files.
+		 * Anybody who bmaps a directory or symlink whose buffers
+		 * have not reached the disk yet is on their own.
+		 */
+		ei->i_state &= ~EXT3_STATE_JDATA;
+		jnl = EXT3_JOURNAL(inode);
+
+		/* Flush the whole journal with updates held off */
+		journal_lock_updates(jnl);
+		flush_rc = journal_flush(jnl);
+		journal_unlock_updates(jnl);
+
+		/* 0 is what the caller sees when the flush fails */
+		if (flush_rc)
+			return 0;
+	}
+
+	return generic_block_bmap(mapping,block,ext3_get_block);
+}
+
+static int bget_one(handle_t *handle, struct buffer_head *bh)
+{
+	get_bh(bh);	/* walk_page_buffers() callback; handle is unused */
+	return 0;	/* never reports an error to the walker */
+}
+
+static int bput_one(handle_t *handle, struct buffer_head *bh)
+{
+	put_bh(bh);	/* counterpart of bget_one(); handle is unused */
+	return 0;	/* never reports an error to the walker */
+}
+
+static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
+{
+	if (!buffer_mapped(bh))
+		return 0;	/* unmapped buffers are skipped */
+	return ext3_journal_dirty_data(handle, bh);
+}
+
+/*
+ * Note that we always start a transaction even if we're not journalling
+ * data.  This is to preserve ordering: any hole instantiation within
+ * __block_write_full_page -> ext3_get_block() should be journalled
+ * along with the data so we don't crash and then get metadata which
+ * refers to old data.
+ *
+ * In all journalling modes block_write_full_page() will start the I/O.
+ *
+ * Problem:
+ *
+ *	ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
+ *		ext3_writepage()
+ *
+ * Similar for:
+ *
+ *	ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
+ *
+ * Same applies to ext3_get_block().  We will deadlock on various things like
+ * lock_journal and truncate_mutex.
+ *
+ * Setting PF_MEMALLOC here doesn't work - too many internal memory
+ * allocations fail.
+ *
+ * 16May01: If we're reentered then journal_current_handle() will be
+ *	    non-zero. We simply *return*.
+ *
+ * 1 July 2001: @@@ FIXME:
+ *   In journalled data mode, a data buffer may be metadata against the
+ *   current transaction.  But the same file is part of a shared mapping
+ *   and someone does a writepage() on it.
+ *
+ *   We will move the buffer onto the async_data list, but *after* it has
+ *   been dirtied. So there's a small window where we have dirty data on
+ *   BJ_Metadata.
+ *
+ *   Note that this only applies to the last partial page in the file.  The
+ *   bit which block_write_full_page() uses prepare/commit for.  (That's
+ *   broken code anyway: it's wrong for msync()).
+ *
+ *   It's a rare case: affects the final partial page, for journalled data
+ *   where the file is subject to both write() and writepage() in the same
+ *   transaction.  To fix it we'll need a custom block_write_full_page().
+ *   We'll probably need that anyway for journalling writepage() output.
+ *
+ * We don't honour synchronous mounts for writepage().  That would be
+ * disastrous.  Any write() or metadata operation will sync the fs for
+ * us.
+ *
+ * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
+ * we don't need to open a transaction here.
+ */
+static int ext3_ordered_writepage(struct page *page,
+				struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct buffer_head *page_bufs;
+	handle_t *handle = NULL;
+	int ret = 0;
+	int err;
+
+	J_ASSERT(PageLocked(page));
+
+	/*
+	 * We give up here if we're reentered, because it might be for a
+	 * different filesystem.
+	 */
+	if (ext3_journal_current_handle())
+		goto out_fail;	/* ret is still 0: redirty the page, no error */
+
+	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
+
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out_fail;	/* redirty the page and return the start error */
+	}
+
+	if (!page_has_buffers(page)) {
+		create_empty_buffers(page, inode->i_sb->s_blocksize,
+				(1 << BH_Dirty)|(1 << BH_Uptodate));
+	}
+	page_bufs = page_buffers(page);
+	walk_page_buffers(handle, page_bufs, 0,
+			PAGE_CACHE_SIZE, NULL, bget_one);	/* pin every buffer */
+
+	ret = block_write_full_page(page, ext3_get_block, wbc);
+
+	/*
+	 * The page can become unlocked at any point now, and
+	 * truncate can then come in and change things.  So we
+	 * can't touch *page from now on.  But *page_bufs is
+	 * safe due to elevated refcount.
+	 */
+
+	/*
+	 * And attach them to the current transaction.  But only if
+	 * block_write_full_page() succeeded.  Otherwise they are unmapped,
+	 * and generally junk.
+	 */
+	if (ret == 0) {
+		err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
+					NULL, journal_dirty_data_fn);
+		if (!ret)	/* always true: ret == 0 was tested just above */
+			ret = err;
+	}
+	walk_page_buffers(handle, page_bufs, 0,
+			PAGE_CACHE_SIZE, NULL, bput_one);	/* drop the pins */
+	err = ext3_journal_stop(handle);
+	if (!ret)	/* keep the first error seen */
+		ret = err;
+	return ret;
+
+out_fail:
+	/* No I/O was started on this path: redirty and unlock the page here. */
+	redirty_page_for_writepage(wbc, page);
+	unlock_page(page);
+	return ret;
+}
+
+static int ext3_writeback_writepage(struct page *page,
+				struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	handle_t *handle = NULL;
+	int ret = 0;
+	int err;
+
+	if (ext3_journal_current_handle())
+		goto out_fail;	/* a handle is already active: redirty, ret stays 0 */
+
+	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out_fail;	/* redirty the page and return the start error */
+	}
+
+	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
+		ret = nobh_writepage(page, ext3_get_block, wbc);
+	else
+		ret = block_write_full_page(page, ext3_get_block, wbc);
+
+	err = ext3_journal_stop(handle);
+	if (!ret)	/* keep the first error seen */
+		ret = err;
+	return ret;
+
+out_fail:
+	redirty_page_for_writepage(wbc, page);
+	unlock_page(page);	/* no write was started, so unlock here */
+	return ret;
+}
+
+static int ext3_journalled_writepage(struct page *page,
+				struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	handle_t *handle = NULL;
+	int ret = 0;
+	int err;
+
+	if (ext3_journal_current_handle())
+		goto no_write;	/* a handle is already active: redirty, ret stays 0 */
+
+	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto no_write;
+	}
+
+	if (!page_has_buffers(page) || PageChecked(page)) {
+		/*
+		 * It's mmapped pagecache.  Add buffers and journal it.  There
+		 * doesn't seem much point in redirtying the page here.
+		 */
+		ClearPageChecked(page);
+		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+					ext3_get_block);
+		if (ret != 0) {
+			ext3_journal_stop(handle);	/* result dropped: ret already holds an error */
+			goto out_unlock;	/* unlock without redirtying */
+		}
+		ret = walk_page_buffers(handle, page_buffers(page), 0,
+			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
+
+		err = walk_page_buffers(handle, page_buffers(page), 0,
+				PAGE_CACHE_SIZE, NULL, commit_write_fn);
+		if (ret == 0)	/* keep the first error seen */
+			ret = err;
+		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;	/* tested and cleared by ext3_bmap() */
+		unlock_page(page);	/* this branch unlocks the page itself */
+	} else {
+		/*
+		 * It may be a page full of checkpoint-mode buffers.  We don't
+		 * really know unless we go poke around in the buffer_heads.
+		 * But block_write_full_page will do the right thing.
+		 */
+		ret = block_write_full_page(page, ext3_get_block, wbc);
+	}
+	err = ext3_journal_stop(handle);
+	if (!ret)
+		ret = err;
+out:
+	return ret;
+
+no_write:
+	redirty_page_for_writepage(wbc, page);	/* then falls into out_unlock */
+out_unlock:
+	unlock_page(page);
+	goto out;	/* jumps back up to the single return */
+}
+
+static int ext3_readpage(struct file *file, struct page *page)
+{
+	return mpage_readpage(page, ext3_get_block);	/* file is unused */
+}
+
+static int
+ext3_readpages(struct file *file, struct address_space *mapping,
+		struct list_head *pages, unsigned nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);	/* file is unused */
+}
+
+static void ext3_invalidatepage(struct page *page, unsigned long offset)
+{
+	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+
+	/*
+	 * offset == 0 is a full truncate: drop the PageChecked mark as well.
+	 */
+	if (offset == 0)
+		ClearPageChecked(page);
+
+	journal_invalidatepage(journal, page, offset);	/* called for any offset */
+}
+
+static int ext3_releasepage(struct page *page, gfp_t wait)
+{
+	journal_t *jnl = EXT3_JOURNAL(page->mapping->host);
+
+	WARN_ON(PageChecked(page));	/* warn if the page is still marked Checked */
+	if (page_has_buffers(page))
+		return journal_try_to_free_buffers(jnl, page, wait);
+	return 0;	/* no buffers attached to this page */
+}
+
+/*
+ * If the O_DIRECT write will extend the file then add this inode to the
+ * orphan list.  So recovery will truncate it back to the original size
+ * if the machine crashes during the write.
+ *
+ * If the O_DIRECT write is instantiating holes inside i_size and the machine
+ * crashes then stale disk data _may_ be exposed inside the file.
+ */
+static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
+			const struct iovec *iov, loff_t offset,
+			unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	handle_t *handle = NULL;
+	ssize_t ret;
+	int orphan = 0;	/* set once the inode is on the orphan list */
+	size_t count = iov_length(iov, nr_segs);
+
+	if (rw == WRITE) {
+		loff_t final_size = offset + count;
+
+		handle = ext3_journal_start(inode, DIO_CREDITS);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;	/* nothing to stop or undo yet */
+		}
+		if (final_size > inode->i_size) {	/* the write extends the file */
+			ret = ext3_orphan_add(handle, inode);
+			if (ret)
+				goto out_stop;	/* still stops the handle */
+			orphan = 1;
+			ei->i_disksize = inode->i_size;
+		}
+	}
+
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+				 offset, nr_segs,
+				 ext3_get_block, NULL);
+
+	/*
+	 * Reacquire the handle: ext3_get_block() can restart the transaction
+	 */
+	handle = journal_current_handle();
+
+out_stop:
+	if (handle) {
+		int err;
+
+		if (orphan && inode->i_nlink)
+			ext3_orphan_del(handle, inode);
+		if (orphan && ret > 0) {	/* some bytes of an extending write went out */
+			loff_t end = offset + ret;
+			if (end > inode->i_size) {
+				ei->i_disksize = end;
+				i_size_write(inode, end);
+				/*
+				 * We're going to return a positive `ret'
+				 * here due to non-zero-length I/O, so there's
+				 * no way of reporting error returns from
+				 * ext3_mark_inode_dirty() to userspace.  So
+				 * ignore it.
+				 */
+				ext3_mark_inode_dirty(handle, inode);
+			}
+		}
+		err = ext3_journal_stop(handle);
+		if (ret == 0)	/* only overrides a zero result */
+			ret = err;
+	}
+out:
+	return ret;
+}
+
+/*
+ * Pages can be marked dirty completely asynchronously from ext3's journalling
+ * activity.  By filemap_sync_pte(), try_to_unmap_one(), etc.  We cannot do
+ * much here because ->set_page_dirty is called under VFS locks.  The page is
+ * not necessarily locked.
+ *
+ * We cannot just dirty the page and leave attached buffers clean, because the
+ * buffers' dirty state is "definitive".  We cannot just set the buffers dirty
+ * or jbddirty because all the journalling code will explode.
+ *
+ * So what we do is to mark the page "pending dirty" and next time writepage
+ * is called, propagate that into the buffers appropriately.
+ */
+static int ext3_journalled_set_page_dirty(struct page *page)
+{
+	SetPageChecked(page);	/* tested and cleared in ext3_journalled_writepage() */
+	return __set_page_dirty_nobuffers(page);	/* only the page is dirtied here */
+}
+
+static const struct address_space_operations ext3_ordered_aops = {
+	.readpage	= ext3_readpage,
+	.readpages	= ext3_readpages,
+	.writepage	= ext3_ordered_writepage,	/* differs per table */
+	.sync_page	= block_sync_page,
+	.prepare_write	= ext3_prepare_write,
+	.commit_write	= ext3_ordered_commit_write,	/* differs per table */
+	.bmap		= ext3_bmap,
+	.invalidatepage	= ext3_invalidatepage,
+	.releasepage	= ext3_releasepage,
+	.direct_IO	= ext3_direct_IO,
+	.migratepage	= buffer_migrate_page,
+};	/* installed by ext3_set_aops() when ext3_should_order_data() */
+
+static const struct address_space_operations ext3_writeback_aops = {
+	.readpage	= ext3_readpage,
+	.readpages	= ext3_readpages,
+	.writepage	= ext3_writeback_writepage,	/* differs per table */
+	.sync_page	= block_sync_page,
+	.prepare_write	= ext3_prepare_write,
+	.commit_write	= ext3_writeback_commit_write,	/* differs per table */
+	.bmap		= ext3_bmap,
+	.invalidatepage	= ext3_invalidatepage,
+	.releasepage	= ext3_releasepage,
+	.direct_IO	= ext3_direct_IO,
+	.migratepage	= buffer_migrate_page,
+};	/* installed by ext3_set_aops() when ext3_should_writeback_data() */
+
+static const struct address_space_operations ext3_journalled_aops = {
+	.readpage	= ext3_readpage,
+	.readpages	= ext3_readpages,
+	.writepage	= ext3_journalled_writepage,	/* differs per table */
+	.sync_page	= block_sync_page,
+	.prepare_write	= ext3_prepare_write,
+	.commit_write	= ext3_journalled_commit_write,	/* differs per table */
+	.set_page_dirty	= ext3_journalled_set_page_dirty,	/* only this table sets it */
+	.bmap		= ext3_bmap,
+	.invalidatepage	= ext3_invalidatepage,
+	.releasepage	= ext3_releasepage,
+};	/* unlike the other two tables: no .direct_IO, no .migratepage */
+
+void ext3_set_aops(struct inode *inode)
+{
+	if (ext3_should_order_data(inode))	/* checked first */
+		inode->i_mapping->a_ops = &ext3_ordered_aops;
+	else if (ext3_should_writeback_data(inode))
+		inode->i_mapping->a_ops = &ext3_writeback_aops;
+	else	/* neither ordered nor writeback */
+		inode->i_mapping->a_ops = &ext3_journalled_aops;
+}
+
+/*
+ * ext3_block_truncate_page() zeroes out a mapping from file offset `from'
+ * up to the end of the block which corresponds to `from'.
+ * This is required during truncate. We need to physically zero the tail end
+ * of that block so it doesn't yield old data if the file is later grown.
+ */
+static int ext3_block_truncate_page(handle_t *handle, struct page *page,
+		struct address_space *mapping, loff_t from)
+{
+	ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;	/* page index of `from' */
+	unsigned offset = from & (PAGE_CACHE_SIZE-1);	/* byte offset within the page */
+	unsigned blocksize, iblock, length, pos;
+	struct inode *inode = mapping->host;
+	struct buffer_head *bh;
+	int err = 0;
+	void *kaddr;
+
+	blocksize = inode->i_sb->s_blocksize;
+	length = blocksize - (offset & (blocksize - 1));	/* bytes to the end of the block */
+	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+
+	/*
+	 * For "nobh" option,  we can only work if we don't need to
+	 * read-in the page - otherwise we create buffers to do the IO.
+	 */
+	if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
+	     ext3_should_writeback_data(inode) && PageUptodate(page)) {
+		kaddr = kmap_atomic(page, KM_USER0);
+		memset(kaddr + offset, 0, length);
+		flush_dcache_page(page);
+		kunmap_atomic(kaddr, KM_USER0);
+		set_page_dirty(page);
+		goto unlock;	/* err is still 0 */
+	}
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, blocksize, 0);
+
+	/* Find the buffer that contains "offset" */
+	bh = page_buffers(page);
+	pos = blocksize;
+	while (offset >= pos) {
+		bh = bh->b_this_page;
+		iblock++;	/* keep the block number in step with bh */
+		pos += blocksize;
+	}
+
+	err = 0;	/* redundant: err has not been changed since its initialiser */
+	if (buffer_freed(bh)) {
+		BUFFER_TRACE(bh, "freed: skip");
+		goto unlock;
+	}
+
+	if (!buffer_mapped(bh)) {
+		BUFFER_TRACE(bh, "unmapped");
+		ext3_get_block(inode, iblock, bh, 0);	/* return value ignored; mapped state is re-tested */
+		/* unmapped? It's a hole - nothing to do */
+		if (!buffer_mapped(bh)) {
+			BUFFER_TRACE(bh, "still unmapped");
+			goto unlock;
+		}
+	}
+
+	/* Ok, it's mapped. Make sure it's up-to-date */
+	if (PageUptodate(page))
+		set_buffer_uptodate(bh);
+
+	if (!buffer_uptodate(bh)) {
+		err = -EIO;	/* assumed until the read below succeeds */
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		/* Uhhuh. Read error. Complain and punt. */
+		if (!buffer_uptodate(bh))
+			goto unlock;
+	}
+
+	if (ext3_should_journal_data(inode)) {
+		BUFFER_TRACE(bh, "get write access");
+		err = ext3_journal_get_write_access(handle, bh);
+		if (err)
+			goto unlock;
+	}
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	memset(kaddr + offset, 0, length);
+	flush_dcache_page(page);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	BUFFER_TRACE(bh, "zeroed end of block");
+
+	err = 0;	/* also discards the provisional -EIO set before a successful read */
+	if (ext3_should_journal_data(inode)) {
+		err = ext3_journal_dirty_metadata(handle, bh);
+	} else {
+		if (ext3_should_order_data(inode))
+			err = ext3_journal_dirty_data(handle, bh);
+		mark_buffer_dirty(bh);	/* done for both ordered and writeback */
+	}
+
+unlock:
+	unlock_page(page);	/* every path unlocks ... */
+	page_cache_release(page);	/* ... and drops a page reference */
+	return err;
+}
+
+/*
+ * Probably it should be a library function... search for first non-zero word
+ * or memcmp with zero_page, whatever is better for particular architecture.
+ * Linus?
+ */
+static inline int all_zeroes(__le32 *p, __le32 *q)
+{
+	for (; p < q; p++)	/* scan the half-open range [p, q) */
+		if (*p)
+			return 0;	/* found a non-zero word */
+	return 1;	/* empty range or nothing but zeroes */
+}
+
+/**
+ *	ext3_find_shared - find the indirect blocks for partial truncation.
+ *	@inode:	  inode in question
+ *	@depth:	  depth of the affected branch
+ *	@offsets: offsets of pointers in that branch (see ext3_block_to_path)
+ *	@chain:	  place to store the pointers to partial indirect blocks
+ *	@top:	  place to store the (detached) top of branch
+ *
+ *	This is a helper function used by ext3_truncate().
+ *
+ *	When we do truncate() we may have to clean the ends of several
+ *	indirect blocks but leave the blocks themselves alive. Block is
+ *	partially truncated if some data below the new i_size is referred
+ *	from it (and it is on the path to the first completely truncated
+ *	data block, indeed).  We have to free the top of that path along
+ *	with everything to the right of the path. Since no allocation
+ *	past the truncation point is possible until ext3_truncate()
+ *	finishes, we may safely do the latter, but top of branch may
+ *	require special attention - pageout below the truncation point
+ *	might try to populate it.
+ *
+ *	We atomically detach the top of branch from the tree, store the
+ *	block number of its root in *@top, pointers to buffer_heads of
+ *	partially truncated blocks - in @chain[].bh and pointers to
+ *	their last elements that should not be removed - in
+ *	@chain[].p. Return value is the pointer to last filled element
+ *	of @chain.
+ *
+ *	The work left to caller to do the actual freeing of subtrees:
+ *		a) free the subtree starting from *@top
+ *		b) free the subtrees whose roots are stored in
+ *			(@chain[i].p+1 .. end of @chain[i].bh->b_data)
+ *		c) free the subtrees growing from the inode past the @chain[0].
+ *			(no partially truncated stuff there).  */
+
+static Indirect *ext3_find_shared(struct inode *inode, int depth,
+			int offsets[4], Indirect chain[4], __le32 *top)
+{
+	Indirect *partial, *p;
+	int k, err;
+
+	*top = 0;	/* default: no detached top to report */
+	/* Make k index the deepest non-null offset + 1 */
+	for (k = depth; k > 1 && !offsets[k-1]; k--)
+		;
+	partial = ext3_get_branch(inode, k, offsets, chain, &err);
+	/* Writer: pointers */
+	if (!partial)
+		partial = chain + k-1;	/* fall back to the last of the k entries */
+	/*
+	 * If the branch acquired continuation since we've looked at it -
+	 * fine, it should all survive and (new) top doesn't belong to us.
+	 */
+	if (!partial->key && *partial->p)
+		/* Writer: end */
+		goto no_top;	/* *top stays 0 */
+	for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
+		;
+	/*
+	 * OK, we've found the last block that must survive. The rest of our
+	 * branch should be detached before unlocking. However, if that rest
+	 * of branch is all ours and does not grow immediately from the inode
+	 * it's easier to cheat and just decrement partial->p.
+	 */
+	if (p == chain + k - 1 && p > chain) {
+		p->p--;
+	} else {
+		*top = *p->p;
+		/* Nope, don't do this in ext3.  Must leave the tree intact */
+#if 0
+		*p->p = 0;
+#endif
+	}
+	/* Writer: end */
+
+	while(partial > p) {
+		brelse(partial->bh);	/* release the bhs past the surviving entry */
+		partial--;
+	}
+no_top:
+	return partial;
+}
+
+/*
+ * Zero a number of block pointers in either an inode or an indirect block.
+ * If we restart the transaction we must again get write access to the
+ * indirect block for further modification.
+ *
+ * We release `count' blocks on disk, but (last - first) may be greater
+ * than `count' because there can be holes in there.
+ */
+static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
+		struct buffer_head *bh, ext3_fsblk_t block_to_free,
+		unsigned long count, __le32 *first, __le32 *last)
+{
+	__le32 *slot;
+	if (try_to_extend_transaction(handle, inode)) {
+		if (bh) {
+			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+			ext3_journal_dirty_metadata(handle, bh);
+		}
+		ext3_mark_inode_dirty(handle, inode);
+		ext3_journal_test_restart(handle, inode);
+		if (bh) {
+			BUFFER_TRACE(bh, "retaking write access");
+			ext3_journal_get_write_access(handle, bh);
+		}
+	}
+
+	/*
+	 * Buffers that are on the journal will be in memory.  Looking each
+	 * one up in the hash table lets journal_revoke() run
+	 * journal_forget() on it.  Every block has already been detached
+	 * from the file, so bforget() in journal_forget() should be safe.
+	 *
+	 * AKPM: turn on bforget in journal_forget()!!!
+	 */
+	for (slot = first; slot < last; slot++) {
+		u32 blk = le32_to_cpu(*slot);
+		struct buffer_head *victim;	/* distinct from the `bh' parameter */
+
+		if (!blk)
+			continue;	/* hole: nothing to forget */
+		*slot = 0;
+		victim = sb_find_get_block(inode->i_sb, blk);
+		ext3_forget(handle, 0, inode, victim, blk);
+	}
+
+	ext3_free_blocks(handle, inode, block_to_free, count);
+}
+
+/**
+ * ext3_free_data - free a list of data blocks
+ * @handle:	handle for this transaction
+ * @inode:	inode we are dealing with
+ * @this_bh:	indirect buffer_head which contains *@first and *@last
+ * @first:	array of block numbers
+ * @last:	points immediately past the end of array
+ *
+ * We are freeing all blocks referred from that array (numbers are stored as
+ * little-endian 32-bit) and updating @inode->i_blocks appropriately.
+ *
+ * We accumulate contiguous runs of blocks to free.  Conveniently, if these
+ * blocks are contiguous then releasing them at one time will only affect one
+ * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
+ * actually use a lot of journal space.
+ *
+ * @this_bh will be %NULL if @first and @last point into the inode's direct
+ * block pointers.
+ */
+static void ext3_free_data(handle_t *handle, struct inode *inode,
+			   struct buffer_head *this_bh,
+			   __le32 *first, __le32 *last)
+{
+	ext3_fsblk_t block_to_free = 0;    /* Starting block # of a run */
+	unsigned long count = 0;	    /* Number of blocks in the run */
+	__le32 *block_to_free_p = NULL;	    /* Pointer into inode/ind
+					       corresponding to
+					       block_to_free */
+	ext3_fsblk_t nr;		    /* Current block # */
+	__le32 *p;			    /* Pointer into inode/ind
+					       for current block */
+	int err;
+
+	if (this_bh) {				/* For indirect block */
+		BUFFER_TRACE(this_bh, "get_write_access");
+		err = ext3_journal_get_write_access(handle, this_bh);
+		/* Important: if we can't update the indirect pointers
+		 * to the blocks, we can't free them. */
+		if (err)
+			return;	/* nothing is freed; the error is not propagated */
+	}
+
+	for (p = first; p < last; p++) {
+		nr = le32_to_cpu(*p);
+		if (nr) {	/* zero entries are holes and do not break a run */
+			/* accumulate blocks to free if they're contiguous */
+			if (count == 0) {
+				block_to_free = nr;
+				block_to_free_p = p;
+				count = 1;
+			} else if (nr == block_to_free + count) {
+				count++;
+			} else {
+				ext3_clear_blocks(handle, inode, this_bh,
+						  block_to_free,
+						  count, block_to_free_p, p);
+				block_to_free = nr;	/* start a new run at p */
+				block_to_free_p = p;
+				count = 1;
+			}
+		}
+	}
+
+	if (count > 0)	/* release the run still pending after the loop */
+		ext3_clear_blocks(handle, inode, this_bh, block_to_free,
+				  count, block_to_free_p, p);
+
+	if (this_bh) {
+		BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
+		ext3_journal_dirty_metadata(handle, this_bh);
+	}
+}
+
+/**
+ *	ext3_free_branches - free an array of branches
+ *	@handle: JBD handle for this transaction
+ *	@inode:	inode we are dealing with
+ *	@parent_bh: the buffer_head which contains *@first and *@last
+ *	@first:	array of block numbers
+ *	@last:	pointer immediately past the end of array
+ *	@depth:	depth of the branches to free
+ *
+ *	We are freeing all blocks referred from these branches (numbers are
+ *	stored as little-endian 32-bit) and updating @inode->i_blocks
+ *	appropriately.
+ */
+static void ext3_free_branches(handle_t *handle, struct inode *inode,
+			       struct buffer_head *parent_bh,
+			       __le32 *first, __le32 *last, int depth)
+{
+	ext3_fsblk_t nr;
+	__le32 *p;
+
+	if (is_handle_aborted(handle))
+		return;
+
+	if (depth--) {	/* non-zero depth: entries point at indirect blocks */
+		struct buffer_head *bh;
+		int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+		p = last;
+		while (--p >= first) {	/* walk the array from last entry to first */
+			nr = le32_to_cpu(*p);
+			if (!nr)
+				continue;		/* A hole */
+
+			/* Go read the buffer for the next level down */
+			bh = sb_bread(inode->i_sb, nr);
+
+			/*
+			 * A read failure? Report error and skip the slot
+			 * (should be rare); *p is left untouched here.
+			 */
+			if (!bh) {
+				ext3_error(inode->i_sb, "ext3_free_branches",
+					   "Read failure, inode=%lu, block="E3FSBLK,
+					   inode->i_ino, nr);
+				continue;
+			}
+
+			/* This zaps the entire block.  Bottom up. */
+			BUFFER_TRACE(bh, "free child branches");
+			ext3_free_branches(handle, inode, bh,
+					   (__le32*)bh->b_data,
+					   (__le32*)bh->b_data + addr_per_block,
+					   depth);
+
+			/*
+			 * We've probably journalled the indirect block several
+			 * times during the truncate.  But it's no longer
+			 * needed and we now drop it from the transaction via
+			 * journal_revoke().
+			 *
+			 * That's easy if it's exclusively part of this
+			 * transaction.  But if it's part of the committing
+			 * transaction then journal_forget() will simply
+			 * brelse() it.  That means that if the underlying
+			 * block is reallocated in ext3_get_block(),
+			 * unmap_underlying_metadata() will find this block
+			 * and will try to get rid of it.  damn, damn.
+			 *
+			 * If this block has already been committed to the
+			 * journal, a revoke record will be written.  And
+			 * revoke records must be emitted *before* clearing
+			 * this block's bit in the bitmaps.
+			 */
+			ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
+
+			/*
+			 * Everything below this pointer has been
+			 * released.  Now let this top-of-subtree go.
+			 *
+			 * We want the freeing of this indirect block to be
+			 * atomic in the journal with the updating of the
+			 * bitmap block which owns it.  So make some room in
+			 * the journal.
+			 *
+			 * We zero the parent pointer *after* freeing its
+			 * pointee in the bitmaps, so if extend_transaction()
+			 * for some reason fails to put the bitmap changes and
+			 * the release into the same transaction, recovery
+			 * will merely complain about releasing a free block,
+			 * rather than leaking blocks.
+			 */
+			if (is_handle_aborted(handle))
+				return;
+			if (try_to_extend_transaction(handle, inode)) {
+				ext3_mark_inode_dirty(handle, inode);
+				ext3_journal_test_restart(handle, inode);
+			}
+
+			ext3_free_blocks(handle, inode, nr, 1);
+
+			if (parent_bh) {
+				/*
+				 * The block which we have just freed is
+				 * pointed to by an indirect block: journal it
+				 */
+				BUFFER_TRACE(parent_bh, "get_write_access");
+				if (!ext3_journal_get_write_access(handle,
+								   parent_bh)){
+					*p = 0;	/* cleared only once write access is granted */
+					BUFFER_TRACE(parent_bh,
+					"call ext3_journal_dirty_metadata");
+					ext3_journal_dirty_metadata(handle,
+								    parent_bh);
+				}
+			}
+		}
+	} else {
+		/* We have reached the bottom of the tree. */
+		BUFFER_TRACE(parent_bh, "free data blocks");
+		ext3_free_data(handle, inode, parent_bh, first, last);
+	}
+}
+
+/*
+ * ext3_truncate()
+ *
+ * We block out ext3_get_block() block instantiations across the entire
+ * transaction, and VFS/VM ensures that ext3_truncate() cannot run
+ * simultaneously on behalf of the same inode.
+ *
+ * As we work through the truncate and commit bits of it to the journal there
+ * is one core, guiding principle: the file's tree must always be consistent on
+ * disk.  We must be able to restart the truncate after a crash.
+ *
+ * The file's tree may be transiently inconsistent in memory (although it
+ * probably isn't), but whenever we close off and commit a journal transaction,
+ * the contents of (the filesystem + the journal) must be consistent and
+ * restartable.  It's pretty simple, really: bottom up, right to left (although
+ * left-to-right works OK too).
+ *
+ * Note that at recovery time, journal replay occurs *before* the restart of
+ * truncate against the orphan inode list.
+ *
+ * The committed inode has the new, desired i_size (which is the same as
+ * i_disksize in this case).  After a crash, ext3_orphan_cleanup() will see
+ * that this inode's truncate did not complete and it will again call
+ * ext3_truncate() to have another go.  So there will be instantiated blocks
+ * to the right of the truncation point in a crashed ext3 filesystem.  But
+ * that's fine - as long as they are linked from the inode, the post-crash
+ * ext3_truncate() run will find them and release them.
+ */
+void ext3_truncate(struct inode *inode)
+{
+	handle_t *handle;
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	__le32 *i_data = ei->i_data;
+	int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+	struct address_space *mapping = inode->i_mapping;
+	int offsets[4];
+	Indirect chain[4];
+	Indirect *partial;
+	__le32 nr = 0;
+	int n;
+	long last_block;
+	unsigned blocksize = inode->i_sb->s_blocksize;
+	struct page *page;
+
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+	    S_ISLNK(inode->i_mode)))
+		return;	/* only regular files, directories and symlinks */
+	if (ext3_inode_is_fast_symlink(inode))
+		return;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return;
+
+	/*
+	 * We have to lock the EOF page here, because lock_page() nests
+	 * outside journal_start().
+	 */
+	if ((inode->i_size & (blocksize - 1)) == 0) {
+		/* Block boundary? Nothing to do */
+		page = NULL;
+	} else {
+		page = grab_cache_page(mapping,
+				inode->i_size >> PAGE_CACHE_SHIFT);
+		if (!page)
+			return;	/* could not get the EOF page: silently give up */
+	}
+
+	handle = start_transaction(inode);
+	if (IS_ERR(handle)) {
+		if (page) {
+			clear_highpage(page);
+			flush_dcache_page(page);
+			unlock_page(page);
+			page_cache_release(page);
+		}
+		return;		/* AKPM: return what? */
+	}
+
+	last_block = (inode->i_size + blocksize-1)
+					>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
+
+	if (page)	/* callee unlocks and releases the page; its result is ignored */
+		ext3_block_truncate_page(handle, page, mapping, inode->i_size);
+
+	n = ext3_block_to_path(inode, last_block, offsets, NULL);
+	if (n == 0)
+		goto out_stop;	/* error */
+
+	/*
+	 * OK.  This truncate is going to happen.  We add the inode to the
+	 * orphan list, so that if this truncate spans multiple transactions,
+	 * and we crash, we will resume the truncate when the filesystem
+	 * recovers.  It also marks the inode dirty, to catch the new size.
+	 *
+	 * Implication: the file must always be in a sane, consistent
+	 * truncatable state while each transaction commits.
+	 */
+	if (ext3_orphan_add(handle, inode))
+		goto out_stop;
+
+	/*
+	 * The orphan list entry will now protect us from any crash which
+	 * occurs before the truncate completes, so it is now safe to propagate
+	 * the new, shorter inode size (held for now in i_size) into the
+	 * on-disk inode. We do this via i_disksize, which is the value which
+	 * ext3 *really* writes onto the disk inode.
+	 */
+	ei->i_disksize = inode->i_size;
+
+	/*
+	 * From here we block out all ext3_get_block() callers who want to
+	 * modify the block allocation tree.
+	 */
+	mutex_lock(&ei->truncate_mutex);
+
+	if (n == 1) {		/* direct blocks */
+		ext3_free_data(handle, inode, NULL, i_data+offsets[0],
+			       i_data + EXT3_NDIR_BLOCKS);
+		goto do_indirects;
+	}
+
+	partial = ext3_find_shared(inode, n, offsets, chain, &nr);
+	/* Kill the top of shared branch (not detached) */
+	if (nr) {
+		if (partial == chain) {
+			/* Shared branch grows from the inode */
+			ext3_free_branches(handle, inode, NULL,
+					   &nr, &nr+1, (chain+n-1) - partial);
+			*partial->p = 0;
+			/*
+			 * We mark the inode dirty prior to restart,
+			 * and prior to stop.  No need for it here.
+			 */
+		} else {
+			/* Shared branch grows from an indirect block */
+			BUFFER_TRACE(partial->bh, "get_write_access");
+			ext3_free_branches(handle, inode, partial->bh,
+					partial->p,
+					partial->p+1, (chain+n-1) - partial);
+		}
+	}
+	/* Clear the ends of indirect blocks on the shared branch */
+	while (partial > chain) {
+		ext3_free_branches(handle, inode, partial->bh, partial->p + 1,
+				   (__le32*)partial->bh->b_data+addr_per_block,
+				   (chain+n-1) - partial);
+		BUFFER_TRACE(partial->bh, "call brelse");
+		brelse (partial->bh);
+		partial--;
+	}
+do_indirects:
+	/* Kill the remaining (whole) subtrees */
+	switch (offsets[0]) {
+	default:
+		nr = i_data[EXT3_IND_BLOCK];
+		if (nr) {
+			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
+			i_data[EXT3_IND_BLOCK] = 0;
+		}	/* fall through */
+	case EXT3_IND_BLOCK:
+		nr = i_data[EXT3_DIND_BLOCK];
+		if (nr) {
+			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
+			i_data[EXT3_DIND_BLOCK] = 0;
+		}	/* fall through */
+	case EXT3_DIND_BLOCK:
+		nr = i_data[EXT3_TIND_BLOCK];
+		if (nr) {
+			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
+			i_data[EXT3_TIND_BLOCK] = 0;
+		}	/* fall through */
+	case EXT3_TIND_BLOCK:
+		;
+	}
+
+	ext3_discard_reservation(inode);
+
+	mutex_unlock(&ei->truncate_mutex);
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+	ext3_mark_inode_dirty(handle, inode);
+
+	/*
+	 * In a multi-transaction truncate, we only make the final transaction
+	 * synchronous
+	 */
+	if (IS_SYNC(inode))
+		handle->h_sync = 1;
+out_stop:
+	/*
+	 * If this was a simple ftruncate(), and the file will remain alive
+	 * then we need to clear up the orphan record which we created above.
+	 * However, if this was a real unlink then we were called by
+	 * ext3_delete_inode(), and we allow that function to clean up the
+	 * orphan info for us.
+	 */
+	if (inode->i_nlink)
+		ext3_orphan_del(handle, inode);
+
+	ext3_journal_stop(handle);	/* return value is not checked */
+}
+
+static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
+		unsigned long ino, struct ext3_iloc *iloc)
+{
+	unsigned long desc, group_desc, block_group;
+	unsigned long offset;
+	ext3_fsblk_t block;
+	struct buffer_head *bh;
+	struct ext3_group_desc * gdp;
+
+	if (!ext3_valid_inum(sb, ino)) {
+		/*
+		 * This error is already checked for in namei.c unless we are
+		 * looking at an NFS filehandle, in which case no error
+		 * report is needed
+		 */
+		return 0;
+	}
+
+	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
+	if (block_group >= EXT3_SB(sb)->s_groups_count) {
+		ext3_error(sb,"ext3_get_inode_block","group >= groups count");
+		return 0;
+	}
+	smp_rmb();
+	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
+	desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
+	bh = EXT3_SB(sb)->s_group_desc[group_desc];
+	if (!bh) {
+		ext3_error (sb, "ext3_get_inode_block",
+			    "Descriptor not loaded");
+		return 0;
+	}
+
+	gdp = (struct ext3_group_desc *)bh->b_data;
+	/*
+	 * Figure out the offset within the block group inode table
+	 */
+	offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) *
+		EXT3_INODE_SIZE(sb);
+	block = le32_to_cpu(gdp[desc].bg_inode_table) +
+		(offset >> EXT3_BLOCK_SIZE_BITS(sb));
+
+	iloc->block_group = block_group;
+	iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1);
+	return block;
+}
+
+/*
+ * ext3_get_inode_loc returns with an extra refcount against the inode's
+ * underlying buffer_head on success. If 'in_mem' is true, we have all
+ * data in memory that is needed to recreate the on-disk version of this
+ * inode.
+ */
+static int __ext3_get_inode_loc(struct inode *inode,
+				struct ext3_iloc *iloc, int in_mem)
+{
+	ext3_fsblk_t block;
+	struct buffer_head *bh;
+
+	block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc);
+	if (!block)
+		return -EIO;
+
+	bh = sb_getblk(inode->i_sb, block);
+	if (!bh) {
+		ext3_error (inode->i_sb, "ext3_get_inode_loc",
+				"unable to read inode block - "
+				"inode=%lu, block="E3FSBLK,
+				 inode->i_ino, block);
+		return -EIO;
+	}
+	if (!buffer_uptodate(bh)) {
+		lock_buffer(bh);
+		if (buffer_uptodate(bh)) {
+			/* someone brought it uptodate while we waited */
+			unlock_buffer(bh);
+			goto has_buffer;
+		}
+
+		/*
+		 * If we have all information of the inode in memory and this
+		 * is the only valid inode in the block, we need not read the
+		 * block.
+		 */
+		if (in_mem) {
+			struct buffer_head *bitmap_bh;
+			struct ext3_group_desc *desc;
+			int inodes_per_buffer;
+			int inode_offset, i;
+			int block_group;
+			int start;
+
+			block_group = (inode->i_ino - 1) /
+					EXT3_INODES_PER_GROUP(inode->i_sb);
+			inodes_per_buffer = bh->b_size /
+				EXT3_INODE_SIZE(inode->i_sb);
+			inode_offset = ((inode->i_ino - 1) %
+					EXT3_INODES_PER_GROUP(inode->i_sb));
+			start = inode_offset & ~(inodes_per_buffer - 1);
+
+			/* Is the inode bitmap in cache? */
+			desc = ext3_get_group_desc(inode->i_sb,
+						block_group, NULL);
+			if (!desc)
+				goto make_io;
+
+			bitmap_bh = sb_getblk(inode->i_sb,
+					le32_to_cpu(desc->bg_inode_bitmap));
+			if (!bitmap_bh)
+				goto make_io;
+
+			/*
+			 * If the inode bitmap isn't in cache then the
+			 * optimisation may end up performing two reads instead
+			 * of one, so skip it.
+			 */
+			if (!buffer_uptodate(bitmap_bh)) {
+				brelse(bitmap_bh);
+				goto make_io;
+			}
+			for (i = start; i < start + inodes_per_buffer; i++) {
+				if (i == inode_offset)
+					continue;
+				if (ext3_test_bit(i, bitmap_bh->b_data))
+					break;
+			}
+			brelse(bitmap_bh);
+			if (i == start + inodes_per_buffer) {
+				/* all other inodes are free, so skip I/O */
+				memset(bh->b_data, 0, bh->b_size);
+				set_buffer_uptodate(bh);
+				unlock_buffer(bh);
+				goto has_buffer;
+			}
+		}
+
+make_io:
+		/*
+		 * There are other valid inodes in the buffer, this inode
+		 * has in-inode xattrs, or we don't have this inode in memory.
+		 * Read the block from disk.
+		 */
+		get_bh(bh);
+		bh->b_end_io = end_buffer_read_sync;
+		submit_bh(READ_META, bh);
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh)) {
+			ext3_error(inode->i_sb, "ext3_get_inode_loc",
+					"unable to read inode block - "
+					"inode=%lu, block="E3FSBLK,
+					inode->i_ino, block);
+			brelse(bh);
+			return -EIO;
+		}
+	}
+has_buffer:
+	iloc->bh = bh;
+	return 0;
+}
+
+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
+{
+	/* Everything but in-inode xattrs is in core: allow the no-read path */
+	int in_mem = !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR);
+	return __ext3_get_inode_loc(inode, iloc, in_mem);
+}
+
+void ext3_set_inode_flags(struct inode *inode)
+{
+	unsigned int ext3_fl = EXT3_I(inode)->i_flags;
+	unsigned int vfs_fl = 0;
+
+	/* Translate each ext3 EXT3_*_FL bit into its VFS S_* counterpart */
+	vfs_fl |= (ext3_fl & EXT3_SYNC_FL) ? S_SYNC : 0;
+	vfs_fl |= (ext3_fl & EXT3_APPEND_FL) ? S_APPEND : 0;
+	vfs_fl |= (ext3_fl & EXT3_IMMUTABLE_FL) ? S_IMMUTABLE : 0;
+	vfs_fl |= (ext3_fl & EXT3_NOATIME_FL) ? S_NOATIME : 0;
+	vfs_fl |= (ext3_fl & EXT3_DIRSYNC_FL) ? S_DIRSYNC : 0;
+
+	/* Replace only the five bits managed here; keep all other i_flags */
+	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+	inode->i_flags |= vfs_fl;
+}
+
+/* Fill the in-core inode from its on-disk copy.  An unreadable inode block,
+ * a deleted inode or an oversized i_extra_isize marks the inode bad. */
+void ext3_read_inode(struct inode * inode)
+{
+	struct ext3_iloc iloc;
+	struct ext3_inode *raw_inode;
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct buffer_head *bh;
+	int block;
+
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
+	ei->i_acl = EXT3_ACL_NOT_CACHED;
+	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
+#endif
+	ei->i_block_alloc_info = NULL;
+
+	if (__ext3_get_inode_loc(inode, &iloc, 0))
+		goto bad_inode;
+	bh = iloc.bh;
+	raw_inode = ext3_raw_inode(&iloc);
+	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
+	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+	if(!(test_opt (inode->i_sb, NO_UID32))) {
+		inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
+		inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
+	}
+	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
+	inode->i_size = le32_to_cpu(raw_inode->i_size);
+	inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
+	inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime);
+	inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime);
+	inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
+
+	ei->i_state = 0;
+	ei->i_dir_start_lookup = 0;
+	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
+	/* We now have enough fields to check if the inode was active or not.
+	 * This is needed because nfsd might try to access dead inodes; the
+	 * test is the same one that e2fsck uses.  NeilBrown 1999oct15
+	 */
+	if (inode->i_nlink == 0) {
+		if (inode->i_mode == 0 ||
+		    !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
+			/* this inode is deleted */
+			brelse (bh);
+			goto bad_inode;
+		}
+		/* The only unlinked inodes we let through here have valid
+		 * i_mode and are being read by the orphan recovery code:
+		 * that's fine, we're about to finish deleting those. */
+	}
+	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
+	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
+#ifdef EXT3_FRAGMENTS
+	ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
+	ei->i_frag_no = raw_inode->i_frag;
+	ei->i_frag_size = raw_inode->i_fsize;
+#endif
+	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
+	if (!S_ISREG(inode->i_mode)) {
+		ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
+	} else {
+		inode->i_size |=
+			((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
+	}
+	ei->i_disksize = inode->i_size;
+	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
+	ei->i_block_group = iloc.block_group;
+	/*
+	 * NOTE! The in-memory inode i_data array is in little-endian order
+	 * even on big-endian machines: we do NOT byteswap the block numbers!
+	 */
+	for (block = 0; block < EXT3_N_BLOCKS; block++)
+		ei->i_data[block] = raw_inode->i_block[block];
+	INIT_LIST_HEAD(&ei->i_orphan);
+
+	if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 &&
+	    EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
+		/*
+		 * When mke2fs creates big inodes it does not zero out
+		 * the unused bytes above EXT3_GOOD_OLD_INODE_SIZE,
+		 * so ignore those first few inodes.
+		 */
+		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+		if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+		    EXT3_INODE_SIZE(inode->i_sb)) {
+			/* corrupt size: release the inode block, then bail */
+			brelse (bh);
+			goto bad_inode;
+		}
+		if (ei->i_extra_isize == 0) {
+			/* The extra space is currently unused. Use it. */
+			ei->i_extra_isize = sizeof(struct ext3_inode) -
+					    EXT3_GOOD_OLD_INODE_SIZE;
+		} else {
+			__le32 *magic = (void *)raw_inode +
+				EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
+			if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC))
+				 ei->i_state |= EXT3_STATE_XATTR;
+		}
+	} else
+		ei->i_extra_isize = 0;
+
+	if (S_ISREG(inode->i_mode)) {
+		inode->i_op = &ext3_file_inode_operations;
+		inode->i_fop = &ext3_file_operations;
+		ext3_set_aops(inode);
+	} else if (S_ISDIR(inode->i_mode)) {
+		inode->i_op = &ext3_dir_inode_operations;
+		inode->i_fop = &ext3_dir_operations;
+	} else if (S_ISLNK(inode->i_mode)) {
+		if (ext3_inode_is_fast_symlink(inode))
+			inode->i_op = &ext3_fast_symlink_inode_operations;
+		else {
+			inode->i_op = &ext3_symlink_inode_operations;
+			ext3_set_aops(inode);
+		}
+	} else {
+		inode->i_op = &ext3_special_inode_operations;
+		if (raw_inode->i_block[0])
+			init_special_inode(inode, inode->i_mode,
+			   old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
+		else
+			init_special_inode(inode, inode->i_mode,
+			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
+	}
+	brelse (iloc.bh);
+	ext3_set_inode_flags(inode);
+	return;
+bad_inode:
+	make_bad_inode(inode);
+}
+
+/*
+ * Post the struct inode info into an on-disk inode location in the
+ * buffer-cache.  This gobbles the caller's reference to the
+ * buffer_head in the inode location struct.
+ *
+ * The caller must have write access to iloc->bh.
+ */
+static int ext3_do_update_inode(handle_t *handle,
+				struct inode *inode,
+				struct ext3_iloc *iloc)
+{
+	struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct buffer_head *bh = iloc->bh;
+	int err = 0, rc, block;
+
+	/* For fields not tracked in the in-memory inode,
+	 * initialise them to zero for new inodes. */
+	if (ei->i_state & EXT3_STATE_NEW)
+		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
+
+	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
+	if(!(test_opt(inode->i_sb, NO_UID32))) {
+		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
+		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
+/*
+ * Fix up interoperability with old kernels. Otherwise, old inodes get
+ * re-used with the upper 16 bits of the uid/gid intact
+ */
+		if(!ei->i_dtime) {
+			raw_inode->i_uid_high =
+				cpu_to_le16(high_16_bits(inode->i_uid));
+			raw_inode->i_gid_high =
+				cpu_to_le16(high_16_bits(inode->i_gid));
+		} else {
+			raw_inode->i_uid_high = 0;
+			raw_inode->i_gid_high = 0;
+		}
+	} else {
+		raw_inode->i_uid_low =
+			cpu_to_le16(fs_high2lowuid(inode->i_uid));
+		raw_inode->i_gid_low =
+			cpu_to_le16(fs_high2lowgid(inode->i_gid));
+		raw_inode->i_uid_high = 0;
+		raw_inode->i_gid_high = 0;
+	}
+	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
+	raw_inode->i_size = cpu_to_le32(ei->i_disksize);
+	raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
+	raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+	raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+	raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
+	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
+	raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+#ifdef EXT3_FRAGMENTS
+	raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
+	raw_inode->i_frag = ei->i_frag_no;
+	raw_inode->i_fsize = ei->i_frag_size;
+#endif
+	raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
+	if (!S_ISREG(inode->i_mode)) {
+		raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
+	} else {
+		raw_inode->i_size_high =
+			cpu_to_le32(ei->i_disksize >> 32);
+		if (ei->i_disksize > 0x7fffffffULL) {
+			struct super_block *sb = inode->i_sb;
+			if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
+					EXT3_FEATURE_RO_COMPAT_LARGE_FILE) ||
+			    EXT3_SB(sb)->s_es->s_rev_level ==
+					cpu_to_le32(EXT3_GOOD_OLD_REV)) {
+			       /* If this is the first large file
+				* created, add a flag to the superblock.
+				*/
+				err = ext3_journal_get_write_access(handle,
+						EXT3_SB(sb)->s_sbh);
+				if (err)
+					goto out_brelse;
+				ext3_update_dynamic_rev(sb);
+				EXT3_SET_RO_COMPAT_FEATURE(sb,
+					EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
+				sb->s_dirt = 1;
+				handle->h_sync = 1;
+				err = ext3_journal_dirty_metadata(handle,
+						EXT3_SB(sb)->s_sbh);
+			}
+		}
+	}
+	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+		if (old_valid_dev(inode->i_rdev)) {
+			raw_inode->i_block[0] =
+				cpu_to_le32(old_encode_dev(inode->i_rdev));
+			raw_inode->i_block[1] = 0;
+		} else {
+			raw_inode->i_block[0] = 0;
+			raw_inode->i_block[1] =
+				cpu_to_le32(new_encode_dev(inode->i_rdev));
+			raw_inode->i_block[2] = 0;
+		}
+	} else for (block = 0; block < EXT3_N_BLOCKS; block++)
+		raw_inode->i_block[block] = ei->i_data[block];
+
+	if (ei->i_extra_isize)
+		raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
+
+	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+	rc = ext3_journal_dirty_metadata(handle, bh);
+	if (!err)
+		err = rc;
+	ei->i_state &= ~EXT3_STATE_NEW;
+
+out_brelse:
+	brelse (bh);
+	ext3_std_error(inode->i_sb, err);
+	return err;
+}
+
+/*
+ * ext3_write_inode()
+ *
+ * We are called from a few places:
+ *
+ * - Within generic_file_write() for O_SYNC files.
+ *   Here, there will be no transaction running. We wait for any running
+ *   transaction to commit.
+ *
+ * - Within sys_sync(), kupdate and such.
+ *   We wait on commit, if told to.
+ *
+ * - Within prune_icache() (PF_MEMALLOC == true)
+ *   Here we simply return.  We can't afford to block kswapd on the
+ *   journal commit.
+ *
+ * In all cases it is actually safe for us to return without doing anything,
+ * because the inode has been copied into a raw inode buffer in
+ * ext3_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
+ * knfsd.
+ *
+ * Note that we are absolutely dependent upon all inode dirtiers doing the
+ * right thing: they *must* call mark_inode_dirty() after dirtying info in
+ * which we are interested.
+ *
+ * It would be a bug for them to not do this.  The code:
+ *
+ *	mark_inode_dirty(inode)
+ *	stuff();
+ *	inode->i_size = expr;
+ *
+ * is in error because a kswapd-driven write_inode() could occur while
+ * `stuff()' is running, and the new i_size will be lost.  Plus the inode
+ * will no longer be on the superblock's dirty inode list.
+ */
+int ext3_write_inode(struct inode *inode, int wait)
+{
+	/* Called from memory reclaim: return rather than block on a commit */
+	if (current->flags & PF_MEMALLOC)
+		return 0;
+
+	/* A journal handle is already open here: complain and fail */
+	if (ext3_journal_current_handle()) {
+		jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
+		dump_stack();
+		return -EIO;
+	}
+
+	/* Only a caller that asked to wait gets a forced commit */
+	return wait ? ext3_force_commit(inode->i_sb) : 0;
+}
+
+/*
+ * ext3_setattr()
+ *
+ * Called from notify_change.
+ *
+ * We want to trap VFS attempts to truncate the file as soon as
+ * possible.  In particular, we want to make sure that when the VFS
+ * shrinks i_size, we put the inode on the orphan list and modify
+ * i_disksize immediately, so that during the subsequent flushing of
+ * dirty pages and freeing of disk blocks, we can guarantee that any
+ * commit will leave the blocks being flushed in an unused state on
+ * disk.  (On recovery, the inode will get truncated and the blocks will
+ * be freed, so we have a strong guarantee that no future commit will
+ * leave these blocks visible to the user.)
+ *
+ * Called with inode->sem down.
+ */
+int ext3_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error, rc = 0;
+	const unsigned int ia_valid = attr->ia_valid;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+
+	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+		handle_t *handle;
+
+		/* (user+group)*(old+new) structure, inode write (sb,
+		 * inode block, ? - but truncate inode update has it) */
+		handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
+					EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+		if (IS_ERR(handle)) {
+			error = PTR_ERR(handle);
+			goto err_out;
+		}
+		error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
+		if (error) {
+			ext3_journal_stop(handle);
+			return error;
+		}
+		/* Update corresponding info in inode so that everything is in
+		 * one transaction */
+		if (attr->ia_valid & ATTR_UID)
+			inode->i_uid = attr->ia_uid;
+		if (attr->ia_valid & ATTR_GID)
+			inode->i_gid = attr->ia_gid;
+		error = ext3_mark_inode_dirty(handle, inode);
+		ext3_journal_stop(handle);
+	}
+
+	if (S_ISREG(inode->i_mode) &&
+	    attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
+		handle_t *handle;
+
+		handle = ext3_journal_start(inode, 3);
+		if (IS_ERR(handle)) {
+			error = PTR_ERR(handle);
+			goto err_out;
+		}
+
+		error = ext3_orphan_add(handle, inode);
+		EXT3_I(inode)->i_disksize = attr->ia_size;
+		rc = ext3_mark_inode_dirty(handle, inode);
+		if (!error)
+			error = rc;
+		ext3_journal_stop(handle);
+	}
+
+	rc = inode_setattr(inode, attr);
+
+	/* If inode_setattr's call to ext3_truncate failed to get a
+	 * transaction handle at all, we need to clean up the in-core
+	 * orphan list manually. */
+	if (inode->i_nlink)
+		ext3_orphan_del(NULL, inode);
+
+	if (!rc && (ia_valid & ATTR_MODE))
+		rc = ext3_acl_chmod(inode);
+
+err_out:
+	ext3_std_error(inode->i_sb, error);
+	if (!error)
+		error = rc;
+	return error;
+}
+
+
+/*
+ * How many blocks doth make a writepage()?
+ *
+ * With N blocks per page, it may be:
+ * N data blocks
+ * 2 indirect block
+ * 2 dindirect
+ * 1 tindirect
+ * N+5 bitmap blocks (from the above)
+ * N+5 group descriptor summary blocks
+ * 1 inode block
+ * 1 superblock.
+ * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files
+ *
+ * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
+ *
+ * With ordered or writeback data it's the same, less the N data blocks.
+ *
+ * If the inode's direct blocks can hold an integral number of pages then a
+ * page cannot straddle two indirect blocks, and we can only touch one indirect
+ * and dindirect block, and the "5" above becomes "3".
+ *
+ * This still overestimates under most circumstances.  If we were to pass the
+ * start and end offsets in here as well we could do block_to_path() on each
+ * block and work out the exact number of indirects which are touched.  Pah.
+ */
+
+static int ext3_writepage_trans_blocks(struct inode *inode)
+{
+	int blocks_per_page = ext3_journal_blocks_per_page(inode);
+	/* 5 indirect-level blocks if a page can straddle them, otherwise 3 */
+	int per_copy = blocks_per_page +
+			((EXT3_NDIR_BLOCKS % blocks_per_page) ? 5 : 3);
+	/* journalled data uses the 3x multiplier, every other mode uses 2x */
+	int copies = ext3_should_journal_data(inode) ? 3 : 2;
+	int credits = copies * per_copy + 2;
+
+#ifdef CONFIG_QUOTA
+	/*
+	 * The quota structure was already allocated during DQUOT_INIT, so
+	 * only the quota data blocks + inodes are updated here.
+	 */
+	credits += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
+#endif
+	return credits;
+}
+
+/*
+ * The caller must have previously called ext3_reserve_inode_write().
+ * Given this, we know that the caller already has write access to iloc->bh.
+ */
+int ext3_mark_iloc_dirty(handle_t *handle,
+		struct inode *inode, struct ext3_iloc *iloc)
+{
+	int ret;
+
+	/* Extra reference, because ext3_do_update_inode() releases one */
+	get_bh(iloc->bh);
+	/* Copies the inode into iloc->bh and does journal_dirty_metadata */
+	ret = ext3_do_update_inode(handle, inode, iloc);
+	/* Drop the reference the caller's reserve_inode_write() took */
+	put_bh(iloc->bh);
+	return ret;
+}
+
+/*
+ * On success, We end up with an outstanding reference count against
+ * iloc->bh.  This _must_ be cleaned up later.
+ */
+
+int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
+			     struct ext3_iloc *iloc)
+{
+	int ret = 0;
+	if (!handle)		/* no transaction: nothing to reserve */
+		goto out;
+	ret = ext3_get_inode_loc(inode, iloc);
+	if (ret)
+		goto out;
+	BUFFER_TRACE(iloc->bh, "get_write_access");
+	ret = ext3_journal_get_write_access(handle, iloc->bh);
+	if (ret) {		/* don't hand back a bh we could not reserve */
+		brelse(iloc->bh);
+		iloc->bh = NULL;
+	}
+out:
+	ext3_std_error(inode->i_sb, ret);
+	return ret;
+}
+
+/*
+ * What we do here is to mark the in-core inode as clean with respect to inode
+ * dirtiness (it may still be data-dirty).
+ * This means that the in-core inode may be reaped by prune_icache
+ * without having to perform any I/O.  This is a very good thing,
+ * because *any* task may call prune_icache - even ones which
+ * have a transaction open against a different journal.
+ *
+ * Is this cheating?  Not really.  Sure, we haven't written the
+ * inode out, but prune_icache isn't a user-visible syncing function.
+ * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
+ * we start and wait on commits.
+ *
+ * Is this efficient/effective?  Well, we're being nice to the system
+ * by cleaning up our inodes proactively so they can be reaped
+ * without I/O.  But we are potentially leaving up to five seconds'
+ * worth of inodes floating about which prune_icache wants us to
+ * write out.  One way to fix that would be to get prune_icache()
+ * to do a write_super() to free up some memory.  It has the desired
+ * effect.
+ */
+int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
+{
+	struct ext3_iloc iloc;
+	int ret;
+
+	might_sleep();
+	ret = ext3_reserve_inode_write(handle, inode, &iloc);
+	if (ret)
+		return ret;	/* reserve already released the buffer */
+	return ext3_mark_iloc_dirty(handle, inode, &iloc);
+}
+
+/*
+ * ext3_dirty_inode() is called from __mark_inode_dirty()
+ *
+ * We're really interested in the case where a file is being extended.
+ * i_size has been changed by generic_commit_write() and we thus need
+ * to include the updated inode in the current transaction.
+ *
+ * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks
+ * are allocated to the file.
+ *
+ * If the inode is marked synchronous, we don't honour that here - doing
+ * so would cause a commit on atime updates, which we don't bother doing.
+ * We handle synchronous inodes at the highest possible level.
+ */
+void ext3_dirty_inode(struct inode *inode)
+{
+	handle_t *outer = ext3_journal_current_handle();
+	handle_t *handle = ext3_journal_start(inode, 2);
+	int same_trans;
+
+	if (IS_ERR(handle))
+		return;
+
+	/* With no outer handle there is nothing to conflict with */
+	same_trans = !outer || outer->h_transaction == handle->h_transaction;
+	if (same_trans) {
+		jbd_debug(5, "marking dirty.  outer handle=%p\n",
+				outer);
+		ext3_mark_inode_dirty(handle, inode);
+	} else {
+		/* This task has a transaction open against a different fs */
+		printk(KERN_EMERG "%s: transactions do not match!\n",
+		       __FUNCTION__);
+	}
+	ext3_journal_stop(handle);
+}
+
+#if 0
+/*
+ * Bind an inode's backing buffer_head into this transaction, to prevent
+ * it from being flushed to disk early.  Unlike
+ * ext3_reserve_inode_write, this leaves behind no bh reference and
+ * returns no iloc structure, so the caller needs to repeat the iloc
+ * lookup to mark the inode dirty later.
+ */
+static int ext3_pin_inode(handle_t *handle, struct inode *inode)
+{
+	struct ext3_iloc iloc;
+
+	int err = 0;
+	if (handle) {
+		err = ext3_get_inode_loc(inode, &iloc);
+		if (!err) {
+			BUFFER_TRACE(iloc.bh, "get_write_access");
+			err = journal_get_write_access(handle, iloc.bh);
+			if (!err)
+				err = ext3_journal_dirty_metadata(handle,
+								  iloc.bh);
+			brelse(iloc.bh);
+		}
+	}
+	ext3_std_error(inode->i_sb, err);
+	return err;
+}
+#endif
+
+int ext3_change_inode_journal_flag(struct inode *inode, int val)
+{
+	journal_t *journal;
+	handle_t *handle;
+	int err;
+
+	/*
+	 * We have to be very careful here: changing a data block's
+	 * journaling status dynamically is dangerous.  If we write a
+	 * data block to the journal, change the status and then delete
+	 * that block, we risk forgetting to revoke the old log record
+	 * from the journal and so a subsequent replay can corrupt data.
+	 * So, first we make sure that the journal is empty and that
+	 * nobody is changing anything.
+	 */
+
+	journal = EXT3_JOURNAL(inode);
+	if (is_journal_aborted(journal) || IS_RDONLY(inode))
+		return -EROFS;
+
+	journal_lock_updates(journal);
+	journal_flush(journal);
+
+	/*
+	 * OK, there are no updates running now, and all cached data is
+	 * synced to disk.  We are now in a completely consistent state
+	 * which doesn't have anything in the journal, and we know that
+	 * no filesystem updates are running, so it is safe to modify
+	 * the inode's in-core data-journaling state flag now.
+	 */
+
+	if (val)
+		EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
+	else
+		EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL;
+	ext3_set_aops(inode);
+
+	journal_unlock_updates(journal);
+
+	/* Finally we can mark the inode as dirty. */
+
+	handle = ext3_journal_start(inode, 1);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	err = ext3_mark_inode_dirty(handle, inode);
+	handle->h_sync = 1;
+	ext3_journal_stop(handle);
+	ext3_std_error(inode->i_sb, err);
+
+	return err;
+}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
new file mode 100644
index 000000000000..12daa6869572
--- /dev/null
+++ b/fs/ext4/ioctl.c
@@ -0,0 +1,307 @@
+/*
+ * linux/fs/ext3/ioctl.c
+ *
+ * Copyright (C) 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ */
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/capability.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/time.h>
+#include <linux/compat.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+
+int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	unsigned int flags;
+	unsigned short rsv_window_size;
+
+	ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg);
+
+	switch (cmd) {
+	case EXT3_IOC_GETFLAGS:
+		flags = ei->i_flags & EXT3_FL_USER_VISIBLE;
+		return put_user(flags, (int __user *) arg);
+	case EXT3_IOC_SETFLAGS: {
+		handle_t *handle = NULL;
+		int err;
+		struct ext3_iloc iloc;
+		unsigned int oldflags;
+		unsigned int jflag;
+
+		if (IS_RDONLY(inode))
+			return -EROFS;
+
+		if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+			return -EACCES;
+
+		if (get_user(flags, (int __user *) arg))
+			return -EFAULT;
+
+		if (!S_ISDIR(inode->i_mode))
+			flags &= ~EXT3_DIRSYNC_FL;
+
+		mutex_lock(&inode->i_mutex);
+		oldflags = ei->i_flags;
+
+		/* The JOURNAL_DATA flag is modifiable only by root */
+		jflag = flags & EXT3_JOURNAL_DATA_FL;
+
+		/*
+		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+		 * the relevant capability.
+		 *
+		 * This test looks nicer. Thanks to Pauline Middelink
+		 */
+		if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
+			if (!capable(CAP_LINUX_IMMUTABLE)) {
+				mutex_unlock(&inode->i_mutex);
+				return -EPERM;
+			}
+		}
+
+		/*
+		 * The JOURNAL_DATA flag can only be changed by
+		 * the relevant capability.
+		 */
+		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
+			if (!capable(CAP_SYS_RESOURCE)) {
+				mutex_unlock(&inode->i_mutex);
+				return -EPERM;
+			}
+		}
+
+
+		handle = ext3_journal_start(inode, 1);
+		if (IS_ERR(handle)) {
+			mutex_unlock(&inode->i_mutex);
+			return PTR_ERR(handle);
+		}
+		if (IS_SYNC(inode))
+			handle->h_sync = 1;
+		err = ext3_reserve_inode_write(handle, inode, &iloc);
+		if (err)
+			goto flags_err;
+
+		flags = flags & EXT3_FL_USER_MODIFIABLE;
+		flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE;
+		ei->i_flags = flags;
+
+		ext3_set_inode_flags(inode);
+		inode->i_ctime = CURRENT_TIME_SEC;
+
+		err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+		ext3_journal_stop(handle);
+		if (err) {
+			mutex_unlock(&inode->i_mutex);
+			return err;
+		}
+
+		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
+			err = ext3_change_inode_journal_flag(inode, jflag);
+		mutex_unlock(&inode->i_mutex);
+		return err;
+	}
+	case EXT3_IOC_GETVERSION:
+	case EXT3_IOC_GETVERSION_OLD:
+		return put_user(inode->i_generation, (int __user *) arg);
+	case EXT3_IOC_SETVERSION:
+	case EXT3_IOC_SETVERSION_OLD: {
+		handle_t *handle;
+		struct ext3_iloc iloc;
+		__u32 generation;
+		int err;
+
+		if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+			return -EPERM;
+		if (IS_RDONLY(inode))
+			return -EROFS;
+		if (get_user(generation, (int __user *) arg))
+			return -EFAULT;
+
+		handle = ext3_journal_start(inode, 1);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+		err = ext3_reserve_inode_write(handle, inode, &iloc);
+		if (err == 0) {
+			inode->i_ctime = CURRENT_TIME_SEC;
+			inode->i_generation = generation;
+			err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+		}
+		ext3_journal_stop(handle);
+		return err;
+	}
+#ifdef CONFIG_JBD_DEBUG
+	case EXT3_IOC_WAIT_FOR_READONLY:
+		/*
+		 * This is racy - by the time we're woken up and running,
+		 * the superblock could be released.  And the module could
+		 * have been unloaded.  So sue me.
+		 *
+		 * Returns 1 if it slept, else zero.
+		 */
+		{
+			struct super_block *sb = inode->i_sb;
+			DECLARE_WAITQUEUE(wait, current);
+			int ret = 0;
+
+			set_current_state(TASK_INTERRUPTIBLE);
+			add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
+			if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
+				schedule();
+				ret = 1;
+			}
+			remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
+			return ret;
+		}
+#endif
+	case EXT3_IOC_GETRSVSZ:
+		if (test_opt(inode->i_sb, RESERVATION)
+			&& S_ISREG(inode->i_mode)
+			&& ei->i_block_alloc_info) {
+			rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size;
+			return put_user(rsv_window_size, (int __user *)arg);
+		}
+		return -ENOTTY;
+	case EXT3_IOC_SETRSVSZ: {
+
+		if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
+			return -ENOTTY;
+
+		if (IS_RDONLY(inode))
+			return -EROFS;
+
+		if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+			return -EACCES;
+
+		if (get_user(rsv_window_size, (int __user *)arg))
+			return -EFAULT;
+
+		if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS)
+			rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
+
+		/*
+		 * need to allocate reservation structure for this inode
+		 * before set the window size
+		 */
+		mutex_lock(&ei->truncate_mutex);
+		if (!ei->i_block_alloc_info)
+			ext3_init_block_alloc_info(inode);
+
+		if (ei->i_block_alloc_info){
+			struct ext3_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
+			rsv->rsv_goal_size = rsv_window_size;
+		}
+		mutex_unlock(&ei->truncate_mutex);
+		return 0;
+	}
+	case EXT3_IOC_GROUP_EXTEND: {
+		ext3_fsblk_t n_blocks_count;
+		struct super_block *sb = inode->i_sb;
+		int err;
+
+		if (!capable(CAP_SYS_RESOURCE))
+			return -EPERM;
+
+		if (IS_RDONLY(inode))
+			return -EROFS;
+
+		if (get_user(n_blocks_count, (__u32 __user *)arg))
+			return -EFAULT;
+
+		err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
+		journal_lock_updates(EXT3_SB(sb)->s_journal);
+		journal_flush(EXT3_SB(sb)->s_journal);
+		journal_unlock_updates(EXT3_SB(sb)->s_journal);
+
+		return err;
+	}
+	case EXT3_IOC_GROUP_ADD: {
+		struct ext3_new_group_data input;
+		struct super_block *sb = inode->i_sb;
+		int err;
+
+		if (!capable(CAP_SYS_RESOURCE))
+			return -EPERM;
+
+		if (IS_RDONLY(inode))
+			return -EROFS;
+
+		if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg,
+				sizeof(input)))
+			return -EFAULT;
+
+		err = ext3_group_add(sb, &input);
+		journal_lock_updates(EXT3_SB(sb)->s_journal);
+		journal_flush(EXT3_SB(sb)->s_journal);
+		journal_unlock_updates(EXT3_SB(sb)->s_journal);
+
+		return err;
+	}
+
+
+	default:
+		return -ENOTTY;
+	}
+}
+
+#ifdef CONFIG_COMPAT
+long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	int ret;
+
+	/* These are just misnamed, they actually get/put from/to user an int */
+	switch (cmd) {
+	case EXT3_IOC32_GETFLAGS:
+		cmd = EXT3_IOC_GETFLAGS;
+		break;
+	case EXT3_IOC32_SETFLAGS:
+		cmd = EXT3_IOC_SETFLAGS;
+		break;
+	case EXT3_IOC32_GETVERSION:
+		cmd = EXT3_IOC_GETVERSION;
+		break;
+	case EXT3_IOC32_SETVERSION:
+		cmd = EXT3_IOC_SETVERSION;
+		break;
+	case EXT3_IOC32_GROUP_EXTEND:
+		cmd = EXT3_IOC_GROUP_EXTEND;
+		break;
+	case EXT3_IOC32_GETVERSION_OLD:
+		cmd = EXT3_IOC_GETVERSION_OLD;
+		break;
+	case EXT3_IOC32_SETVERSION_OLD:
+		cmd = EXT3_IOC_SETVERSION_OLD;
+		break;
+#ifdef CONFIG_JBD_DEBUG
+	case EXT3_IOC32_WAIT_FOR_READONLY:
+		cmd = EXT3_IOC_WAIT_FOR_READONLY;
+		break;
+#endif
+	case EXT3_IOC32_GETRSVSZ:
+		cmd = EXT3_IOC_GETRSVSZ;
+		break;
+	case EXT3_IOC32_SETRSVSZ:
+		cmd = EXT3_IOC_SETRSVSZ;
+		break;
+	case EXT3_IOC_GROUP_ADD:
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+	lock_kernel();
+	ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+	unlock_kernel();
+	return ret;
+}
+#endif
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
new file mode 100644
index 000000000000..906731a20f1a
--- /dev/null
+++ b/fs/ext4/namei.c
@@ -0,0 +1,2397 @@
+/*
+ *  linux/fs/ext3/namei.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  from
+ *
+ *  linux/fs/minix/namei.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Big-endian to little-endian byte-swapping/bitmaps by
+ *        David S. Miller (davem@caip.rutgers.edu), 1995
+ *  Directory entry file type support and forward compatibility hooks
+ *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
+ *  Hash Tree Directory indexing (c)
+ *	Daniel Phillips, 2001
+ *  Hash Tree Directory indexing porting
+ *	Christopher Li, 2002
+ *  Hash Tree Directory indexing cleanup
+ *	Theodore Ts'o, 2002
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/jbd.h>
+#include <linux/time.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/bio.h>
+#include <linux/smp_lock.h>
+
+#include "namei.h"
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * define how far ahead to read directories while searching them.
+ */
+#define NAMEI_RA_CHUNKS  2
+#define NAMEI_RA_BLOCKS  4
+#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+#define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
+
+/*
+ * Append one block to the end of @inode.
+ *
+ * *block receives the logical number of the appended block, computed as
+ * the current i_size expressed in blocks.  When the block can be read
+ * (ext3_bread() is called with its create argument set), i_size and
+ * i_disksize are grown by one block and journal write access is requested
+ * for the buffer.
+ *
+ * Returns the buffer on success.  Returns NULL on failure: either
+ * ext3_bread() returned NULL (it is given @err to fill in), or the journal
+ * refused write access, in which case the buffer is released and *err
+ * holds the journal error.
+ */
+static struct buffer_head *ext3_append(handle_t *handle,
+					struct inode *inode,
+					u32 *block, int *err)
+{
+	struct buffer_head *bh;
+
+	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
+
+	bh = ext3_bread(handle, inode, *block, 1, err);
+	if (!bh)
+		return NULL;
+
+	inode->i_size += inode->i_sb->s_blocksize;
+	EXT3_I(inode)->i_disksize = inode->i_size;
+
+	/* Never hand back a buffer the journal did not let us modify. */
+	*err = ext3_journal_get_write_access(handle, bh);
+	if (*err) {
+		brelse(bh);
+		bh = NULL;
+	}
+	return bh;
+}
+
+#ifndef assert
+#define assert(test) J_ASSERT(test)
+#endif
+
+#ifndef swap
+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
+#endif
+
+#ifdef DX_DEBUG
+#define dxtrace(command) command
+#else
+#define dxtrace(command)
+#endif
+
+/*
+ * Eight-byte placeholder entry header embedded in index blocks: the "dot"
+ * and "dotdot" slots of struct dx_root and the leading slot of
+ * struct dx_node.
+ * NOTE(review): presumably mirrors the fixed-size head of
+ * struct ext3_dir_entry_2, which is defined outside this file - confirm.
+ */
+struct fake_dirent
+{
+	__le32 inode;
+	__le16 rec_len;
+	u8 name_len;
+	u8 file_type;
+};
+
+/*
+ * Limit/count pair of an index block.  The dx_get_count(), dx_get_limit(),
+ * dx_set_count() and dx_set_limit() helpers reach it by casting the start
+ * of the dx_entry array to this type.
+ */
+struct dx_countlimit
+{
+	__le16 limit;
+	__le16 count;
+};
+
+/*
+ * One slot of an index block: a hash value and the directory block it
+ * refers to.  Both fields are stored little-endian; use the dx_get_*() and
+ * dx_set_*() helpers to access them.
+ */
+struct dx_entry
+{
+	__le32 hash;
+	__le32 block;
+};
+
+/*
+ * dx_root_info is laid out so that if it should somehow get overlaid by a
+ * dirent the two low bits of the hash version will be zero.  Therefore, the
+ * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
+ */
+
+/*
+ * Layout that dx_probe() applies to block 0 of an indexed directory: two
+ * placeholder entries with their names, the dx_root_info header, then the
+ * index entries.  dx_probe() locates the entries at
+ * (char *)&info + info.info_length rather than through the entries[]
+ * member.
+ */
+struct dx_root
+{
+	struct fake_dirent dot;
+	char dot_name[4];
+	struct fake_dirent dotdot;
+	char dotdot_name[4];
+	struct dx_root_info
+	{
+		__le32 reserved_zero;
+		u8 hash_version;	/* checked against the DX_HASH_* values in dx_probe() */
+		u8 info_length; /* 8 */
+		u8 indirect_levels;	/* dx_probe() rejects values above 1 */
+		u8 unused_flags;	/* dx_probe() rejects the block if bit 0 is set */
+	}
+	info;
+	struct dx_entry	entries[0];
+};
+
+/*
+ * Layout applied to a non-root index block: one placeholder entry header
+ * followed by the index entries.
+ */
+struct dx_node
+{
+	struct fake_dirent fake;
+	struct dx_entry	entries[0];
+};
+
+
+/*
+ * One level of an index lookup, as filled in by dx_probe():
+ *   bh      - buffer holding the index block of this level
+ *   entries - start of the dx_entry array inside that block
+ *   at      - the entry selected for the hash being looked up
+ */
+struct dx_frame
+{
+	struct buffer_head *bh;
+	struct dx_entry *entries;
+	struct dx_entry *at;
+};
+
+/*
+ * Element of the map built by dx_make_map(): the hash of a directory
+ * entry's name and the byte offset of that entry from the start of its
+ * block.
+ */
+struct dx_map_entry
+{
+	u32 hash;
+	u32 offs;
+};
+
+#ifdef CONFIG_EXT3_INDEX
+static inline unsigned dx_get_block (struct dx_entry *entry);
+static void dx_set_block (struct dx_entry *entry, unsigned value);
+static inline unsigned dx_get_hash (struct dx_entry *entry);
+static void dx_set_hash (struct dx_entry *entry, unsigned value);
+static unsigned dx_get_count (struct dx_entry *entries);
+static unsigned dx_get_limit (struct dx_entry *entries);
+static void dx_set_count (struct dx_entry *entries, unsigned value);
+static void dx_set_limit (struct dx_entry *entries, unsigned value);
+static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
+static unsigned dx_node_limit (struct inode *dir);
+static struct dx_frame *dx_probe(struct dentry *dentry,
+				 struct inode *dir,
+				 struct dx_hash_info *hinfo,
+				 struct dx_frame *frame,
+				 int *err);
+static void dx_release (struct dx_frame *frames);
+static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
+			struct dx_hash_info *hinfo, struct dx_map_entry map[]);
+static void dx_sort_map(struct dx_map_entry *map, unsigned count);
+static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+		struct dx_map_entry *offsets, int count);
+static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
+static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+				 struct dx_frame *frame,
+				 struct dx_frame *frames,
+				 __u32 *start_hash);
+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+		       struct ext3_dir_entry_2 **res_dir, int *err);
+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+			     struct inode *inode);
+
+/*
+ * Future: use high four bits of block for coalesce-on-delete flags
+ * Mask them off for now.
+ */
+
+/* Read the block number of an index entry in CPU order, masked to its low 24 bits. */
+static inline unsigned dx_get_block (struct dx_entry *entry)
+{
+	const unsigned raw = le32_to_cpu(entry->block);
+
+	return raw & 0x00ffffff;
+}
+
+/* Store @value as the little-endian block number of an index entry. */
+static inline void dx_set_block (struct dx_entry *entry, unsigned value)
+{
+	const __le32 disk_block = cpu_to_le32(value);
+
+	entry->block = disk_block;
+}
+
+/* Read the hash of an index entry in CPU byte order. */
+static inline unsigned dx_get_hash (struct dx_entry *entry)
+{
+	const unsigned cpu_hash = le32_to_cpu(entry->hash);
+
+	return cpu_hash;
+}
+
+/* Store @value as the little-endian hash of an index entry. */
+static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
+{
+	const __le32 disk_hash = cpu_to_le32(value);
+
+	entry->hash = disk_hash;
+}
+
+/* Read the count field of the dx_countlimit overlaid on the start of @entries. */
+static inline unsigned dx_get_count (struct dx_entry *entries)
+{
+	struct dx_countlimit *cl = (struct dx_countlimit *) entries;
+
+	return le16_to_cpu(cl->count);
+}
+
+/* Read the limit field of the dx_countlimit overlaid on the start of @entries. */
+static inline unsigned dx_get_limit (struct dx_entry *entries)
+{
+	struct dx_countlimit *cl = (struct dx_countlimit *) entries;
+
+	return le16_to_cpu(cl->limit);
+}
+
+/* Store @value in the count field of the dx_countlimit overlaid on @entries. */
+static inline void dx_set_count (struct dx_entry *entries, unsigned value)
+{
+	struct dx_countlimit *cl = (struct dx_countlimit *) entries;
+
+	cl->count = cpu_to_le16(value);
+}
+
+/* Store @value in the limit field of the dx_countlimit overlaid on @entries. */
+static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
+{
+	struct dx_countlimit *cl = (struct dx_countlimit *) entries;
+
+	cl->limit = cpu_to_le16(value);
+}
+
+/*
+ * Number of dx_entry slots that fit in the root index block: the block
+ * size less EXT3_DIR_REC_LEN(1), EXT3_DIR_REC_LEN(2) and @infosize bytes,
+ * divided by the size of one dx_entry.
+ */
+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
+{
+	unsigned avail = dir->i_sb->s_blocksize;
+
+	avail -= EXT3_DIR_REC_LEN(1);
+	avail -= EXT3_DIR_REC_LEN(2);
+	avail -= infosize;
+	return avail / sizeof(struct dx_entry);
+}
+
+/*
+ * Number of dx_entry slots that fit in a non-root index block: the block
+ * size less EXT3_DIR_REC_LEN(0) bytes, divided by the size of one dx_entry.
+ */
+static inline unsigned dx_node_limit (struct inode *dir)
+{
+	unsigned avail = dir->i_sb->s_blocksize;
+
+	avail -= EXT3_DIR_REC_LEN(0);
+	return avail / sizeof(struct dx_entry);
+}
+
+/*
+ * Debug
+ */
+#ifdef DX_DEBUG
+/*
+ * Debug helper: print @label followed by one "hash->block" pair per index
+ * entry.  The hash of entry 0 is printed as 0 instead of being read.
+ */
+static void dx_show_index (char * label, struct dx_entry *entries)
+{
+        int i, n = dx_get_count (entries);
+        printk("%s index ", label);
+        for (i = 0; i < n; i++)
+        {
+                printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i));
+        }
+        printk("\n");
+}
+
+/*
+ * Totals returned by the dx_show_*() debug helpers:
+ *   names  - number of directory entries with a non-zero inode
+ *   space  - sum of EXT3_DIR_REC_LEN(name_len) over those entries
+ *   bcount - number of leaf blocks examined
+ */
+struct stats
+{
+	unsigned names;
+	unsigned space;
+	unsigned bcount;
+};
+
+/*
+ * Debug helper: walk the directory entries in the @size bytes starting at
+ * @de, count those with a non-zero inode and add up the space they need.
+ * With @show_names set, each counted name is printed together with its
+ * hash and its byte offset from the start of the block.
+ *
+ * @hinfo is copied, so the caller's hash state is not modified.
+ * Returns the totals with bcount set to 1.
+ */
+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
+				 int size, int show_names)
+{
+	unsigned names = 0, space = 0;
+	char *base = (char *) de;
+	struct dx_hash_info h = *hinfo;
+
+	printk("names: ");
+	while ((char *) de < base + size)
+	{
+		if (de->inode)
+		{
+			if (show_names)
+			{
+				int len = de->name_len;
+				char *name = de->name;
+				while (len--) printk("%c", *name++);
+				ext3fs_dirhash(de->name, de->name_len, &h);
+				/*
+				 * A pointer difference is not an unsigned int;
+				 * cast it so the argument matches %u.
+				 */
+				printk(":%x.%u ", h.hash,
+				       (unsigned) ((char *) de - base));
+			}
+			space += EXT3_DIR_REC_LEN(de->name_len);
+			names++;
+		}
+		de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+	}
+	/* names is unsigned, so it is printed with %u */
+	printk("(%u)\n", names);
+	return (struct stats) { names, space, 1 };
+}
+
+/*
+ * Debug helper: print every entry of an index block and recurse into the
+ * blocks they point to.  With @levels non-zero the referenced block is
+ * treated as another index block (struct dx_node); with @levels zero it is
+ * treated as a leaf and handed to dx_show_leaf().  Blocks that cannot be
+ * read are skipped.  Returns the accumulated totals.
+ */
+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
+			     struct dx_entry *entries, int levels)
+{
+	unsigned blocksize = dir->i_sb->s_blocksize;
+	unsigned count = dx_get_count (entries), names = 0, space = 0, i;
+	unsigned bcount = 0;
+	struct buffer_head *bh;
+	int err;
+	printk("%i indexed blocks...\n", count);
+	for (i = 0; i < count; i++, entries++)
+	{
+		/* entry 0 has no stored hash of its own; it is shown as 0 */
+		u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
+		/* distance to the next entry's hash, or to the top of the hash space */
+		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
+		struct stats stats;
+		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
+		if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
+		stats = levels?
+		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
+		   dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
+		names += stats.names;
+		space += stats.space;
+		bcount += stats.bcount;
+		brelse (bh);
+	}
+	/* bcount is the divisor below, so nothing is printed when it is 0 */
+	if (bcount)
+		printk("%snames %u, fullness %u (%u%%)\n", levels?"":"   ",
+			names, space/bcount,(space/bcount)*100/blocksize);
+	return (struct stats) { names, space, bcount};
+}
+#endif /* DX_DEBUG */
+
+/*
+ * Probe for a directory leaf block to search.
+ *
+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
+ * error in the directory index, and the caller should fall back to
+ * searching the directory normally.  The callers of dx_probe **MUST**
+ * check for this error code, and make sure it never gets reflected
+ * back to userspace.
+ *
+ * If @dentry is non-NULL the directory is taken from its parent and the
+ * hash is computed from its name; otherwise @dir is used and the hash
+ * already stored in @hinfo is looked up.  @frame_in must have room for one
+ * frame per index level (at most two).
+ *
+ * Returns the frame of the lowest index level on success.  On failure
+ * returns NULL with every buffer read here released; *err is then either
+ * ERR_BAD_DX_DIR or whatever ext3_bread() stored in it.
+ *
+ * The count and limit fields come from disk, so an inconsistent value is
+ * reported as ERR_BAD_DX_DIR rather than asserted on.
+ */
+static struct dx_frame *
+dx_probe(struct dentry *dentry, struct inode *dir,
+	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
+{
+	unsigned count, indirect;
+	struct dx_entry *at, *entries, *p, *q, *m;
+	struct dx_root *root;
+	struct buffer_head *bh;
+	struct dx_frame *frame = frame_in;
+	u32 hash;
+
+	frame->bh = NULL;
+	if (dentry)
+		dir = dentry->d_parent->d_inode;
+	if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
+		goto fail;
+	root = (struct dx_root *) bh->b_data;
+	if (root->info.hash_version != DX_HASH_TEA &&
+	    root->info.hash_version != DX_HASH_HALF_MD4 &&
+	    root->info.hash_version != DX_HASH_LEGACY) {
+		ext3_warning(dir->i_sb, __FUNCTION__,
+			     "Unrecognised inode hash code %d",
+			     root->info.hash_version);
+		brelse(bh);
+		*err = ERR_BAD_DX_DIR;
+		goto fail;
+	}
+	hinfo->hash_version = root->info.hash_version;
+	hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+	if (dentry)
+		ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
+	hash = hinfo->hash;
+
+	if (root->info.unused_flags & 1) {
+		ext3_warning(dir->i_sb, __FUNCTION__,
+			     "Unimplemented inode hash flags: %#06x",
+			     root->info.unused_flags);
+		brelse(bh);
+		*err = ERR_BAD_DX_DIR;
+		goto fail;
+	}
+
+	if ((indirect = root->info.indirect_levels) > 1) {
+		ext3_warning(dir->i_sb, __FUNCTION__,
+			     "Unimplemented inode hash depth: %#06x",
+			     root->info.indirect_levels);
+		brelse(bh);
+		*err = ERR_BAD_DX_DIR;
+		goto fail;
+	}
+
+	entries = (struct dx_entry *) (((char *)&root->info) +
+				       root->info.info_length);
+	if (dx_get_limit(entries) != dx_root_limit(dir,
+						   root->info.info_length)) {
+		ext3_warning(dir->i_sb, __FUNCTION__,
+			     "dx entry: limit != root limit");
+		brelse(bh);
+		*err = ERR_BAD_DX_DIR;
+		goto fail;
+	}
+	dxtrace (printk("Look up %x", hash));
+	while (1)
+	{
+		count = dx_get_count(entries);
+		if (!count || count > dx_get_limit(entries)) {
+			ext3_warning(dir->i_sb, __FUNCTION__,
+				     "dx entry: no count or count > limit");
+			/* bh is not recorded in a frame yet; drop it here */
+			brelse(bh);
+			*err = ERR_BAD_DX_DIR;
+			goto fail2;
+		}
+
+		/*
+		 * Binary search over entries[1..count-1] for the last entry
+		 * whose hash is <= the wanted hash; entry 0 is the fallback.
+		 */
+		p = entries + 1;
+		q = entries + count - 1;
+		while (p <= q)
+		{
+			m = p + (q - p)/2;
+			dxtrace(printk("."));
+			if (dx_get_hash(m) > hash)
+				q = m - 1;
+			else
+				p = m + 1;
+		}
+
+		at = p - 1;
+		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
+		frame->bh = bh;
+		frame->entries = entries;
+		frame->at = at;
+		if (!indirect--) return frame;
+		if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
+			goto fail2;
+		at = entries = ((struct dx_node *) bh->b_data)->entries;
+		if (dx_get_limit(entries) != dx_node_limit (dir)) {
+			ext3_warning(dir->i_sb, __FUNCTION__,
+				     "dx entry: limit != node limit");
+			brelse(bh);
+			*err = ERR_BAD_DX_DIR;
+			goto fail2;
+		}
+		frame++;
+		/* so that the fail2 loop never releases an uninitialised bh */
+		frame->bh = NULL;
+	}
+fail2:
+	while (frame >= frame_in) {
+		brelse(frame->bh);
+		frame--;
+	}
+fail:
+	return NULL;
+}
+
+/*
+ * Release the buffers held by a frame array.  Does nothing when
+ * frames[0].bh is NULL.  frames[1].bh is released only when the root
+ * block in frames[0] reports a non-zero indirect_levels.
+ */
+static void dx_release (struct dx_frame *frames)
+{
+	struct buffer_head *root_bh = frames[0].bh;
+	struct dx_root *root;
+
+	if (!root_bh)
+		return;
+
+	root = (struct dx_root *) root_bh->b_data;
+	if (root->info.indirect_levels)
+		brelse(frames[1].bh);
+	brelse(root_bh);
+}
+
+/*
+ * This function increments the frame pointer to search the next leaf
+ * block, and reads in the necessary intervening nodes if the search
+ * should be necessary.  Whether or not the search is necessary is
+ * controlled by the hash parameter.  If the hash value is even, then
+ * the search is only continued if the next block starts with that
+ * hash value.  This is used if we are searching for a specific file.
+ *
+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
+ *
+ * This function returns 1 if the caller should continue to search,
+ * or 0 if it should not.  If there is an error reading one of the
+ * index blocks, it will return a negative error code.
+ *
+ * If start_hash is non-null, it will be filled in with the starting
+ * hash of the next page.
+ *
+ * @frame is the lowest-level frame of the current position and @frames
+ * is the start of the frame array it belongs to.
+ */
+static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+				 struct dx_frame *frame,
+				 struct dx_frame *frames,
+				 __u32 *start_hash)
+{
+	struct dx_frame *p;
+	struct buffer_head *bh;
+	int err, num_frames = 0;
+	__u32 bhash;
+
+	p = frame;
+	/*
+	 * Find the next leaf page by incrementing the frame pointer.
+	 * If we run out of entries in the interior node, loop around and
+	 * increment pointer in the parent node.  When we break out of
+	 * this loop, num_frames indicates the number of interior
+	 * nodes need to be read.
+	 */
+	while (1) {
+		if (++(p->at) < p->entries + dx_get_count(p->entries))
+			break;
+		/* ran off the end of the top-level frame: nothing left */
+		if (p == frames)
+			return 0;
+		num_frames++;
+		p--;
+	}
+
+	/*
+	 * If the low bit of the hash is set, then continue whatever the
+	 * hash of the next page is.  This is used for readdir
+	 * handling.  Otherwise, check to see if the hash matches the
+	 * desired continuation hash.  If it doesn't, return since
+	 * there's no point to read in the successive index pages.
+	 */
+	bhash = dx_get_hash(p->at);
+	if (start_hash)
+		*start_hash = bhash;
+	if ((hash & 1) == 0) {
+		if ((bhash & ~1) != hash)
+			return 0;
+	}
+	/*
+	 * If the hash is HASH_NB_ALWAYS, we always go to the next
+	 * block so no check is necessary
+	 */
+	while (num_frames--) {
+		if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
+				      0, &err)))
+			return err; /* Failure */
+		p++;
+		/* replace the lower frame's block with the newly read one */
+		brelse (p->bh);
+		p->bh = bh;
+		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
+	}
+	return 1;
+}
+
+
+/*
+ * Return the entry that follows @p, found by advancing rec_len bytes.
+ * p is at least 6 bytes before the end of page
+ */
+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
+{
+	char *here = (char *) p;
+	unsigned step = le16_to_cpu(p->rec_len);
+
+	return (struct ext3_dir_entry_2 *) (here + step);
+}
+
+/*
+ * This function fills a red-black tree with information from a
+ * directory block.  It returns the number of directory entries loaded
+ * into the tree, or a negative error code if the block cannot be read
+ * or an entry cannot be stored.
+ *
+ * Entries whose (hash, minor hash) sorts before (@start_hash,
+ * @start_minor_hash) and entries with a zero inode are skipped.
+ *
+ * Every entry is passed through ext3_check_dir_entry() before its
+ * rec_len is used to step to the next one.  If the check fails the rest
+ * of the block is ignored and the count gathered so far is returned, so
+ * a damaged rec_len (e.g. zero) cannot make this loop spin or walk off
+ * the block.
+ */
+static int htree_dirblock_to_tree(struct file *dir_file,
+				  struct inode *dir, int block,
+				  struct dx_hash_info *hinfo,
+				  __u32 start_hash, __u32 start_minor_hash)
+{
+	struct buffer_head *bh;
+	struct ext3_dir_entry_2 *de, *top;
+	int err, count = 0;
+
+	dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
+	if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
+		return err;
+
+	de = (struct ext3_dir_entry_2 *) bh->b_data;
+	top = (struct ext3_dir_entry_2 *) ((char *) de +
+					   dir->i_sb->s_blocksize -
+					   EXT3_DIR_REC_LEN(0));
+	for (; de < top; de = ext3_next_entry(de)) {
+		if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+				(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+					+((char *)de - bh->b_data)))
+			break;
+		ext3fs_dirhash(de->name, de->name_len, hinfo);
+		if ((hinfo->hash < start_hash) ||
+		    ((hinfo->hash == start_hash) &&
+		     (hinfo->minor_hash < start_minor_hash)))
+			continue;
+		if (de->inode == 0)
+			continue;
+		if ((err = ext3_htree_store_dirent(dir_file,
+				   hinfo->hash, hinfo->minor_hash, de)) != 0) {
+			brelse(bh);
+			return err;
+		}
+		count++;
+	}
+	brelse(bh);
+	return count;
+}
+
+
+/*
+ * This function fills a red-black tree with information from a
+ * directory.  We start scanning the directory in hash order, starting
+ * at start_hash and start_minor_hash.
+ *
+ * This function returns the number of entries inserted into the tree,
+ * or a negative error code.
+ *
+ * *next_hash is set to ~0 for a directory without EXT3_INDEX_FL;
+ * otherwise it receives the value reported by the last call to
+ * ext3_htree_next_block().
+ */
+int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
+			 __u32 start_minor_hash, __u32 *next_hash)
+{
+	struct dx_hash_info hinfo;
+	struct ext3_dir_entry_2 *de;
+	struct dx_frame frames[2], *frame;
+	struct inode *dir;
+	int block, err;
+	int count = 0;
+	int ret;
+	__u32 hashval;
+
+	dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
+		       start_minor_hash));
+	dir = dir_file->f_dentry->d_inode;
+	/* Not an indexed directory: load block 0 only, using the default hash */
+	if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
+		hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+		hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
+					       start_hash, start_minor_hash);
+		*next_hash = ~0;
+		return count;
+	}
+	/* dx_probe() is called without a dentry, so it looks up hinfo.hash */
+	hinfo.hash = start_hash;
+	hinfo.minor_hash = 0;
+	frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
+	if (!frame)
+		return err;
+
+	/* Add '.' and '..' from the htree header */
+	if (!start_hash && !start_minor_hash) {
+		de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
+		if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
+			goto errout;
+		count++;
+	}
+	/* the second entry of the root block is stored under hash 2 */
+	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
+		de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
+		de = ext3_next_entry(de);
+		if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
+			goto errout;
+		count++;
+	}
+
+	while (1) {
+		block = dx_get_block(frame->at);
+		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
+					     start_hash, start_minor_hash);
+		if (ret < 0) {
+			err = ret;
+			goto errout;
+		}
+		count += ret;
+		/* stays ~0 if ext3_htree_next_block() returns before storing a hash */
+		hashval = ~0;
+		ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
+					    frame, frames, &hashval);
+		*next_hash = hashval;
+		if (ret < 0) {
+			err = ret;
+			goto errout;
+		}
+		/*
+		 * Stop if:  (a) there are no more entries, or
+		 * (b) we have inserted at least one entry and the
+		 * next hash value is not a continuation
+		 */
+		if ((ret == 0) ||
+		    (count && ((hashval & 1) == 0)))
+			break;
+	}
+	dx_release(frames);
+	dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+		       count, *next_hash));
+	return count;
+errout:
+	dx_release(frames);
+	return (err);
+}
+
+
+/*
+ * Directory block splitting, compacting
+ */
+
+/*
+ * Build a hash/offset map of the live entries in the @size bytes starting
+ * at @de.  An entry is live when both its name_len and inode are non-zero.
+ *
+ * The map is written backwards: @map_tail points one past the last slot
+ * and is decremented before each store, so after the call the map
+ * occupies the `count' slots immediately below the pointer passed in.
+ * @hinfo is copied and not modified.  Returns the number of map entries.
+ */
+static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
+			struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
+{
+	int count = 0;
+	char *base = (char *) de;
+	struct dx_hash_info h = *hinfo;
+
+	while ((char *) de < base + size)
+	{
+		if (de->name_len && de->inode) {
+			ext3fs_dirhash(de->name, de->name_len, &h);
+			map_tail--;
+			map_tail->hash = h.hash;
+			map_tail->offs = (u32) ((char *) de - base);
+			count++;
+			cond_resched();
+		}
+		/* XXX: do we need to check rec_len == 0 case? -Chris */
+		de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+	}
+	return count;
+}
+
+/*
+ * Sort the @count entries of @map into ascending hash order, in place.
+ * Passes with a shrinking gap (gap = gap * 10 / 13) run while the gap is
+ * above 2; a bubble sort then repeats until a pass makes no swap.
+ */
+static void dx_sort_map (struct dx_map_entry *map, unsigned count)
+{
+        struct dx_map_entry *p, *q, *top = map + count - 1;
+        int more;
+        /* Combsort until bubble sort doesn't suck */
+        while (count > 2)
+	{
+                count = count*10/13;
+                /* count is unsigned: only 9 and 10 satisfy this test */
+                if (count - 9 < 2) /* 9, 10 -> 11 */
+                        count = 11;
+                for (p = top, q = p - count; q >= map; p--, q--)
+                        if (p->hash < q->hash)
+                                swap(*p, *q);
+        }
+        /* Garden variety bubble sort */
+        do {
+                more = 0;
+                q = top;
+                /* compare each adjacent pair, walking from the top down */
+                while (q-- > map)
+		{
+                        if (q[1].hash >= q[0].hash)
+				continue;
+                        swap(*(q+1), *q);
+                        more = 1;
+		}
+	} while(more);
+}
+
+/*
+ * Insert a new (@hash, @block) index entry directly after frame->at,
+ * shifting the entries behind it up by one slot and incrementing the
+ * stored count.  The index block must have a free slot.
+ */
+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
+{
+	struct dx_entry *table = frame->entries;
+	struct dx_entry *slot = frame->at + 1;
+	struct dx_entry *end;
+	int used = dx_get_count(table);
+
+	assert(used < dx_get_limit(table));
+	assert(frame->at < table + used);
+
+	/* open a gap at slot by moving [slot, end) one entry up */
+	end = table + used;
+	memmove(slot + 1, slot, (char *) end - (char *) slot);
+
+	dx_set_hash(slot, hash);
+	dx_set_block(slot, block);
+	dx_set_count(table, used + 1);
+}
+#endif
+
+
+/*
+ * Clear EXT3_INDEX_FL on @inode when its filesystem does not have the
+ * EXT3_FEATURE_COMPAT_DIR_INDEX feature; otherwise leave the flags alone.
+ */
+static void ext3_update_dx_flag(struct inode *inode)
+{
+	if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
+				    EXT3_FEATURE_COMPAT_DIR_INDEX))
+		return;
+
+	EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
+}
+
+/*
+ * Compare a name of @len bytes with directory entry @de.
+ *
+ * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
+ * An entry with a zero inode or a different name length never matches.
+ *
+ * `len <= EXT3_NAME_LEN' is guaranteed by caller.
+ * `de != NULL' is guaranteed by caller.
+ */
+static inline int ext3_match (int len, const char * const name,
+			      struct ext3_dir_entry_2 * de)
+{
+	if (!de->inode || len != de->name_len)
+		return 0;
+
+	return memcmp(name, de->name, len) == 0;
+}
+
+/*
+ * Search one directory block for the name in @dentry.
+ *
+ * @offset is the byte offset passed on to ext3_check_dir_entry(); it is
+ * advanced by each entry's rec_len as the block is walked.
+ *
+ * Returns 0 if not found, -1 on failure, and 1 on success
+ * (with *res_dir pointing at the matching entry).  Failure means either
+ * the matching entry did not pass ext3_check_dir_entry() or an entry with
+ * a zero rec_len was met.
+ */
+static inline int search_dirblock(struct buffer_head * bh,
+				  struct inode *dir,
+				  struct dentry *dentry,
+				  unsigned long offset,
+				  struct ext3_dir_entry_2 ** res_dir)
+{
+	struct ext3_dir_entry_2 * de;
+	char * dlimit;
+	int de_len;
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+
+	de = (struct ext3_dir_entry_2 *) bh->b_data;
+	dlimit = bh->b_data + dir->i_sb->s_blocksize;
+	while ((char *) de < dlimit) {
+		/* this code is executed quadratically often */
+		/* do minimal checking `by hand' */
+
+		if ((char *) de + namelen <= dlimit &&
+		    ext3_match (namelen, name, de)) {
+			/* found a match - just to be sure, do a full check */
+			if (!ext3_check_dir_entry("ext3_find_entry",
+						  dir, de, bh, offset))
+				return -1;
+			*res_dir = de;
+			return 1;
+		}
+		/* prevent looping on a bad block */
+		de_len = le16_to_cpu(de->rec_len);
+		if (de_len <= 0)
+			return -1;
+		offset += de_len;
+		de = (struct ext3_dir_entry_2 *) ((char *) de + de_len);
+	}
+	return 0;
+}
+
+
+/*
+ *	ext3_find_entry()
+ *
+ * finds an entry in the specified directory with the wanted name. It
+ * returns the cache buffer in which the entry was found, and the entry
+ * itself (as a parameter - res_dir). It does NOT read the inode of the
+ * entry - you'll have to do that yourself if you want to.
+ *
+ * The returned buffer_head has ->b_count elevated.  The caller is expected
+ * to brelse() it when appropriate.
+ *
+ * The directory searched is dentry->d_parent->d_inode.  NULL is returned
+ * (with *res_dir set to NULL) when the name is longer than EXT3_NAME_LEN,
+ * is not found, or a directory block fails the checks in
+ * search_dirblock().
+ */
+static struct buffer_head * ext3_find_entry (struct dentry *dentry,
+					struct ext3_dir_entry_2 ** res_dir)
+{
+	struct super_block * sb;
+	struct buffer_head * bh_use[NAMEI_RA_SIZE];
+	struct buffer_head * bh, *ret = NULL;
+	unsigned long start, block, b;
+	int ra_max = 0;		/* Number of bh's in the readahead
+				   buffer, bh_use[] */
+	int ra_ptr = 0;		/* Current index into readahead
+				   buffer */
+	int num = 0;
+	int nblocks, i, err;
+	struct inode *dir = dentry->d_parent->d_inode;
+	int namelen;
+	const u8 *name;
+	unsigned blocksize;
+
+	*res_dir = NULL;
+	sb = dir->i_sb;
+	blocksize = sb->s_blocksize;
+	namelen = dentry->d_name.len;
+	name = dentry->d_name.name;
+	if (namelen > EXT3_NAME_LEN)
+		return NULL;
+#ifdef CONFIG_EXT3_INDEX
+	if (is_dx(dir)) {
+		bh = ext3_dx_find_entry(dentry, res_dir, &err);
+		/*
+		 * On success, or if the error was file not found,
+		 * return.  Otherwise, fall back to doing a search the
+		 * old fashioned way.
+		 */
+		if (bh || (err != ERR_BAD_DX_DIR))
+			return bh;
+		dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
+	}
+#endif
+	/*
+	 * Linear search.  Start at the block where the previous lookup in
+	 * this directory succeeded and wrap around to cover every block.
+	 */
+	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+	start = EXT3_I(dir)->i_dir_start_lookup;
+	if (start >= nblocks)
+		start = 0;
+	block = start;
+restart:
+	do {
+		/*
+		 * We deal with the read-ahead logic here.
+		 */
+		if (ra_ptr >= ra_max) {
+			/* Refill the readahead buffer */
+			ra_ptr = 0;
+			b = block;
+			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
+				/*
+				 * Terminate if we reach the end of the
+				 * directory and must wrap, or if our
+				 * search has finished at this block.
+				 */
+				if (b >= nblocks || (num && block == start)) {
+					bh_use[ra_max] = NULL;
+					break;
+				}
+				num++;
+				bh = ext3_getblk(NULL, dir, b++, 0, &err);
+				bh_use[ra_max] = bh;
+				if (bh)
+					ll_rw_block(READ_META, 1, &bh);
+			}
+		}
+		/* a NULL slot means ext3_getblk() gave no buffer, or the end marker */
+		if ((bh = bh_use[ra_ptr++]) == NULL)
+			goto next;
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh)) {
+			/* read error, skip block & hope for the best */
+			ext3_error(sb, __FUNCTION__, "reading directory #%lu "
+				   "offset %lu", dir->i_ino, block);
+			brelse(bh);
+			goto next;
+		}
+		i = search_dirblock(bh, dir, dentry,
+			    block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
+		if (i == 1) {
+			/* remember where we found it for the next lookup */
+			EXT3_I(dir)->i_dir_start_lookup = block;
+			ret = bh;
+			goto cleanup_and_exit;
+		} else {
+			brelse(bh);
+			if (i < 0)
+				goto cleanup_and_exit;
+		}
+	next:
+		if (++block >= nblocks)
+			block = 0;
+	} while (block != start);
+
+	/*
+	 * If the directory has grown while we were searching, then
+	 * search the last part of the directory before giving up.
+	 */
+	block = nblocks;
+	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+	if (block < nblocks) {
+		start = 0;
+		goto restart;
+	}
+
+cleanup_and_exit:
+	/* Clean up the read-ahead blocks */
+	for (; ra_ptr < ra_max; ra_ptr++)
+		brelse (bh_use[ra_ptr]);
+	return ret;
+}
+
+#ifdef CONFIG_EXT3_INDEX
+/*
+ * Look up the name in @dentry through the directory's hash index.
+ *
+ * On success returns the buffer holding the entry and stores the entry in
+ * *res_dir.  On failure returns NULL with *err set:
+ *   -ENOENT         the name is not in the directory
+ *   ERR_BAD_DX_DIR  the index (dx_probe) or the matching entry is
+ *                   malformed; the caller should fall back to a linear
+ *                   search
+ *   other           as stored by dx_probe()/ext3_bread() or returned by
+ *                   ext3_htree_next_block()
+ *
+ * The caller (ext3_find_entry) tests *err after every NULL return, so all
+ * failure paths here must store a value in it.
+ */
+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+		       struct ext3_dir_entry_2 **res_dir, int *err)
+{
+	struct super_block * sb;
+	struct dx_hash_info	hinfo;
+	u32 hash;
+	struct dx_frame frames[2], *frame;
+	struct ext3_dir_entry_2 *de, *top;
+	struct buffer_head *bh;
+	unsigned long block;
+	int retval;
+	int namelen = dentry->d_name.len;
+	const u8 *name = dentry->d_name.name;
+	struct inode *dir = dentry->d_parent->d_inode;
+
+	sb = dir->i_sb;
+	/* NFS may look up ".." - look at dx_root directory block */
+	if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
+		if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
+			return NULL;
+	} else {
+		/*
+		 * NOTE(review): on this path hinfo and frame->entries are
+		 * never initialised, yet both are used below if the name is
+		 * not found in block 0 - confirm that cannot happen.
+		 */
+		frame = frames;
+		frame->bh = NULL;			/* for dx_release() */
+		frame->at = (struct dx_entry *)frames;	/* hack for zero entry*/
+		dx_set_block(frame->at, 0);		/* dx_root block is 0 */
+	}
+	hash = hinfo.hash;
+	do {
+		block = dx_get_block(frame->at);
+		if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+			goto errout;
+		de = (struct ext3_dir_entry_2 *) bh->b_data;
+		top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
+				       EXT3_DIR_REC_LEN(0));
+		for (; de < top; de = ext3_next_entry(de))
+		if (ext3_match (namelen, name, de)) {
+			if (!ext3_check_dir_entry("ext3_find_entry",
+						  dir, de, bh,
+				  (block<<EXT3_BLOCK_SIZE_BITS(sb))
+					  +((char *)de - bh->b_data))) {
+				brelse (bh);
+				/* report the bad entry instead of leaving *err unset */
+				*err = ERR_BAD_DX_DIR;
+				goto errout;
+			}
+			*res_dir = de;
+			dx_release (frames);
+			return bh;
+		}
+		brelse (bh);
+		/* Check to see if we should continue to search */
+		retval = ext3_htree_next_block(dir, hash, frame,
+					       frames, NULL);
+		if (retval < 0) {
+			ext3_warning(sb, __FUNCTION__,
+			     "error reading index page in directory #%lu",
+			     dir->i_ino);
+			*err = retval;
+			goto errout;
+		}
+	} while (retval == 1);
+
+	*err = -ENOENT;
+errout:
+	dxtrace(printk("%s not found\n", name));
+	dx_release (frames);
+	return NULL;
+}
+#endif
+
+/*
+ * Directory lookup.  Finds @dentry's name in its parent directory and
+ * splices the resulting inode (or NULL when the name does not exist) into
+ * the dentry via d_splice_alias().
+ *
+ * Returns ERR_PTR(-ENAMETOOLONG) for names longer than EXT3_NAME_LEN and
+ * ERR_PTR(-EACCES) when the entry holds an invalid inode number or iget()
+ * returns no inode.
+ */
+static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+{
+	struct ext3_dir_entry_2 *de;
+	struct buffer_head *bh;
+	struct inode *inode = NULL;
+	unsigned long ino;
+
+	if (dentry->d_name.len > EXT3_NAME_LEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	bh = ext3_find_entry(dentry, &de);
+	if (!bh)
+		return d_splice_alias(NULL, dentry);
+
+	/* copy the inode number out before the buffer is released */
+	ino = le32_to_cpu(de->inode);
+	brelse (bh);
+
+	if (ext3_valid_inum(dir->i_sb, ino))
+		inode = iget(dir->i_sb, ino);
+	else
+		ext3_error(dir->i_sb, "ext3_lookup",
+			   "bad inode number: %lu", ino);
+
+	if (!inode)
+		return ERR_PTR(-EACCES);
+
+	return d_splice_alias(inode, dentry);
+}
+
+
+/*
+ * Return a dentry for the parent of directory @child, found by looking up
+ * ".." inside @child.
+ *
+ * Returns ERR_PTR(-ENOENT) when ".." is not found, ERR_PTR(-EACCES) when
+ * it holds an invalid inode number or iget() returns no inode, and
+ * ERR_PTR(-ENOMEM) when d_alloc_anon() fails (the inode is then put back).
+ */
+struct dentry *ext3_get_parent(struct dentry *child)
+{
+	struct dentry dotdot;
+	struct dentry *parent;
+	struct ext3_dir_entry_2 *de;
+	struct buffer_head *bh;
+	struct inode *inode = NULL;
+	unsigned long ino;
+
+	/* throw-away dentry naming ".." with @child as the directory to search */
+	dotdot.d_name.name = "..";
+	dotdot.d_name.len = 2;
+	dotdot.d_parent = child; /* confusing, isn't it! */
+
+	bh = ext3_find_entry(&dotdot, &de);
+	if (!bh)
+		return ERR_PTR(-ENOENT);
+
+	/* copy the inode number out before the buffer is released */
+	ino = le32_to_cpu(de->inode);
+	brelse(bh);
+
+	if (ext3_valid_inum(child->d_inode->i_sb, ino))
+		inode = iget(child->d_inode->i_sb, ino);
+	else
+		ext3_error(child->d_inode->i_sb, "ext3_get_parent",
+			   "bad inode number: %lu", ino);
+
+	if (!inode)
+		return ERR_PTR(-EACCES);
+
+	parent = d_alloc_anon(inode);
+	if (parent)
+		return parent;
+
+	iput(inode);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Table mapping the file-type bits of an inode mode, (mode & S_IFMT)
+ * shifted right by S_SHIFT, to the EXT3_FT_* code stored in a directory
+ * entry.  Slots not listed below are zero-initialised.
+ */
+#define S_SHIFT 12
+static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
+	[S_IFREG >> S_SHIFT]	= EXT3_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= EXT3_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= EXT3_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= EXT3_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= EXT3_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= EXT3_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= EXT3_FT_SYMLINK,
+};
+
+/*
+ * Record the file type derived from @mode in directory entry @de.  Does
+ * nothing unless the filesystem has EXT3_FEATURE_INCOMPAT_FILETYPE.
+ */
+static inline void ext3_set_de_type(struct super_block *sb,
+				struct ext3_dir_entry_2 *de,
+				umode_t mode) {
+	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE))
+		return;
+
+	de->file_type = ext3_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
+}
+
+#ifdef CONFIG_EXT3_INDEX
+/*
+ * Copy the @count directory entries named by @map from block @from into
+ * block @to, packed back to back.  Each copy gets the minimal rec_len for
+ * its name, and the inode field of the source entry is zeroed.
+ *
+ * Returns a pointer to the last entry written in @to.
+ */
+static struct ext3_dir_entry_2 *
+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
+{
+	unsigned rec_len = 0;
+
+	while (count--) {
+		struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
+		rec_len = EXT3_DIR_REC_LEN(de->name_len);
+		memcpy (to, de, rec_len);
+		((struct ext3_dir_entry_2 *) to)->rec_len =
+				cpu_to_le16(rec_len);
+		/* mark the source entry as unused */
+		de->inode = 0;
+		map++;
+		to += rec_len;
+	}
+	/* `to' is one past the last copy; step back by its length */
+	return (struct ext3_dir_entry_2 *) (to - rec_len);
+}
+
+/*
+ * Compact the live entries (non-zero inode and name_len) of the @size-byte
+ * block at @base towards its start, giving each the minimal rec_len for
+ * its name.
+ *
+ * Returns a pointer to the last live entry after compaction, or @base when
+ * there is none.
+ */
+static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
+{
+	struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
+	unsigned rec_len = 0;
+
+	prev = to = de;
+	while ((char*)de < base + size) {
+		/* read the successor first: the move below may overwrite de */
+		next = (struct ext3_dir_entry_2 *) ((char *) de +
+						    le16_to_cpu(de->rec_len));
+		if (de->inode && de->name_len) {
+			rec_len = EXT3_DIR_REC_LEN(de->name_len);
+			if (de > to)
+				memmove(to, de, rec_len);
+			to->rec_len = cpu_to_le16(rec_len);
+			prev = to;
+			to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
+		}
+		de = next;
+	}
+	return prev;
+}
+
+/*
+ * Split the leaf block *bh in two.
+ *
+ * A new block is appended to the directory, the entries of *bh are sorted
+ * by hash, and the upper half is moved to the new block.  An index entry
+ * for the new block is inserted into @frame.  On return *bh is whichever
+ * of the two blocks covers hinfo->hash, and the returned entry is the last
+ * one in that block, with its rec_len extended to the end of the block.
+ *
+ * On failure NULL is returned and *bh is released and set to NULL.  When
+ * ext3_append() fails it is given @error to fill in; on a journal error
+ * the code is passed to ext3_std_error().
+ */
+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+			struct buffer_head **bh,struct dx_frame *frame,
+			struct dx_hash_info *hinfo, int *error)
+{
+	unsigned blocksize = dir->i_sb->s_blocksize;
+	unsigned count, continued;
+	struct buffer_head *bh2;
+	u32 newblock;
+	u32 hash2;
+	struct dx_map_entry *map;
+	char *data1 = (*bh)->b_data, *data2;
+	unsigned split;
+	struct ext3_dir_entry_2 *de = NULL, *de2;
+	int	err;
+
+	bh2 = ext3_append (handle, dir, &newblock, error);
+	if (!(bh2)) {
+		brelse(*bh);
+		*bh = NULL;
+		goto errout;
+	}
+
+	BUFFER_TRACE(*bh, "get_write_access");
+	err = ext3_journal_get_write_access(handle, *bh);
+	if (err) {
+	journal_error:
+		/* shared by every journal failure below */
+		brelse(*bh);
+		brelse(bh2);
+		*bh = NULL;
+		ext3_std_error(dir->i_sb, err);
+		goto errout;
+	}
+	BUFFER_TRACE(frame->bh, "get_write_access");
+	err = ext3_journal_get_write_access(handle, frame->bh);
+	if (err)
+		goto journal_error;
+
+	data2 = bh2->b_data;
+
+	/* create map in the end of data2 block */
+	map = (struct dx_map_entry *) (data2 + blocksize);
+	count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
+			     blocksize, hinfo, map);
+	/* dx_make_map() filled the map downwards; move to its first entry */
+	map -= count;
+	split = count/2; // need to adjust to actual middle
+	dx_sort_map (map, count);
+	hash2 = map[split].hash;
+	/* set when the split falls between two entries with the same hash */
+	continued = hash2 == map[split - 1].hash;
+	dxtrace(printk("Split block %i at %x, %i/%i\n",
+		dx_get_block(frame->at), hash2, split, count-split));
+
+	/* Fancy dance to stay within two buffers */
+	de2 = dx_move_dirents(data1, data2, map + split, count - split);
+	de = dx_pack_dirents(data1,blocksize);
+	/* the last entry of each block absorbs the remaining free space */
+	de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
+	de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
+	dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
+	dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
+
+	/* Which block gets the new entry? */
+	if (hinfo->hash >= hash2)
+	{
+		swap(*bh, bh2);
+		de = de2;
+	}
+	/* the low bit of the stored hash carries the `continued' flag */
+	dx_insert_block (frame, hash2 + continued, newblock);
+	err = ext3_journal_dirty_metadata (handle, bh2);
+	if (err)
+		goto journal_error;
+	err = ext3_journal_dirty_metadata (handle, frame->bh);
+	if (err)
+		goto journal_error;
+	brelse (bh2);
+	dxtrace(dx_show_index ("frame", frame->entries));
+errout:
+	return de;
+}
+#endif
+
+
+/*
+ * Add a new entry into a directory (leaf) block.  If de is non-NULL,
+ * it points to a directory entry which is guaranteed to be large
+ * enough for the new directory entry.  If de is NULL, then
+ * add_dirent_to_buf will search the directory block for space.
+ *
+ * Returns 0 on success, -ENOSPC if no space is available in the block,
+ * -EIO if ext3_check_dir_entry() rejects an entry during the search,
+ * -EEXIST if an entry with the same name already exists, or the error
+ * reported by the journal for this buffer.
+ *
+ * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
+ * all other cases bh is released.
+ */
+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
+			     struct inode *inode, struct ext3_dir_entry_2 *de,
+			     struct buffer_head * bh)
+{
+	struct inode	*dir = dentry->d_parent->d_inode;
+	const char	*name = dentry->d_name.name;
+	int		namelen = dentry->d_name.len;
+	unsigned long	offset = 0;
+	unsigned short	reclen;
+	int		nlen, rlen, err;
+	char		*top;
+
+	reclen = EXT3_DIR_REC_LEN(namelen);
+	if (!de) {
+		de = (struct ext3_dir_entry_2 *)bh->b_data;
+		/* Last position at which a record of reclen bytes still fits */
+		top = bh->b_data + dir->i_sb->s_blocksize - reclen;
+		while ((char *) de <= top) {
+			if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
+						  bh, offset)) {
+				brelse (bh);
+				return -EIO;
+			}
+			if (ext3_match (namelen, name, de)) {
+				brelse (bh);
+				return -EEXIST;
+			}
+			nlen = EXT3_DIR_REC_LEN(de->name_len);
+			rlen = le16_to_cpu(de->rec_len);
+			/*
+			 * A live entry can only donate the slack behind its
+			 * name; an unused one (inode == 0) donates all of it.
+			 */
+			if ((de->inode? rlen - nlen: rlen) >= reclen)
+				break;
+			de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
+			offset += rlen;
+		}
+		if ((char *) de > top)
+			return -ENOSPC;
+	}
+	BUFFER_TRACE(bh, "get_write_access");
+	err = ext3_journal_get_write_access(handle, bh);
+	if (err) {
+		ext3_std_error(dir->i_sb, err);
+		brelse(bh);
+		return err;
+	}
+
+	/* By now the buffer is marked for journaling */
+	nlen = EXT3_DIR_REC_LEN(de->name_len);
+	rlen = le16_to_cpu(de->rec_len);
+	if (de->inode) {
+		/* Carve the new record out of the tail of a live entry */
+		struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
+		de1->rec_len = cpu_to_le16(rlen - nlen);
+		de->rec_len = cpu_to_le16(nlen);
+		de = de1;
+	}
+	de->file_type = EXT3_FT_UNKNOWN;
+	if (inode) {
+		de->inode = cpu_to_le32(inode->i_ino);
+		ext3_set_de_type(dir->i_sb, de, inode->i_mode);
+	} else
+		de->inode = 0;
+	de->name_len = namelen;
+	memcpy (de->name, name, namelen);
+	/*
+	 * XXX shouldn't update any times until successful
+	 * completion of syscall, but too many callers depend
+	 * on this.
+	 *
+	 * XXX similarly, too many callers depend on
+	 * ext3_new_inode() setting the times, but error
+	 * recovery deletes the inode, so the worst that can
+	 * happen is that the times are slightly out of date
+	 * and/or different from the directory change time.
+	 */
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+	ext3_update_dx_flag(dir);
+	dir->i_version++;
+	ext3_mark_inode_dirty(handle, dir);
+	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+	err = ext3_journal_dirty_metadata(handle, bh);
+	if (err)
+		ext3_std_error(dir->i_sb, err);
+	brelse(bh);
+	/* Report a journalling failure instead of claiming success */
+	return err;
+}
+
+#ifdef CONFIG_EXT3_INDEX
+/*
+ * This converts a one block unindexed directory to a 3 block indexed
+ * directory, and adds the dentry to the indexed directory.
+ *
+ * @bh is the directory's single existing block.  It is turned into the
+ * index root; its entries (everything after "..") are copied into a newly
+ * appended block, which do_split() then divides into two leaves.
+ *
+ * Returns 0 or a negative error.  @bh is released on the early error
+ * paths; after that it is tracked in frames[0] and handed to dx_release().
+ */
+static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
+			    struct inode *inode, struct buffer_head *bh)
+{
+	struct inode	*dir = dentry->d_parent->d_inode;
+	const char	*name = dentry->d_name.name;
+	int		namelen = dentry->d_name.len;
+	struct buffer_head *bh2;
+	struct dx_root	*root;
+	struct dx_frame	frames[2], *frame;
+	struct dx_entry *entries;
+	struct ext3_dir_entry_2	*de, *de2;
+	char		*data1, *top;
+	unsigned	len;
+	int		retval;
+	unsigned	blocksize;
+	struct dx_hash_info hinfo;
+	u32		block;
+	struct fake_dirent *fde;
+
+	blocksize =  dir->i_sb->s_blocksize;
+	dxtrace(printk("Creating index\n"));
+	retval = ext3_journal_get_write_access(handle, bh);
+	if (retval) {
+		ext3_std_error(dir->i_sb, retval);
+		brelse(bh);
+		return retval;
+	}
+	root = (struct dx_root *) bh->b_data;
+
+	bh2 = ext3_append (handle, dir, &block, &retval);
+	if (!(bh2)) {
+		brelse(bh);
+		return retval;
+	}
+	EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
+	data1 = bh2->b_data;
+
+	/* The 0th block becomes the root, move the dirents out */
+	fde = &root->dotdot;
+	/* First entry after "..", and the byte count from there to block end */
+	de = (struct ext3_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len));
+	len = ((char *) root) + blocksize - (char *) de;
+	memcpy (data1, de, len);
+	de = (struct ext3_dir_entry_2 *) data1;
+	top = data1 + len;
+	/* Walk to the last copied entry ... */
+	while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top)
+		de = de2;
+	/* ... and let it span the rest of the new block */
+	de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
+	/* Initialize the root; the dot dirents already exist */
+	de = (struct ext3_dir_entry_2 *) (&root->dotdot);
+	/* NOTE(review): presumably stretches ".." to the block end; assumes "." occupies EXT3_DIR_REC_LEN(2) bytes - confirm */
+	de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
+	memset (&root->info, 0, sizeof(root->info));
+	root->info.info_length = sizeof(root->info);
+	root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+	entries = root->entries;
+	/* A single index entry referring to block 1 (presumably the block just appended - confirm) */
+	dx_set_block (entries, 1);
+	dx_set_count (entries, 1);
+	dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
+
+	/* Initialize as for dx_probe */
+	hinfo.hash_version = root->info.hash_version;
+	hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+	ext3fs_dirhash(name, namelen, &hinfo);
+	frame = frames;
+	frame->entries = entries;
+	frame->at = entries;
+	frame->bh = bh;
+	/* From here on bh is the leaf block; the root lives in frame->bh */
+	bh = bh2;
+	de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
+	dx_release (frames);
+	if (!(de))
+		return retval;
+
+	return add_dirent_to_buf(handle, dentry, inode, de, bh);
+}
+#endif
+
+/*
+ *	ext3_add_entry()
+ *
+ * Adds an entry named by @dentry, referring to @inode, to the parent
+ * directory of @dentry.  Returns 0 on success or a negative errno
+ * (-EINVAL for an empty name).
+ *
+ * Indexed directories are handled by ext3_dx_add_entry().  If that
+ * reports ERR_BAD_DX_DIR, the index flag is cleared on the directory and
+ * the linear path below is used instead.  The linear path tries every
+ * existing block in turn and appends a fresh block when none has room.
+ * A single-block directory that overflows is converted to an indexed
+ * one when the filesystem has the DIR_INDEX feature (and we did not
+ * just fall back from a bad index).
+ */
+static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
+	struct inode *inode)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct buffer_head * bh;
+	struct ext3_dir_entry_2 *de;
+	struct super_block * sb;
+	int	retval;
+#ifdef CONFIG_EXT3_INDEX
+	int	dx_fallback=0;
+#endif
+	unsigned blocksize;
+	u32 block, blocks;
+
+	sb = dir->i_sb;
+	blocksize = sb->s_blocksize;
+	if (!dentry->d_name.len)
+		return -EINVAL;
+#ifdef CONFIG_EXT3_INDEX
+	if (is_dx(dir)) {
+		retval = ext3_dx_add_entry(handle, dentry, inode);
+		if (!retval || (retval != ERR_BAD_DX_DIR))
+			return retval;
+		EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
+		dx_fallback++;
+		ext3_mark_inode_dirty(handle, dir);
+	}
+#endif
+	blocks = dir->i_size >> sb->s_blocksize_bits;
+	for (block = 0; block < blocks; block++) {
+		bh = ext3_bread(handle, dir, block, 0, &retval);
+		if (!bh)
+			return retval;
+		/* Anything but -ENOSPC means bh was consumed by the callee */
+		retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+		if (retval != -ENOSPC)
+			return retval;
+
+#ifdef CONFIG_EXT3_INDEX
+		if (blocks == 1 && !dx_fallback &&
+		    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
+			return make_indexed_dir(handle, dentry, inode, bh);
+#endif
+		brelse(bh);
+	}
+	/* No existing block had room: start a new one with one empty entry */
+	bh = ext3_append(handle, dir, &block, &retval);
+	if (!bh)
+		return retval;
+	de = (struct ext3_dir_entry_2 *) bh->b_data;
+	de->inode = 0;
+	de->rec_len = cpu_to_le16(blocksize);
+	return add_dirent_to_buf(handle, dentry, inode, de, bh);
+}
+
+#ifdef CONFIG_EXT3_INDEX
+/*
+ * Add an entry to an indexed directory.
+ *
+ * dx_probe() supplies the index path in frames[]; the leaf block named by
+ * frame->at is read and add_dirent_to_buf() is tried on it.  If the leaf is
+ * full (-ENOSPC) the leaf is split with do_split(), after first splitting
+ * the index node (or adding a second index level) when the index node has
+ * no free slot either.
+ *
+ * Returns 0 for success, or a negative error value
+ */
+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+			     struct inode *inode)
+{
+	struct dx_frame frames[2], *frame;
+	struct dx_entry *entries, *at;
+	struct dx_hash_info hinfo;
+	struct buffer_head * bh;
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct super_block * sb = dir->i_sb;
+	struct ext3_dir_entry_2 *de;
+	int err;
+
+	frame = dx_probe(dentry, NULL, &hinfo, frames, &err);
+	if (!frame)
+		return err;
+	entries = frame->entries;
+	at = frame->at;
+
+	if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
+		goto cleanup;
+
+	BUFFER_TRACE(bh, "get_write_access");
+	err = ext3_journal_get_write_access(handle, bh);
+	if (err)
+		goto journal_error;
+
+	err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+	if (err != -ENOSPC) {
+		/* add_dirent_to_buf() released bh in every non-ENOSPC case */
+		bh = NULL;
+		goto cleanup;
+	}
+
+	/* Block full, should compress but for now just split */
+	dxtrace(printk("using %u of %u node entries\n",
+		       dx_get_count(entries), dx_get_limit(entries)));
+	/* Need to split index? */
+	if (dx_get_count(entries) == dx_get_limit(entries)) {
+		u32 newblock;
+		unsigned icount = dx_get_count(entries);
+		int levels = frame - frames;	/* 0: frame is the root, 1: second-level node */
+		struct dx_entry *entries2;
+		struct dx_node *node2;
+		struct buffer_head *bh2;
+
+		/* Second-level node is full and so is the root: give up */
+		if (levels && (dx_get_count(frames->entries) ==
+			       dx_get_limit(frames->entries))) {
+			ext3_warning(sb, __FUNCTION__,
+				     "Directory index full!");
+			err = -ENOSPC;
+			goto cleanup;
+		}
+		bh2 = ext3_append (handle, dir, &newblock, &err);
+		if (!(bh2))
+			goto cleanup;
+		node2 = (struct dx_node *)(bh2->b_data);
+		entries2 = node2->entries;
+		/* One unused dirent spanning the whole block fronts the index data */
+		node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+		node2->fake.inode = 0;
+		BUFFER_TRACE(frame->bh, "get_write_access");
+		err = ext3_journal_get_write_access(handle, frame->bh);
+		/* NOTE(review): bh2 does not appear to be released when this or the later pre-brelse jumps to journal_error are taken - confirm */
+		if (err)
+			goto journal_error;
+		if (levels) {
+			/* Move the upper half of the node's entries into the new node */
+			unsigned icount1 = icount/2, icount2 = icount - icount1;
+			unsigned hash2 = dx_get_hash(entries + icount1);
+			dxtrace(printk("Split index %i/%i\n", icount1, icount2));
+
+			/* NOTE(review): traces frame->bh although access is taken on frames[0].bh */
+			BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
+			err = ext3_journal_get_write_access(handle,
+							     frames[0].bh);
+			if (err)
+				goto journal_error;
+
+			memcpy ((char *) entries2, (char *) (entries + icount1),
+				icount2 * sizeof(struct dx_entry));
+			dx_set_count (entries, icount1);
+			dx_set_count (entries2, icount2);
+			dx_set_limit (entries2, dx_node_limit(dir));
+
+			/* Which index block gets the new entry? */
+			if (at - entries >= icount1) {
+				frame->at = at = at - entries - icount1 + entries2;
+				frame->entries = entries = entries2;
+				swap(frame->bh, bh2);
+			}
+			dx_insert_block (frames + 0, hash2, newblock);
+			dxtrace(dx_show_index ("node", frames[1].entries));
+			dxtrace(dx_show_index ("node",
+			       ((struct dx_node *) bh2->b_data)->entries));
+			err = ext3_journal_dirty_metadata(handle, bh2);
+			if (err)
+				goto journal_error;
+			brelse (bh2);
+		} else {
+			/* Root is full: push all of its entries down into a new node */
+			dxtrace(printk("Creating second level index...\n"));
+			memcpy((char *) entries2, (char *) entries,
+			       icount * sizeof(struct dx_entry));
+			dx_set_limit(entries2, dx_node_limit(dir));
+
+			/* Set up root */
+			dx_set_count(entries, 1);
+			dx_set_block(entries + 0, newblock);
+			((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
+
+			/* Add new access path frame */
+			frame = frames + 1;
+			frame->at = at = at - entries + entries2;
+			frame->entries = entries = entries2;
+			frame->bh = bh2;
+			err = ext3_journal_get_write_access(handle,
+							     frame->bh);
+			if (err)
+				goto journal_error;
+		}
+		/* NOTE(review): the return value of this call is not checked */
+		ext3_journal_dirty_metadata(handle, frames[0].bh);
+	}
+	de = do_split(handle, dir, &bh, frame, &hinfo, &err);
+	if (!de)
+		goto cleanup;
+	err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+	/* de was supplied, so add_dirent_to_buf() never returns with bh still held */
+	bh = NULL;
+	goto cleanup;
+
+journal_error:
+	ext3_std_error(dir->i_sb, err);
+cleanup:
+	if (bh)
+		brelse(bh);
+	dx_release(frames);
+	return err;
+}
+#endif
+
+/*
+ * ext3_delete_entry deletes a directory entry by merging it with the
+ * previous entry
+ *
+ * @bh is the block containing @de_del.  The block is walked from its
+ * start so that the entry preceding @de_del is known; that entry's
+ * rec_len is grown to swallow @de_del.  If @de_del is the first entry of
+ * the block, its inode field is zeroed instead.
+ *
+ * Returns 0 on success, -EIO if ext3_check_dir_entry() rejects an entry
+ * on the way, -ENOENT if @de_del is not found in the block, or the error
+ * reported by the journal.  @bh is not released here.
+ */
+static int ext3_delete_entry (handle_t *handle,
+			      struct inode * dir,
+			      struct ext3_dir_entry_2 * de_del,
+			      struct buffer_head * bh)
+{
+	struct ext3_dir_entry_2 * de, * pde;
+	int i;
+
+	i = 0;
+	pde = NULL;
+	de = (struct ext3_dir_entry_2 *) bh->b_data;
+	while (i < bh->b_size) {
+		if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
+			return -EIO;
+		if (de == de_del)  {
+			int err;
+
+			BUFFER_TRACE(bh, "get_write_access");
+			err = ext3_journal_get_write_access(handle, bh);
+			if (err) {
+				/* Do not touch a buffer the journal refused */
+				ext3_std_error(dir->i_sb, err);
+				return err;
+			}
+			if (pde)
+				pde->rec_len =
+					cpu_to_le16(le16_to_cpu(pde->rec_len) +
+						    le16_to_cpu(de->rec_len));
+			else
+				de->inode = 0;
+			dir->i_version++;
+			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+			err = ext3_journal_dirty_metadata(handle, bh);
+			if (err) {
+				ext3_std_error(dir->i_sb, err);
+				return err;
+			}
+			return 0;
+		}
+		i += le16_to_cpu(de->rec_len);
+		pde = de;
+		de = (struct ext3_dir_entry_2 *)
+			((char *) de + le16_to_cpu(de->rec_len));
+	}
+	return -ENOENT;
+}
+
+/*
+ * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
+ * do not perform it in these functions.  We perform it at the call site,
+ * if it is needed.
+ *
+ * ext3_inc_count() raises the link count of @inode via inc_nlink().
+ * @handle is accepted but not used.
+ */
+static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
+{
+	inc_nlink(inode);
+}
+
+/*
+ * Lower the link count of @inode via drop_nlink().  The inode is not
+ * marked dirty here; @handle is accepted but not used.
+ */
+static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
+{
+	drop_nlink(inode);
+}
+
+/*
+ * Enter @inode into the parent directory of @dentry under the dentry's
+ * name and, on success, mark the inode dirty and instantiate the dentry.
+ *
+ * On failure one link and one reference are dropped from @inode and the
+ * error from ext3_add_entry() is returned.
+ */
+static int ext3_add_nondir(handle_t *handle,
+		struct dentry *dentry, struct inode *inode)
+{
+	int rc = ext3_add_entry(handle, dentry, inode);
+
+	if (rc) {
+		ext3_dec_count(handle, inode);
+		iput(inode);
+		return rc;
+	}
+
+	ext3_mark_inode_dirty(handle, inode);
+	d_instantiate(dentry, inode);
+	return 0;
+}
+
+/*
+ * Create a regular file.
+ *
+ * The dentry for the new name already exists when we get here, but it is
+ * still negative (no inode attached).  A new inode is allocated, given the
+ * regular-file operations, and linked into @dir; ext3_add_nondir() attaches
+ * it to the dentry with d_instantiate() on success.
+ *
+ * The whole transaction is repeated for as long as it fails with -ENOSPC
+ * and ext3_should_retry_alloc() asks for another attempt.
+ */
+static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+		struct nameidata *nd)
+{
+	handle_t *handle;
+	struct inode * inode;
+	int err, retries = 0;
+
+	do {
+		handle = ext3_journal_start(dir,
+				EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+				EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+				2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+
+		if (IS_DIRSYNC(dir))
+			handle->h_sync = 1;
+
+		inode = ext3_new_inode (handle, dir, mode);
+		if (IS_ERR(inode)) {
+			err = PTR_ERR(inode);
+		} else {
+			inode->i_op = &ext3_file_inode_operations;
+			inode->i_fop = &ext3_file_operations;
+			ext3_set_aops(inode);
+			err = ext3_add_nondir(handle, dentry, inode);
+		}
+		ext3_journal_stop(handle);
+	} while (err == -ENOSPC &&
+		 ext3_should_retry_alloc(dir->i_sb, &retries));
+
+	return err;
+}
+
+/*
+ * Create a special file (device node, FIFO, socket) named by @dentry.
+ *
+ * Returns -EINVAL when @rdev fails new_valid_dev().  Otherwise a new inode
+ * is allocated, set up by init_special_inode() and linked into @dir.  The
+ * transaction is repeated while it fails with -ENOSPC and
+ * ext3_should_retry_alloc() asks for another attempt.
+ */
+static int ext3_mknod (struct inode * dir, struct dentry *dentry,
+			int mode, dev_t rdev)
+{
+	handle_t *handle;
+	struct inode *inode;
+	int err, retries = 0;
+
+	if (!new_valid_dev(rdev))
+		return -EINVAL;
+
+	do {
+		handle = ext3_journal_start(dir,
+				EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+				EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+				2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+
+		if (IS_DIRSYNC(dir))
+			handle->h_sync = 1;
+
+		inode = ext3_new_inode (handle, dir, mode);
+		if (IS_ERR(inode)) {
+			err = PTR_ERR(inode);
+		} else {
+			init_special_inode(inode, inode->i_mode, rdev);
+#ifdef CONFIG_EXT3_FS_XATTR
+			inode->i_op = &ext3_special_inode_operations;
+#endif
+			err = ext3_add_nondir(handle, dentry, inode);
+		}
+		ext3_journal_stop(handle);
+	} while (err == -ENOSPC &&
+		 ext3_should_retry_alloc(dir->i_sb, &retries));
+
+	return err;
+}
+
+/*
+ * Create directory @dentry inside @dir.
+ *
+ * Allocates the inode, writes "." and ".." into its first block, then
+ * links the new directory into @dir and bumps @dir's link count.
+ * Returns -EMLINK if @dir already has EXT3_LINK_MAX links, otherwise 0 or
+ * a negative error.  The transaction is repeated while it fails with
+ * -ENOSPC and ext3_should_retry_alloc() asks for another attempt.
+ */
+static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+{
+	handle_t *handle;
+	struct inode * inode;
+	struct buffer_head * dir_block;
+	struct ext3_dir_entry_2 * de;
+	int err, retries = 0;
+
+	if (dir->i_nlink >= EXT3_LINK_MAX)
+		return -EMLINK;
+
+retry:
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	if (IS_DIRSYNC(dir))
+		handle->h_sync = 1;
+
+	inode = ext3_new_inode (handle, dir, S_IFDIR | mode);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out_stop;
+
+	inode->i_op = &ext3_dir_inode_operations;
+	inode->i_fop = &ext3_dir_operations;
+	inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+	/* NOTE(review): the fourth argument (1) presumably requests block creation - confirm */
+	dir_block = ext3_bread (handle, inode, 0, 1, &err);
+	if (!dir_block) {
+		drop_nlink(inode); /* is this nlink == 0? */
+		ext3_mark_inode_dirty(handle, inode);
+		iput (inode);
+		goto out_stop;
+	}
+	BUFFER_TRACE(dir_block, "get_write_access");
+	/* NOTE(review): the return value of this call is not checked */
+	ext3_journal_get_write_access(handle, dir_block);
+	/* First entry: "." referring to the new directory itself */
+	de = (struct ext3_dir_entry_2 *) dir_block->b_data;
+	de->inode = cpu_to_le32(inode->i_ino);
+	de->name_len = 1;
+	de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len));
+	strcpy (de->name, ".");
+	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
+	/* Second entry: ".." referring to the parent, spanning the rest of the block */
+	de = (struct ext3_dir_entry_2 *)
+			((char *) de + le16_to_cpu(de->rec_len));
+	de->inode = cpu_to_le32(dir->i_ino);
+	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
+	de->name_len = 2;
+	strcpy (de->name, "..");
+	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
+	inode->i_nlink = 2;
+	BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
+	/* NOTE(review): the return value of this call is not checked */
+	ext3_journal_dirty_metadata(handle, dir_block);
+	brelse (dir_block);
+	ext3_mark_inode_dirty(handle, inode);
+	err = ext3_add_entry (handle, dentry, inode);
+	if (err) {
+		/* Could not link it in: zero the link count and drop our reference */
+		inode->i_nlink = 0;
+		ext3_mark_inode_dirty(handle, inode);
+		iput (inode);
+		goto out_stop;
+	}
+	/* The child's ".." entry is one more link to the parent */
+	inc_nlink(dir);
+	ext3_update_dx_flag(dir);
+	ext3_mark_inode_dirty(handle, dir);
+	d_instantiate(dentry, inode);
+out_stop:
+	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
+	return err;
+}
+
+/*
+ * routine to check that the specified directory is empty (for rmdir)
+ *
+ * Returns 0 as soon as an entry with a non-zero inode is found after "."
+ * and "..", and 1 otherwise.  Note that 1 is also returned, after logging,
+ * when the directory is too small to hold "." and "..", when its first
+ * block cannot be read, or when "." / ".." are missing or malformed.
+ * Unreadable later blocks and blocks with a bad entry are skipped.
+ */
+static int empty_dir (struct inode * inode)
+{
+	unsigned long offset;
+	struct buffer_head * bh;
+	struct ext3_dir_entry_2 * de, * de1;
+	struct super_block * sb;
+	int err = 0;
+
+	sb = inode->i_sb;
+	if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
+	    !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
+		if (err)
+			ext3_error(inode->i_sb, __FUNCTION__,
+				   "error %d reading directory #%lu offset 0",
+				   err, inode->i_ino);
+		else
+			ext3_warning(inode->i_sb, __FUNCTION__,
+				     "bad directory (dir #%lu) - no data block",
+				     inode->i_ino);
+		return 1;
+	}
+	/* de is expected to be ".", de1 the entry right after it ("..") */
+	de = (struct ext3_dir_entry_2 *) bh->b_data;
+	de1 = (struct ext3_dir_entry_2 *)
+			((char *) de + le16_to_cpu(de->rec_len));
+	/* NOTE(review): strcmp() relies on a NUL byte following each on-disk name - confirm */
+	if (le32_to_cpu(de->inode) != inode->i_ino ||
+			!le32_to_cpu(de1->inode) ||
+			strcmp (".", de->name) ||
+			strcmp ("..", de1->name)) {
+		ext3_warning (inode->i_sb, "empty_dir",
+			      "bad directory (dir #%lu) - no `.' or `..'",
+			      inode->i_ino);
+		brelse (bh);
+		return 1;
+	}
+	/* Continue scanning with the entry that follows ".." */
+	offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
+	de = (struct ext3_dir_entry_2 *)
+			((char *) de1 + le16_to_cpu(de1->rec_len));
+	while (offset < inode->i_size ) {
+		/* No buffer, or de walked off the current block: read the block at offset */
+		if (!bh ||
+			(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+			err = 0;
+			brelse (bh);
+			bh = ext3_bread (NULL, inode,
+				offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
+			if (!bh) {
+				if (err)
+					ext3_error(sb, __FUNCTION__,
+						   "error %d reading directory"
+						   " #%lu offset %lu",
+						   err, inode->i_ino, offset);
+				/* Skip the block that could not be read */
+				offset += sb->s_blocksize;
+				continue;
+			}
+			de = (struct ext3_dir_entry_2 *) bh->b_data;
+		}
+		if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
+			/* Bad entry: abandon the rest of this block and round offset up to the next one */
+			de = (struct ext3_dir_entry_2 *)(bh->b_data +
+							 sb->s_blocksize);
+			offset = (offset | (sb->s_blocksize - 1)) + 1;
+			continue;
+		}
+		if (le32_to_cpu(de->inode)) {
+			/* A live entry: the directory is not empty */
+			brelse (bh);
+			return 0;
+		}
+		offset += le16_to_cpu(de->rec_len);
+		de = (struct ext3_dir_entry_2 *)
+				((char *) de + le16_to_cpu(de->rec_len));
+	}
+	brelse (bh);
+	return 1;
+}
+
+/* ext3_orphan_add() links an unlinked or truncated inode into a list of
+ * such inodes, starting at the superblock, in case we crash before the
+ * file is closed/deleted, or in case the inode truncate spans multiple
+ * transactions and the last transaction is not recovered after a crash.
+ *
+ * At filesystem recovery time, we walk this list deleting unlinked
+ * inodes and truncating linked inodes in ext3_orphan_cleanup().
+ *
+ * Runs under lock_super().  Returns 0 (also when the inode is already on
+ * the in-memory orphan list) or the first error met while journalling the
+ * superblock or the inode.
+ */
+int ext3_orphan_add(handle_t *handle, struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ext3_iloc iloc;
+	int err = 0, rc;
+
+	lock_super(sb);
+	/* Already on the in-memory list: nothing to do */
+	if (!list_empty(&EXT3_I(inode)->i_orphan))
+		goto out_unlock;
+
+	/* Orphan handling is only valid for files with data blocks
+	 * being truncated, or files being unlinked. */
+
+	/* @@@ FIXME: Observation from aviro:
+	 * I think I can trigger J_ASSERT in ext3_orphan_add().  We block
+	 * here (on lock_super()), so race with ext3_link() which might bump
+	 * ->i_nlink. For, say it, character device. Not a regular file,
+	 * not a directory, not a symlink and ->i_nlink > 0.
+	 */
+	J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+		S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
+
+	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
+	err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+	if (err)
+		goto out_unlock;
+
+	err = ext3_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		goto out_unlock;
+
+	/* Insert this inode at the head of the on-disk orphan list... */
+	NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan);
+	EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
+	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+	/* The inode is dirtied even if the superblock failed; the first error wins */
+	rc = ext3_mark_iloc_dirty(handle, inode, &iloc);
+	if (!err)
+		err = rc;
+
+	/* Only add to the head of the in-memory list if all the
+	 * previous operations succeeded.  If the orphan_add is going to
+	 * fail (possibly taking the journal offline), we can't risk
+	 * leaving the inode on the orphan list: stray orphan-list
+	 * entries can cause panics at unmount time.
+	 *
+	 * This is safe: on error we're going to ignore the orphan list
+	 * anyway on the next recovery. */
+	if (!err)
+		list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
+
+	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
+	jbd_debug(4, "orphan inode %lu will point to %d\n",
+			inode->i_ino, NEXT_ORPHAN(inode));
+out_unlock:
+	unlock_super(sb);
+	/* NOTE(review): called on the success path too; presumably a no-op when err == 0 - confirm */
+	ext3_std_error(inode->i_sb, err);
+	return err;
+}
+
+/*
+ * ext3_orphan_del() removes an unlinked or truncated inode from the list
+ * of such inodes stored on disk, because it is finally being cleaned up.
+ *
+ * Runs under lock_super().  The inode is always taken off the in-memory
+ * list; the on-disk list is only updated when @handle is non-NULL.
+ * Returns 0 (also when the inode was not on the list) or the first error
+ * met while journalling.
+ */
+int ext3_orphan_del(handle_t *handle, struct inode *inode)
+{
+	struct list_head *prev;
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext3_sb_info *sbi;
+	unsigned long ino_next;
+	struct ext3_iloc iloc;
+	int err = 0;
+
+	lock_super(inode->i_sb);
+	if (list_empty(&ei->i_orphan)) {
+		unlock_super(inode->i_sb);
+		return 0;
+	}
+
+	/* Capture our successor and predecessor before unlinking ourselves */
+	ino_next = NEXT_ORPHAN(inode);
+	prev = ei->i_orphan.prev;
+	sbi = EXT3_SB(inode->i_sb);
+
+	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
+
+	list_del_init(&ei->i_orphan);
+
+	/* If we're on an error path, we may not have a valid
+	 * transaction handle with which to update the orphan list on
+	 * disk, but we still need to remove the inode from the linked
+	 * list in memory. */
+	if (!handle)
+		goto out;
+
+	err = ext3_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		goto out_err;
+
+	if (prev == &sbi->s_orphan) {
+		/* We were first on the list: the superblock pointed at us */
+		jbd_debug(4, "superblock will point to %lu\n", ino_next);
+		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
+		err = ext3_journal_get_write_access(handle, sbi->s_sbh);
+		if (err)
+			goto out_brelse;
+		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
+		err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+	} else {
+		/* Otherwise the previous orphan inode pointed at us */
+		struct ext3_iloc iloc2;
+		struct inode *i_prev =
+			&list_entry(prev, struct ext3_inode_info, i_orphan)->vfs_inode;
+
+		jbd_debug(4, "orphan inode %lu will point to %lu\n",
+			  i_prev->i_ino, ino_next);
+		err = ext3_reserve_inode_write(handle, i_prev, &iloc2);
+		if (err)
+			goto out_brelse;
+		NEXT_ORPHAN(i_prev) = ino_next;
+		err = ext3_mark_iloc_dirty(handle, i_prev, &iloc2);
+	}
+	if (err)
+		goto out_brelse;
+	NEXT_ORPHAN(inode) = 0;
+	err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+
+out_err:
+	ext3_std_error(inode->i_sb, err);
+out:
+	unlock_super(inode->i_sb);
+	return err;
+
+out_brelse:
+	/* Bailing out before ext3_mark_iloc_dirty(inode): drop the buffer held in iloc ourselves */
+	brelse(iloc.bh);
+	goto out_err;
+}
+
+/*
+ * Remove the directory named by @dentry from @dir.
+ *
+ * Returns -ENOENT if the entry cannot be found, -EIO if the entry does not
+ * refer to the dentry's inode, -ENOTEMPTY if empty_dir() says the directory
+ * still has entries, or the error from ext3_delete_entry().  On success the
+ * victim's link count is cleared, it is put on the orphan list, and the
+ * parent loses one link.
+ */
+static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
+{
+	int retval;
+	struct inode * inode;
+	struct buffer_head * bh;
+	struct ext3_dir_entry_2 * de;
+	handle_t *handle;
+
+	/* Initialize quotas before so that eventual writes go in
+	 * separate transaction */
+	DQUOT_INIT(dentry->d_inode);
+	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	retval = -ENOENT;
+	bh = ext3_find_entry (dentry, &de);
+	if (!bh)
+		goto end_rmdir;
+
+	if (IS_DIRSYNC(dir))
+		handle->h_sync = 1;
+
+	inode = dentry->d_inode;
+
+	/* The on-disk entry must still refer to the inode we were asked to remove */
+	retval = -EIO;
+	if (le32_to_cpu(de->inode) != inode->i_ino)
+		goto end_rmdir;
+
+	retval = -ENOTEMPTY;
+	if (!empty_dir (inode))
+		goto end_rmdir;
+
+	retval = ext3_delete_entry(handle, dir, de, bh);
+	if (retval)
+		goto end_rmdir;
+	if (inode->i_nlink != 2)
+		ext3_warning (inode->i_sb, "ext3_rmdir",
+			      "empty directory has nlink!=2 (%d)",
+			      inode->i_nlink);
+	inode->i_version++;
+	clear_nlink(inode);
+	/* There's no need to set i_disksize: the fact that i_nlink is
+	 * zero will ensure that the right thing happens during any
+	 * recovery. */
+	inode->i_size = 0;
+	/* NOTE(review): the return value of ext3_orphan_add() is not checked */
+	ext3_orphan_add(handle, inode);
+	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+	ext3_mark_inode_dirty(handle, inode);
+	/* The parent loses one link along with the removed directory */
+	drop_nlink(dir);
+	ext3_update_dx_flag(dir);
+	ext3_mark_inode_dirty(handle, dir);
+
+end_rmdir:
+	ext3_journal_stop(handle);
+	/* bh is NULL here when the lookup failed */
+	brelse (bh);
+	return retval;
+}
+
+/*
+ * Remove the name @dentry from @dir and drop one link from its inode.
+ *
+ * Returns -ENOENT if the entry cannot be found, -EIO if the entry does not
+ * refer to the dentry's inode, or the error from ext3_delete_entry().  When
+ * the last link goes away the inode is put on the orphan list.
+ */
+static int ext3_unlink(struct inode * dir, struct dentry *dentry)
+{
+	int retval;
+	struct inode * inode;
+	struct buffer_head * bh;
+	struct ext3_dir_entry_2 * de;
+	handle_t *handle;
+
+	/* Initialize quotas before so that eventual writes go
+	 * in separate transaction */
+	DQUOT_INIT(dentry->d_inode);
+	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	if (IS_DIRSYNC(dir))
+		handle->h_sync = 1;
+
+	retval = -ENOENT;
+	bh = ext3_find_entry (dentry, &de);
+	if (!bh)
+		goto end_unlink;
+
+	inode = dentry->d_inode;
+
+	/* The on-disk entry must still refer to the inode we were asked to unlink */
+	retval = -EIO;
+	if (le32_to_cpu(de->inode) != inode->i_ino)
+		goto end_unlink;
+
+	if (!inode->i_nlink) {
+		ext3_warning (inode->i_sb, "ext3_unlink",
+			      "Deleting nonexistent file (%lu), %d",
+			      inode->i_ino, inode->i_nlink);
+		/* Force the count to 1 so the drop_nlink() below brings it back to 0 */
+		inode->i_nlink = 1;
+	}
+	retval = ext3_delete_entry(handle, dir, de, bh);
+	if (retval)
+		goto end_unlink;
+	dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+	ext3_update_dx_flag(dir);
+	ext3_mark_inode_dirty(handle, dir);
+	drop_nlink(inode);
+	/* NOTE(review): the return value of ext3_orphan_add() is not checked */
+	if (!inode->i_nlink)
+		ext3_orphan_add(handle, inode);
+	inode->i_ctime = dir->i_ctime;
+	ext3_mark_inode_dirty(handle, inode);
+	retval = 0;
+
+end_unlink:
+	ext3_journal_stop(handle);
+	/* bh is NULL here when the lookup failed */
+	brelse (bh);
+	return retval;
+}
+
+/*
+ * Create a symbolic link @dentry in @dir whose target is @symname.
+ *
+ * Returns -ENAMETOOLONG if the target, including its terminating NUL, is
+ * longer than one filesystem block.  A target that fits into the inode's
+ * i_data area is stored there directly ("fast" symlink); anything longer
+ * is written through the page cache with __page_symlink().
+ *
+ * The transaction is repeated while it fails with -ENOSPC and
+ * ext3_should_retry_alloc() asks for another attempt.
+ */
+static int ext3_symlink (struct inode * dir,
+		struct dentry *dentry, const char * symname)
+{
+	handle_t *handle;
+	struct inode * inode;
+	int len, err, retries = 0;
+
+	/* Length of the target including the terminating NUL */
+	len = strlen(symname)+1;
+	if (len > dir->i_sb->s_blocksize)
+		return -ENAMETOOLONG;
+
+retry:
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	if (IS_DIRSYNC(dir))
+		handle->h_sync = 1;
+
+	inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out_stop;
+	}
+
+	if (len <= sizeof (EXT3_I(inode)->i_data)) {
+		/* Fast symlink: the target lives inside the inode itself */
+		inode->i_op = &ext3_fast_symlink_inode_operations;
+		memcpy((char*)&EXT3_I(inode)->i_data,symname,len);
+		inode->i_size = len-1;
+	} else {
+		inode->i_op = &ext3_symlink_inode_operations;
+		ext3_set_aops(inode);
+		/*
+		 * page_symlink() calls into ext3_prepare/commit_write.
+		 * We have a transaction open.  All is sweetness.  It also sets
+		 * i_size in generic_commit_write().
+		 */
+		err = __page_symlink(inode, symname, len,
+				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+		if (err) {
+			ext3_dec_count(handle, inode);
+			ext3_mark_inode_dirty(handle, inode);
+			iput (inode);
+			goto out_stop;
+		}
+	}
+	EXT3_I(inode)->i_disksize = inode->i_size;
+	err = ext3_add_nondir(handle, dentry, inode);
+out_stop:
+	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
+	return err;
+}
+
+/*
+ * Create a hard link @dentry in @dir to the inode behind @old_dentry.
+ *
+ * Returns -EMLINK if the inode already has EXT3_LINK_MAX links.  Each
+ * attempt takes one extra link and one extra reference on the inode
+ * before calling ext3_add_nondir(), which gives both back on failure.
+ * The transaction is repeated while it fails with -ENOSPC and
+ * ext3_should_retry_alloc() asks for another attempt.
+ */
+static int ext3_link (struct dentry * old_dentry,
+		struct inode * dir, struct dentry *dentry)
+{
+	struct inode *inode = old_dentry->d_inode;
+	handle_t *handle;
+	int err, retries = 0;
+
+	if (inode->i_nlink >= EXT3_LINK_MAX)
+		return -EMLINK;
+
+	do {
+		handle = ext3_journal_start(dir,
+				EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+				EXT3_INDEX_EXTRA_TRANS_BLOCKS);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+
+		if (IS_DIRSYNC(dir))
+			handle->h_sync = 1;
+
+		inode->i_ctime = CURRENT_TIME_SEC;
+		ext3_inc_count(handle, inode);
+		atomic_inc(&inode->i_count);
+
+		err = ext3_add_nondir(handle, dentry, inode);
+		ext3_journal_stop(handle);
+	} while (err == -ENOSPC &&
+		 ext3_should_retry_alloc(dir->i_sb, &retries));
+
+	return err;
+}
+
+/*
+ * The inode field of the second directory entry in @buffer, i.e. the entry
+ * found rec_len bytes after the first one.  Expands to an lvalue holding
+ * the on-disk (little-endian) value.  @buffer is evaluated twice, so it
+ * must be free of side effects.
+ */
+#define PARENT_INO(buffer) \
+	(((struct ext3_dir_entry_2 *) ((char *) (buffer) + \
+	le16_to_cpu(((struct ext3_dir_entry_2 *) (buffer))->rec_len)))->inode)
+
+/*
+ * Rename old_dentry (in old_dir) to new_dentry (in new_dir) under one journal
+ * handle; returns 0 or -errno.  Permission checks are left to the callers.
+ */
+static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
+			   struct inode * new_dir,struct dentry *new_dentry)
+{
+	handle_t *handle;
+	struct inode * old_inode, * new_inode;
+	struct buffer_head * old_bh, * new_bh, * dir_bh;
+	struct ext3_dir_entry_2 * old_de, * new_de;
+	int retval;
+
+	old_bh = new_bh = dir_bh = NULL;
+
+	/* Initialize quotas before so that eventual writes go
+	 * in separate transaction */
+	if (new_dentry->d_inode)
+		DQUOT_INIT(new_dentry->d_inode);
+	handle = ext3_journal_start(old_dir, 2 *
+					EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
+					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
+		handle->h_sync = 1;
+
+	old_bh = ext3_find_entry (old_dentry, &old_de);
+	/*
+	 *  Check for inode number is _not_ due to possible IO errors.
+	 *  We might rmdir the source, keep it as pwd of some process
+	 *  and merrily kill the link to whatever was created under the
+	 *  same name. Goodbye sticky bit ;-<
+	 */
+	old_inode = old_dentry->d_inode;
+	retval = -ENOENT;
+	if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
+		goto end_rename;
+	/* A target entry with no in-core inode is not reused. */
+	new_inode = new_dentry->d_inode;
+	new_bh = ext3_find_entry (new_dentry, &new_de);
+	if (new_bh) {
+		if (!new_inode) {
+			brelse (new_bh);
+			new_bh = NULL;
+		}
+	}
+	if (S_ISDIR(old_inode->i_mode)) {
+		if (new_inode) {
+			retval = -ENOTEMPTY;
+			if (!empty_dir (new_inode))
+				goto end_rename;
+		}
+		retval = -EIO;
+		dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval);
+		if (!dir_bh)
+			goto end_rename;
+		if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
+			goto end_rename;
+		retval = -EMLINK;
+		if (!new_inode && new_dir!=old_dir &&
+				new_dir->i_nlink >= EXT3_LINK_MAX)
+			goto end_rename;
+	}
+	if (!new_bh) {
+		retval = ext3_add_entry (handle, new_dentry, old_inode);
+		if (retval)
+			goto end_rename;
+	} else {
+		BUFFER_TRACE(new_bh, "get write access");
+		ext3_journal_get_write_access(handle, new_bh);
+		new_de->inode = cpu_to_le32(old_inode->i_ino);
+		if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
+					      EXT3_FEATURE_INCOMPAT_FILETYPE))
+			new_de->file_type = old_de->file_type;
+		new_dir->i_version++;
+		BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
+		ext3_journal_dirty_metadata(handle, new_bh);
+		brelse(new_bh);
+		new_bh = NULL;
+	}
+	/* NOTE(review): get_write_access() results here are never checked. */
+	/*
+	 * Like most other Unix systems, set the ctime for inodes on a
+	 * rename.
+	 */
+	old_inode->i_ctime = CURRENT_TIME_SEC;
+	ext3_mark_inode_dirty(handle, old_inode);
+
+	/*
+	 * ok, that's it
+	 */
+	if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
+	    old_de->name_len != old_dentry->d_name.len ||
+	    strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
+	    (retval = ext3_delete_entry(handle, old_dir,
+					old_de, old_bh)) == -ENOENT) {
+		/* old_de could have moved from under us during htree split, so
+		 * make sure that we are deleting the right entry.  We might
+		 * also be pointing to a stale entry in the unused part of
+		 * old_bh so just checking inum and the name isn't enough. */
+		struct buffer_head *old_bh2;
+		struct ext3_dir_entry_2 *old_de2;
+
+		old_bh2 = ext3_find_entry(old_dentry, &old_de2);
+		if (old_bh2) {
+			retval = ext3_delete_entry(handle, old_dir,
+						   old_de2, old_bh2);
+			brelse(old_bh2);
+		}
+	}
+	if (retval) {
+		ext3_warning(old_dir->i_sb, "ext3_rename",
+				"Deleting old file (%lu), %d, error=%d",
+				old_dir->i_ino, old_dir->i_nlink, retval);
+	}
+	/* An overwritten target loses the link held by its old entry. */
+	if (new_inode) {
+		drop_nlink(new_inode);
+		new_inode->i_ctime = CURRENT_TIME_SEC;
+	}
+	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
+	ext3_update_dx_flag(old_dir);
+	if (dir_bh) {
+		BUFFER_TRACE(dir_bh, "get_write_access");
+		ext3_journal_get_write_access(handle, dir_bh);
+		PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
+		BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
+		ext3_journal_dirty_metadata(handle, dir_bh);
+		drop_nlink(old_dir);
+		if (new_inode) {
+			drop_nlink(new_inode);
+		} else {
+			inc_nlink(new_dir);
+			ext3_update_dx_flag(new_dir);
+			ext3_mark_inode_dirty(handle, new_dir);
+		}
+	}
+	ext3_mark_inode_dirty(handle, old_dir);
+	if (new_inode) {
+		ext3_mark_inode_dirty(handle, new_inode);
+		if (!new_inode->i_nlink)
+			ext3_orphan_add(handle, new_inode);
+	}
+	retval = 0;
+
+end_rename:
+	brelse (dir_bh);
+	brelse (old_bh);
+	brelse (new_bh);
+	ext3_journal_stop(handle);
+	return retval;
+}
+
+/*
+ * Directory inode operations; xattr hooks only with CONFIG_EXT3_FS_XATTR.
+ */
+struct inode_operations ext3_dir_inode_operations = {
+	.create		= ext3_create,
+	.lookup		= ext3_lookup,
+	.link		= ext3_link,
+	.unlink		= ext3_unlink,
+	.symlink	= ext3_symlink,
+	.mkdir		= ext3_mkdir,
+	.rmdir		= ext3_rmdir,
+	.mknod		= ext3_mknod,
+	.rename		= ext3_rename,
+	.setattr	= ext3_setattr,
+#ifdef CONFIG_EXT3_FS_XATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= ext3_listxattr,
+	.removexattr	= generic_removexattr,
+#endif
+	.permission	= ext3_permission,
+};
+/* Table named for special inodes: setattr, xattr and permission hooks only. */
+struct inode_operations ext3_special_inode_operations = {
+	.setattr	= ext3_setattr,
+#ifdef CONFIG_EXT3_FS_XATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= ext3_listxattr,
+	.removexattr	= generic_removexattr,
+#endif
+	.permission	= ext3_permission,
+};
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h
new file mode 100644
index 000000000000..f2ce2b0065c9
--- /dev/null
+++ b/fs/ext4/namei.h
@@ -0,0 +1,8 @@
+/*  linux/fs/ext3/namei.h
+ *
+ * Copyright (C) 2005 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+*/
+
+extern struct dentry *ext3_get_parent(struct dentry *child);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
new file mode 100644
index 000000000000..b73cba12f79c
--- /dev/null
+++ b/fs/ext4/resize.c
@@ -0,0 +1,1042 @@
+/*
+ *  linux/fs/ext3/resize.c
+ *
+ * Support for resizing an ext3 filesystem while it is mounted.
+ *
+ * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
+ *
+ * This could probably be made into a module, because it is not often in use.
+ */
+
+
+#define EXT3FS_DEBUG
+
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/ext3_jbd.h>
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+
+
+#define outside(b, first, last)	((b) < (first) || (b) >= (last))
+#define inside(b, first, last)	((b) >= (first) && (b) < (last))
+
+static int verify_group_input(struct super_block *sb,
+			      struct ext3_new_group_data *input)
+{
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext3_super_block *es = sbi->s_es;
+	ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count);
+	ext3_fsblk_t end = start + input->blocks_count;
+	unsigned group = input->group;
+	ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
+	unsigned overhead = ext3_bg_has_super(sb, group) ?
+		(1 + ext3_bg_num_gdb(sb, group) +
+		 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
+	ext3_fsblk_t metaend = start + overhead;
+	struct buffer_head *bh = NULL;
+	ext3_grpblk_t free_blocks_count;
+	int err = -EINVAL;
+	/* Returns 0 if the requested group layout is sane, else -EINVAL. */
+	input->free_blocks_count = free_blocks_count =
+		input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
+
+	if (test_opt(sb, DEBUG))
+		printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks "
+		       "(%d free, %u reserved)\n",
+		       ext3_bg_has_super(sb, input->group) ? "normal" :
+		       "no-super", input->group, input->blocks_count,
+		       free_blocks_count, input->reserved_blocks);
+	/* The first failing check warns; err becomes 0 only if all pass. */
+	if (group != sbi->s_groups_count)
+		ext3_warning(sb, __FUNCTION__,
+			     "Cannot add at group %u (only %lu groups)",
+			     input->group, sbi->s_groups_count);
+	else if ((start - le32_to_cpu(es->s_first_data_block)) %
+		 EXT3_BLOCKS_PER_GROUP(sb))
+		ext3_warning(sb, __FUNCTION__, "Last group not full");
+	else if (input->reserved_blocks > input->blocks_count / 5)
+		ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)",
+			     input->reserved_blocks);
+	else if (free_blocks_count < 0)
+		ext3_warning(sb, __FUNCTION__, "Bad blocks count %u",
+			     input->blocks_count);
+	else if (!(bh = sb_bread(sb, end - 1)))
+		ext3_warning(sb, __FUNCTION__,
+			     "Cannot read last block ("E3FSBLK")",
+			     end - 1);
+	else if (outside(input->block_bitmap, start, end))
+		ext3_warning(sb, __FUNCTION__,
+			     "Block bitmap not in group (block %u)",
+			     input->block_bitmap);
+	else if (outside(input->inode_bitmap, start, end))
+		ext3_warning(sb, __FUNCTION__,
+			     "Inode bitmap not in group (block %u)",
+			     input->inode_bitmap);
+	else if (outside(input->inode_table, start, end) ||
+	         outside(itend - 1, start, end))
+		ext3_warning(sb, __FUNCTION__,
+			     "Inode table not in group (blocks %u-"E3FSBLK")",
+			     input->inode_table, itend - 1);
+	else if (input->inode_bitmap == input->block_bitmap)
+		ext3_warning(sb, __FUNCTION__,
+			     "Block bitmap same as inode bitmap (%u)",
+			     input->block_bitmap);
+	else if (inside(input->block_bitmap, input->inode_table, itend))
+		ext3_warning(sb, __FUNCTION__,
+			     "Block bitmap (%u) in inode table (%u-"E3FSBLK")",
+			     input->block_bitmap, input->inode_table, itend-1);
+	else if (inside(input->inode_bitmap, input->inode_table, itend))
+		ext3_warning(sb, __FUNCTION__,
+			     "Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
+			     input->inode_bitmap, input->inode_table, itend-1);
+	else if (inside(input->block_bitmap, start, metaend))
+		ext3_warning(sb, __FUNCTION__,
+			     "Block bitmap (%u) in GDT table"
+			     " ("E3FSBLK"-"E3FSBLK")",
+			     input->block_bitmap, start, metaend - 1);
+	else if (inside(input->inode_bitmap, start, metaend))
+		ext3_warning(sb, __FUNCTION__,
+			     "Inode bitmap (%u) in GDT table"
+			     " ("E3FSBLK"-"E3FSBLK")",
+			     input->inode_bitmap, start, metaend - 1);
+	else if (inside(input->inode_table, start, metaend) ||
+	         inside(itend - 1, start, metaend))
+		ext3_warning(sb, __FUNCTION__,
+			     "Inode table (%u-"E3FSBLK") overlaps"
+			     " GDT table ("E3FSBLK"-"E3FSBLK")",
+			     input->inode_table, itend - 1, start, metaend - 1);
+	else
+		err = 0;
+	brelse(bh);
+
+	return err;
+}
+
+static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
+				  ext3_fsblk_t blk)
+{
+	struct buffer_head *zeroed;
+	int rc;
+
+	/* Get a journalled, zero-filled, uptodate buffer for blk or ERR_PTR. */
+	zeroed = sb_getblk(sb, blk);
+	if (!zeroed)
+		return ERR_PTR(-EIO);
+	rc = ext3_journal_get_write_access(handle, zeroed);
+	if (rc) {
+		brelse(zeroed);
+		return ERR_PTR(rc);
+	}
+	lock_buffer(zeroed);
+	memset(zeroed->b_data, 0, sb->s_blocksize);
+	set_buffer_uptodate(zeroed);
+	unlock_buffer(zeroed);
+	return zeroed;
+}
+
+/*
+ * Mark the tail of a bitmap as used.  Bits from start_bit up to the next
+ * byte boundary are set one at a time with ext3_set_bit() (to ensure we get
+ * endianness right); the whole bytes from there to end_bit use one memset().
+ */
+static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+{
+	const unsigned long byte_edge = (start_bit + 7) & ~7UL;
+	int bit;
+
+	if (end_bit <= start_bit)
+		return;
+	ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
+	for (bit = start_bit; bit < byte_edge; bit++)
+		ext3_set_bit(bit, bitmap);
+	if (end_bit > bit)
+		memset(bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
+}
+
+/*
+ * Set up the block and inode bitmaps, and the inode table for the new group.
+ * This doesn't need to be part of the main transaction, since we are only
+ * changing blocks outside the actual filesystem.  We still do journaling to
+ * ensure the recovery is correct in case of a failure just after resize.
+ * Returns 0, or -errno if any part fails (the caller then aborts the resize).
+ */
+static int setup_new_group_blocks(struct super_block *sb,
+				  struct ext3_new_group_data *input)
+{
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group);
+	int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
+		le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
+	unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group);
+	struct buffer_head *bh;
+	handle_t *handle;
+	ext3_fsblk_t block;
+	ext3_grpblk_t bit;
+	int i;
+	int err = 0, err2;
+
+	handle = ext3_journal_start_sb(sb, reserved_gdb + gdblocks +
+				       2 + sbi->s_itb_per_group);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	lock_super(sb);
+	if (input->group != sbi->s_groups_count) {
+		err = -EBUSY;
+		goto exit_journal;
+	}
+
+	if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
+		err = PTR_ERR(bh);
+		goto exit_journal;
+	}
+
+	if (ext3_bg_has_super(sb, input->group)) {
+		ext3_debug("mark backup superblock %#04lx (+0)\n", start);
+		ext3_set_bit(0, bh->b_data);
+	}
+
+	/* Copy all of the GDT blocks into the backup in this group */
+	for (i = 0, bit = 1, block = start + 1;
+	     i < gdblocks; i++, block++, bit++) {
+		struct buffer_head *gdb;
+
+		ext3_debug("update backup group %#04lx (+%d)\n", block, bit);
+
+		gdb = sb_getblk(sb, block);
+		if (!gdb) {
+			err = -EIO;
+			goto exit_bh;
+		}
+		if ((err = ext3_journal_get_write_access(handle, gdb))) {
+			brelse(gdb);
+			goto exit_bh;
+		}
+		lock_buffer(gdb);
+		memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
+		set_buffer_uptodate(gdb);
+		unlock_buffer(gdb);
+		ext3_journal_dirty_metadata(handle, gdb);
+		ext3_set_bit(bit, bh->b_data);
+		brelse(gdb);
+	}
+
+	/* Zero out all of the reserved backup group descriptor table blocks */
+	for (i = 0, bit = gdblocks + 1, block = start + bit;
+	     i < reserved_gdb; i++, block++, bit++) {
+		struct buffer_head *gdb;
+
+		ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit);
+
+		if (IS_ERR(gdb = bclean(handle, sb, block))) {
+			err = PTR_ERR(gdb);
+			goto exit_bh;
+		}
+		ext3_journal_dirty_metadata(handle, gdb);
+		ext3_set_bit(bit, bh->b_data);
+		brelse(gdb);
+	}
+	ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
+		   input->block_bitmap - start);
+	ext3_set_bit(input->block_bitmap - start, bh->b_data);
+	ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
+		   input->inode_bitmap - start);
+	ext3_set_bit(input->inode_bitmap - start, bh->b_data);
+
+	/* Zero out all of the inode table blocks */
+	for (i = 0, block = input->inode_table, bit = block - start;
+	     i < sbi->s_itb_per_group; i++, bit++, block++) {
+		struct buffer_head *it;
+
+		ext3_debug("clear inode block %#04lx (+%d)\n", block, bit);
+		if (IS_ERR(it = bclean(handle, sb, block))) {
+			err = PTR_ERR(it);
+			goto exit_bh;
+		}
+		ext3_journal_dirty_metadata(handle, it);
+		brelse(it);
+		ext3_set_bit(bit, bh->b_data);
+	}
+	mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
+			bh->b_data);
+	ext3_journal_dirty_metadata(handle, bh);
+	brelse(bh);
+
+	/* Mark unused entries in inode bitmap used */
+	ext3_debug("clear inode bitmap %#04x (+%ld)\n",
+		   input->inode_bitmap, input->inode_bitmap - start);
+	if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
+		err = PTR_ERR(bh);
+		goto exit_journal;
+	}
+
+	mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
+			bh->b_data);
+	ext3_journal_dirty_metadata(handle, bh);
+exit_bh:
+	brelse(bh);
+
+exit_journal:
+	unlock_super(sb);
+	if ((err2 = ext3_journal_stop(handle)) && !err)
+		err = err2;
+
+	return err;
+}
+
+/*
+ * Return the next group number holding a BACKUP superblock/GDT copy and
+ * advance the iterator state.  Callers start with *three = 1, *five = 5 and
+ * *seven = 7.  With sparse_super the results are the powers of 3, 5 and 7
+ * in ascending order: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...  Otherwise every
+ * group has a backup and the results are simply 1, 2, 3, 4, ...
+ */
+static unsigned ext3_list_backups(struct super_block *sb, unsigned *three,
+				  unsigned *five, unsigned *seven)
+{
+	unsigned *lowest;
+	unsigned group;
+	int factor;
+
+	/* Without sparse_super every group has a backup: just count up. */
+	if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
+					EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER))
+		return (*three)++;
+
+	/* Pick whichever counter is currently smallest (ties: 3, then 5). */
+	if (*three <= *five) {
+		lowest = three;
+		factor = 3;
+	} else {
+		lowest = five;
+		factor = 5;
+	}
+	if (*seven < *lowest) {
+		lowest = seven;
+		factor = 7;
+	}
+	group = *lowest;
+	*lowest *= factor;
+	return group;
+}
+
+/*
+ * Check that a primary GDT block lists, in group order, the block number of
+ * its backup in each existing backup group (grp * blocks-per-group + blk).
+ * Returns the number of backup groups checked, or -EINVAL / -EFBIG on error.
+ */
+static int verify_reserved_gdb(struct super_block *sb,
+			       struct buffer_head *primary)
+{
+	const ext3_fsblk_t blk = primary->b_blocknr;
+	const unsigned long end = EXT3_SB(sb)->s_groups_count;
+	unsigned three = 1;
+	unsigned five = 5;
+	unsigned seven = 7;
+	unsigned grp;
+	__le32 *p = (__le32 *)primary->b_data;
+	int gdbackups = 0;
+	/* Walk the backup groups below s_groups_count, one slot of p each. */
+	while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
+		if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
+			ext3_warning(sb, __FUNCTION__,
+				     "reserved GDT "E3FSBLK
+				     " missing grp %d ("E3FSBLK")",
+				     blk, grp,
+				     grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
+			return -EINVAL;
+		}
+		if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb))
+			return -EFBIG;
+	}
+
+	return gdbackups;
+}
+
+/*
+ * Called when we need to bring a reserved group descriptor table block into
+ * use from the resize inode.  The primary copy of the new GDT block currently
+ * is an indirect block (under the double indirect block in the resize inode).
+ * The new backup GDT blocks will be stored as leaf blocks in this indirect
+ * block, in group order.  Even though we know all the block numbers we need,
+ * we check to ensure that the resize inode has actually reserved these blocks.
+ *
+ * Don't need to update the block bitmaps because the blocks are still in use.
+ * Returns 0 with *primary set to the new GDT buffer, or -errno on failure.
+ *
+ * We get all of the error cases out of the way, so that we are sure to not
+ * fail once we start modifying the data on disk, because JBD has no rollback.
+ */
+static int add_new_gdb(handle_t *handle, struct inode *inode,
+		       struct ext3_new_group_data *input,
+		       struct buffer_head **primary)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+	unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
+	ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
+	struct buffer_head **o_group_desc, **n_group_desc;
+	struct buffer_head *dind;
+	int gdbackups;
+	struct ext3_iloc iloc;
+	__le32 *data;
+	int err;
+
+	if (test_opt(sb, DEBUG))
+		printk(KERN_DEBUG
+		       "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n",
+		       gdb_num);
+
+	/*
+	 * If we are not using the primary superblock/GDT copy don't resize,
+	 * because the user tools have no way of handling this.  Probably a
+	 * bad time to do it anyways.
+	 */
+	if (EXT3_SB(sb)->s_sbh->b_blocknr !=
+	    le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
+		ext3_warning(sb, __FUNCTION__,
+			"won't resize using backup superblock at %llu",
+			(unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
+		return -EPERM;
+	}
+	/* Read the reserved block that becomes the new primary GDT block. */
+	*primary = sb_bread(sb, gdblock);
+	if (!*primary)
+		return -EIO;
+
+	if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) {
+		err = gdbackups;
+		goto exit_bh;
+	}
+
+	data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
+	dind = sb_bread(sb, le32_to_cpu(*data));
+	if (!dind) {
+		err = -EIO;
+		goto exit_bh;
+	}
+
+	data = (__le32 *)dind->b_data;
+	if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
+		ext3_warning(sb, __FUNCTION__,
+			     "new group %u GDT block "E3FSBLK" not reserved",
+			     input->group, gdblock);
+		err = -EINVAL;
+		goto exit_dind;
+	}
+
+	if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh)))
+		goto exit_dind;
+
+	if ((err = ext3_journal_get_write_access(handle, *primary)))
+		goto exit_sbh;
+
+	if ((err = ext3_journal_get_write_access(handle, dind)))
+		goto exit_primary;
+
+	/* ext3_reserve_inode_write() gets a reference on the iloc */
+	if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
+		goto exit_dindj;
+
+	n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
+			GFP_KERNEL);
+	if (!n_group_desc) {
+		err = -ENOMEM;
+		ext3_warning (sb, __FUNCTION__,
+			      "not enough memory for %lu groups", gdb_num + 1);
+		goto exit_inode;
+	}
+
+	/*
+	 * Finally, we have all of the possible failures behind us...
+	 *
+	 * Remove new GDT block from inode double-indirect block and clear out
+	 * the new GDT block for use (which also "frees" the backup GDT blocks
+	 * from the reserved inode).  We don't need to change the bitmaps for
+	 * these blocks, because they are marked as in-use from being in the
+	 * reserved inode, and will become GDT blocks (primary and backup).
+	 */
+	data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
+	ext3_journal_dirty_metadata(handle, dind);
+	brelse(dind);
+	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
+	ext3_mark_iloc_dirty(handle, inode, &iloc);
+	memset((*primary)->b_data, 0, sb->s_blocksize);
+	ext3_journal_dirty_metadata(handle, *primary);
+	/* Publish a new descriptor array that also holds *primary. */
+	o_group_desc = EXT3_SB(sb)->s_group_desc;
+	memcpy(n_group_desc, o_group_desc,
+	       EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
+	n_group_desc[gdb_num] = *primary;
+	EXT3_SB(sb)->s_group_desc = n_group_desc;
+	EXT3_SB(sb)->s_gdb_count++;
+	kfree(o_group_desc);
+
+	es->s_reserved_gdt_blocks =
+		cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
+	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+
+	return 0;
+
+exit_inode:
+	//ext3_journal_release_buffer(handle, iloc.bh);
+	brelse(iloc.bh);
+exit_dindj:
+	//ext3_journal_release_buffer(handle, dind);
+exit_primary:
+	//ext3_journal_release_buffer(handle, *primary);
+exit_sbh:
+	//ext3_journal_release_buffer(handle, EXT3_SB(sb)->s_sbh);
+exit_dind:
+	brelse(dind);
+exit_bh:
+	brelse(*primary);
+
+	ext3_debug("leaving with error %d\n", err);
+	return err;
+}
+
+/*
+ * Called when we are adding a new group which has a backup copy of each of
+ * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
+ * We need to add these reserved backup GDT blocks to the resize inode, so
+ * that they are kept for future resizing and not allocated to files.
+ *
+ * Each reserved backup GDT block will go into a different indirect block.
+ * The indirect blocks are actually the primary reserved GDT blocks,
+ * so we know in advance what their block numbers are.  We only get the
+ * double-indirect block to verify it is pointing to the primary reserved
+ * GDT blocks so we don't overwrite a data block by accident.  The reserved
+ * backup GDT blocks are stored in their reserved primary GDT block.
+ */
+static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
+			      struct ext3_new_group_data *input)
+{
+	struct super_block *sb = inode->i_sb;
+	int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks);
+	struct buffer_head **primary;
+	struct buffer_head *dind;
+	struct ext3_iloc iloc;
+	ext3_fsblk_t blk;
+	__le32 *data, *end;
+	int gdbackups = 0;
+	int res, i;
+	int err;
+	/* Returns 0 or -errno; all buffers read here are released on exit. */
+	primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL);
+	if (!primary)
+		return -ENOMEM;
+
+	data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
+	dind = sb_bread(sb, le32_to_cpu(*data));
+	if (!dind) {
+		err = -EIO;
+		goto exit_free;
+	}
+
+	blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
+	data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
+	end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
+
+	/* Get each reserved primary GDT block and verify it holds backups */
+	for (res = 0; res < reserved_gdb; res++, blk++) {
+		if (le32_to_cpu(*data) != blk) {
+			ext3_warning(sb, __FUNCTION__,
+				     "reserved block "E3FSBLK
+				     " not at offset %ld",
+				     blk,
+				     (long)(data - (__le32 *)dind->b_data));
+			err = -EINVAL;
+			goto exit_bh;
+		}
+		primary[res] = sb_bread(sb, blk);
+		if (!primary[res]) {
+			err = -EIO;
+			goto exit_bh;
+		}
+		if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
+			brelse(primary[res]);
+			err = gdbackups;
+			goto exit_bh;
+		}
+		if (++data >= end)
+			data = (__le32 *)dind->b_data;
+	}
+	/* Get journal write access to every block before changing any. */
+	for (i = 0; i < reserved_gdb; i++) {
+		if ((err = ext3_journal_get_write_access(handle, primary[i]))) {
+			/*
+			int j;
+			for (j = 0; j < i; j++)
+				ext3_journal_release_buffer(handle, primary[j]);
+			 */
+			goto exit_bh;
+		}
+	}
+
+	if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
+		goto exit_bh;
+
+	/*
+	 * Finally we can add each of the reserved backup GDT blocks from
+	 * the new group to its reserved primary GDT block.
+	 */
+	blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
+	for (i = 0; i < reserved_gdb; i++) {
+		int err2;
+		data = (__le32 *)primary[i]->b_data;
+		/* printk("reserving backup %lu[%u] = %lu\n",
+		       primary[i]->b_blocknr, gdbackups,
+		       blk + primary[i]->b_blocknr); */
+		data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
+		err2 = ext3_journal_dirty_metadata(handle, primary[i]);
+		if (!err)
+			err = err2;
+	}
+	inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
+	ext3_mark_iloc_dirty(handle, inode, &iloc);
+
+exit_bh:
+	while (--res >= 0)
+		brelse(primary[res]);
+	brelse(dind);
+
+exit_free:
+	kfree(primary);
+
+	return err;
+}
+
+/*
+ * Update the backup copies of the ext3 metadata.  These don't need to be part
+ * of the main resize transaction, because e2fsck will re-write them if there
+ * is a problem (basically only OOM will cause a problem).  However, we
+ * _should_ update the backups if possible, in case the primary gets trashed
+ * for some reason and we need to run e2fsck from a backup superblock.  The
+ * important part is that the new block and inode counts are in the backup
+ * superblocks, and the location of the new group metadata in the GDT backups.
+ *
+ * We do not need lock_super() for this, because these blocks are not
+ * otherwise touched by the filesystem code when it is mounted.  We don't
+ * need to worry about last changing from sbi->s_groups_count, because the
+ * worst that can happen is that we do not copy the full number of backups
+ * at this time.  The resize which changed s_groups_count will backup again.
+ */
+static void update_backups(struct super_block *sb,
+			   int blk_off, char *data, int size)
+{
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	const unsigned long last = sbi->s_groups_count;
+	const int bpg = EXT3_BLOCKS_PER_GROUP(sb);
+	unsigned three = 1;
+	unsigned five = 5;
+	unsigned seven = 7;
+	unsigned group;
+	int rest = sb->s_blocksize - size;
+	handle_t *handle;
+	int err = 0, err2;
+	/* Write data[0..size) at block blk_off of every backup group. */
+	handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
+	if (IS_ERR(handle)) {
+		group = 1;
+		err = PTR_ERR(handle);
+		goto exit_err;
+	}
+
+	while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) {
+		struct buffer_head *bh;
+
+		/* Out of journal space, and can't get more - abort - so sad */
+		if (handle->h_buffer_credits == 0 &&
+		    ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) &&
+		    (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA)))
+			break;
+
+		bh = sb_getblk(sb, group * bpg + blk_off);
+		if (!bh) {
+			err = -EIO;
+			break;
+		}
+		ext3_debug("update metadata backup %#04lx\n",
+			  (unsigned long)bh->b_blocknr);
+		if ((err = ext3_journal_get_write_access(handle, bh))) {
+			brelse(bh);
+			break;
+		}
+		lock_buffer(bh);
+		memcpy(bh->b_data, data, size);
+		if (rest)
+			memset(bh->b_data + size, 0, rest);
+		set_buffer_uptodate(bh);
+		unlock_buffer(bh);
+		ext3_journal_dirty_metadata(handle, bh);
+		brelse(bh);
+	}
+	if ((err2 = ext3_journal_stop(handle)) && !err)
+		err = err2;
+
+	/*
+	 * Ugh! Need to have e2fsck write the backup copies.  It is too
+	 * late to revert the resize, we shouldn't fail just because of
+	 * the backup copies (they are only needed in case of corruption).
+	 * We have a journal problem too if we got here, so we can't start
+	 * a transaction to mark the superblock: just clear the valid flag
+	 * and hope it gets written; if not, we wait until the next fsck.
+	 */
+exit_err:
+	if (err) {
+		ext3_warning(sb, __FUNCTION__,
+			     "can't update backup for group %d (err %d), "
+			     "forcing fsck on next reboot", group, err);
+		sbi->s_mount_state &= ~EXT3_VALID_FS;
+		sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
+		mark_buffer_dirty(sbi->s_sbh);
+	}
+}
+
+/* Add group descriptor data to an existing or new group descriptor block.
+ * Ensure we handle all possible error conditions _before_ we start modifying
+ * the filesystem, because we cannot abort the transaction and not have it
+ * write the data to disk.
+ *
+ * If we are on a GDT block boundary, we need to get the reserved GDT block.
+ * Otherwise, we may need to add backup GDT blocks for a sparse group.
+ *
+ * We only need to hold the superblock lock while we are actually adding
+ * in the new group's counts to the superblock.  Prior to that we have
+ * not really "added" the group at all.  We re-check that we are still
+ * adding in the last group in case things have changed since verifying.
+ */
+int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
+{
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext3_super_block *es = sbi->s_es;
+	int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
+		le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
+	struct buffer_head *primary = NULL;
+	struct ext3_group_desc *gdp;
+	struct inode *inode = NULL;
+	handle_t *handle;
+	int gdb_off, gdb_num;
+	int err, err2;
+
+	gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
+	gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb);
+
+	if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
+					EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
+		ext3_warning(sb, __FUNCTION__,
+			     "Can't resize non-sparse filesystem further");
+		return -EPERM;
+	}
+
+	if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
+	    le32_to_cpu(es->s_blocks_count)) {
+		ext3_warning(sb, __FUNCTION__, "blocks_count overflow\n");
+		return -EINVAL;
+	}
+
+	if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
+	    le32_to_cpu(es->s_inodes_count)) {
+		ext3_warning(sb, __FUNCTION__, "inodes_count overflow\n");
+		return -EINVAL;
+	}
+
+	if (reserved_gdb || gdb_off == 0) {
+		if (!EXT3_HAS_COMPAT_FEATURE(sb,
+					     EXT3_FEATURE_COMPAT_RESIZE_INODE)){
+			ext3_warning(sb, __FUNCTION__,
+				     "No reserved GDT blocks, can't resize");
+			return -EPERM;
+		}
+		inode = iget(sb, EXT3_RESIZE_INO);
+		if (!inode || is_bad_inode(inode)) {
+			ext3_warning(sb, __FUNCTION__,
+				     "Error opening resize inode");
+			iput(inode);
+			return -ENOENT;
+		}
+	}
+
+	if ((err = verify_group_input(sb, input)))
+		goto exit_put;
+
+	if ((err = setup_new_group_blocks(sb, input)))
+		goto exit_put;
+
+	/*
+	 * We will always be modifying at least the superblock and a GDT
+	 * block.  If we are adding a group past the last current GDT block,
+	 * we will also modify the inode and the dindirect block.  If we
+	 * are adding a group with superblock/GDT backups  we will also
+	 * modify each of the reserved GDT dindirect blocks.
+	 */
+	handle = ext3_journal_start_sb(sb,
+				       ext3_bg_has_super(sb, input->group) ?
+				       3 + reserved_gdb : 4);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto exit_put;
+	}
+
+	lock_super(sb);
+	if (input->group != sbi->s_groups_count) {
+		ext3_warning(sb, __FUNCTION__,
+			     "multiple resizers run on filesystem!");
+		err = -EBUSY;
+		goto exit_journal;
+	}
+
+	if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh)))
+		goto exit_journal;
+
+	/*
+	 * We will only either add reserved group blocks to a backup group
+	 * or remove reserved blocks for the first group in a new group block.
+	 * Doing both would be mean more complex code, and sane people don't
+	 * use non-sparse filesystems anymore.  This is already checked above.
+	 */
+	if (gdb_off) {
+		primary = sbi->s_group_desc[gdb_num];
+		if ((err = ext3_journal_get_write_access(handle, primary)))
+			goto exit_journal;
+
+		if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) &&
+		    (err = reserve_backup_gdb(handle, inode, input)))
+			goto exit_journal;
+	} else if ((err = add_new_gdb(handle, inode, input, &primary)))
+		goto exit_journal;
+
+	/*
+	 * OK, now we've set up the new group.  Time to make it active.
+	 *
+	 * Current kernels don't lock all allocations via lock_super(),
+	 * so we have to be safe wrt. concurrent accesses the group
+	 * data.  So we need to be careful to set all of the relevant
+	 * group descriptor data etc. *before* we enable the group.
+	 *
+	 * The key field here is sbi->s_groups_count: as long as
+	 * that retains its old value, nobody is going to access the new
+	 * group.
+	 *
+	 * So first we update all the descriptor metadata for the new
+	 * group; then we update the total disk blocks count; then we
+	 * update the groups count to enable the group; then finally we
+	 * update the free space counts so that the system can start
+	 * using the new disk blocks.
+	 */
+
+	/* Update group descriptor block for new group */
+	gdp = (struct ext3_group_desc *)primary->b_data + gdb_off;
+
+	gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap);
+	gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap);
+	gdp->bg_inode_table = cpu_to_le32(input->inode_table);
+	gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
+	gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
+
+	/*
+	 * Make the new blocks and inodes valid next.  We do this before
+	 * increasing the group count so that once the group is enabled,
+	 * all of its blocks and inodes are already valid.
+	 *
+	 * We always allocate group-by-group, then block-by-block or
+	 * inode-by-inode within a group, so enabling these
+	 * blocks/inodes before the group is live won't actually let us
+	 * allocate the new space yet.
+	 */
+	es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) +
+		input->blocks_count);
+	es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) +
+		EXT3_INODES_PER_GROUP(sb));
+
+	/*
+	 * We need to protect s_groups_count against other CPUs seeing
+	 * inconsistent state in the superblock.
+	 *
+	 * The precise rules we use are:
+	 *
+	 * * Writers of s_groups_count *must* hold lock_super
+	 * AND
+	 * * Writers must perform a smp_wmb() after updating all dependent
+	 *   data and before modifying the groups count
+	 *
+	 * * Readers must hold lock_super() over the access
+	 * OR
+	 * * Readers must perform an smp_rmb() after reading the groups count
+	 *   and before reading any dependent data.
+	 *
+	 * NB. These rules can be relaxed when checking the group count
+	 * while freeing data, as we can only allocate from a block
+	 * group after serialising against the group count, and we can
+	 * only then free after serialising in turn against that
+	 * allocation.
+	 */
+	smp_wmb();
+
+	/* Update the global fs size fields */
+	sbi->s_groups_count++;
+
+	ext3_journal_dirty_metadata(handle, primary);
+
+	/* Update the reserved block counts only once the new group is
+	 * active. */
+	es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) +
+		input->reserved_blocks);
+
+	/* Update the free space counts */
+	percpu_counter_mod(&sbi->s_freeblocks_counter,
+			   input->free_blocks_count);
+	percpu_counter_mod(&sbi->s_freeinodes_counter,
+			   EXT3_INODES_PER_GROUP(sb));
+
+	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+	sb->s_dirt = 1;
+
+exit_journal:
+	unlock_super(sb);
+	if ((err2 = ext3_journal_stop(handle)) && !err)
+		err = err2;
+	if (!err) {
+		update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
+			       sizeof(struct ext3_super_block));
+		update_backups(sb, primary->b_blocknr, primary->b_data,
+			       primary->b_size);
+	}
+exit_put:
+	iput(inode);
+	return err;
+} /* ext3_group_add */
+
+/* Extend the filesystem to the new number of blocks specified.  This entry
+ * point is only used to extend the current filesystem to the end of the last
+ * existing group.  It can be accessed via ioctl, or by "remount,resize=<size>"
+ * for emergencies (because it has no dependencies on reserved blocks).
+ *
+ * If we _really_ wanted, we could use default values to call ext3_group_add()
+ * and allow the "remount" trick to work for arbitrary resizing, assuming
+ * enough GDT blocks are reserved to grow to the desired size.
+ *
+ * Returns 0 on success or if nothing needs doing, otherwise an error code.
+ */
+int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
+		      ext3_fsblk_t n_blocks_count)
+{
+	ext3_fsblk_t o_blocks_count;
+	unsigned long o_groups_count;
+	ext3_grpblk_t last, add;
+	struct buffer_head * bh;
+	handle_t *handle;
+	int err;
+	unsigned long freed_blocks;
+
+	/* No locking wrt other resizers is needed just yet: we're going to
+	 * revalidate es->s_blocks_count after taking lock_super() below. */
+	o_blocks_count = le32_to_cpu(es->s_blocks_count);
+	o_groups_count = EXT3_SB(sb)->s_groups_count;
+
+	if (test_opt(sb, DEBUG))
+		printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" to "E3FSBLK" blocks\n",
+		       o_blocks_count, n_blocks_count);
+
+	if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
+		return 0;
+
+	if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
+		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
+			" too large to resize to %lu blocks safely\n",
+			sb->s_id, n_blocks_count);
+		if (sizeof(sector_t) < 8)
+			ext3_warning(sb, __FUNCTION__,
+			"CONFIG_LBD not enabled\n");
+		return -EINVAL;
+	}
+
+	if (n_blocks_count < o_blocks_count) {
+		ext3_warning(sb, __FUNCTION__,
+			     "can't shrink FS - resize aborted");
+		return -EBUSY;
+	}
+
+	/* Handle the remaining blocks in the last group only. */
+	last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
+		EXT3_BLOCKS_PER_GROUP(sb);
+
+	if (last == 0) {
+		ext3_warning(sb, __FUNCTION__,
+			     "need to use ext2online to resize further");
+		return -EPERM;
+	}
+
+	add = EXT3_BLOCKS_PER_GROUP(sb) - last;
+
+	if (o_blocks_count + add < o_blocks_count) {
+		ext3_warning(sb, __FUNCTION__, "blocks_count overflow");
+		return -EINVAL;
+	}
+
+	if (o_blocks_count + add > n_blocks_count)
+		add = n_blocks_count - o_blocks_count;
+
+	if (o_blocks_count + add < n_blocks_count)
+		ext3_warning(sb, __FUNCTION__,
+			     "will only finish group ("E3FSBLK
+			     " blocks, %u new)",
+			     o_blocks_count + add, add);
+
+	/* See if the device is actually as big as what was requested */
+	bh = sb_bread(sb, o_blocks_count + add -1);
+	if (!bh) {
+		ext3_warning(sb, __FUNCTION__,
+			     "can't read last block, resize aborted");
+		return -ENOSPC;
+	}
+	brelse(bh);
+
+	/* We will update the superblock, one block bitmap, and
+	 * one group descriptor via ext3_free_blocks(). */
+	handle = ext3_journal_start_sb(sb, 3);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		ext3_warning(sb, __FUNCTION__, "error %d on journal start",err);
+		goto exit_put;
+	}
+
+	lock_super(sb);
+	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
+		ext3_warning(sb, __FUNCTION__,
+			     "multiple resizers run on filesystem!");
+		unlock_super(sb);
+		ext3_journal_stop(handle);	/* don't leak the handle */
+		err = -EBUSY;
+		goto exit_put;
+	}
+
+	if ((err = ext3_journal_get_write_access(handle,
+						 EXT3_SB(sb)->s_sbh))) {
+		ext3_warning(sb, __FUNCTION__,
+			     "error %d on journal write access", err);
+		unlock_super(sb);
+		ext3_journal_stop(handle);
+		goto exit_put;
+	}
+	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
+	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+	sb->s_dirt = 1;
+	unlock_super(sb);
+	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
+		   o_blocks_count + add);
+	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
+	ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count,
+		   o_blocks_count + add);
+	if ((err = ext3_journal_stop(handle)))
+		goto exit_put;
+	if (test_opt(sb, DEBUG))
+		printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n",
+		       le32_to_cpu(es->s_blocks_count));
+	update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
+		       sizeof(struct ext3_super_block));
+exit_put:
+	return err;
+} /* ext3_group_extend */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
new file mode 100644
index 000000000000..8bfd56ef18ca
--- /dev/null
+++ b/fs/ext4/super.c
@@ -0,0 +1,2754 @@
+/*
+ *  linux/fs/ext3/super.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  from
+ *
+ *  linux/fs/minix/inode.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Big-endian to little-endian byte-swapping/bitmaps by
+ *        David S. Miller (davem@caip.rutgers.edu), 1995
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/parser.h>
+#include <linux/smp_lock.h>
+#include <linux/buffer_head.h>
+#include <linux/vfs.h>
+#include <linux/random.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/quotaops.h>
+#include <linux/seq_file.h>
+
+#include <asm/uaccess.h>
+
+#include "xattr.h"
+#include "acl.h"
+#include "namei.h"
+
+static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
+			     unsigned long journal_devnum);
+static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
+			       unsigned int);
+static void ext3_commit_super (struct super_block * sb,
+			       struct ext3_super_block * es,
+			       int sync);
+static void ext3_mark_recovery_complete(struct super_block * sb,
+					struct ext3_super_block * es);
+static void ext3_clear_journal_err(struct super_block * sb,
+				   struct ext3_super_block * es);
+static int ext3_sync_fs(struct super_block *sb, int wait);
+static const char *ext3_decode_error(struct super_block * sb, int errno,
+				     char nbuf[16]);
+static int ext3_remount (struct super_block * sb, int * flags, char * data);
+static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
+static void ext3_unlockfs(struct super_block *sb);
+static void ext3_write_super (struct super_block * sb);
+static void ext3_write_super_lockfs(struct super_block *sb);
+
+/*
+ * Wrappers for journal_start/end.
+ *
+ * ext3_journal_start_sb() starts a handle for @nblocks blocks on the
+ * journal of @sb.  It returns ERR_PTR(-EROFS) without calling
+ * journal_start() when @sb has MS_RDONLY set, or when the journal is
+ * found to be aborted (ext3_abort() is called first in that case);
+ * otherwise the result of journal_start() is returned as is.
+ */
+handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
+{
+	journal_t *jnl;
+
+	/* Refuse to start a handle when MS_RDONLY is set. */
+	if (sb->s_flags & MS_RDONLY)
+		return ERR_PTR(-EROFS);
+
+	jnl = EXT3_SB(sb)->s_journal;
+	if (!is_journal_aborted(jnl))
+		return journal_start(jnl, nblocks);
+
+	/* Special case here: the journal has aborted behind our backs
+	 * (eg. EIO in the commit thread), so we still need to take the
+	 * FS itself readonly cleanly. */
+	ext3_abort(sb, __FUNCTION__, "Detected aborted journal");
+	return ERR_PTR(-EROFS);
+}
+
+/*
+ * Stop @handle and report any failure through __ext3_std_error().
+ *
+ * The error reported is handle->h_err if that is set, otherwise the
+ * return value of journal_stop(); @where is passed on as the name of
+ * the reporting function.  Returns that error code, or 0 if neither
+ * source reported one.
+ */
+int __ext3_journal_stop(const char *where, handle_t *handle)
+{
+	/* Read the superblock and the handle's own error before stopping. */
+	struct super_block *sb = handle->h_transaction->t_journal->j_private;
+	int status = handle->h_err;
+	int stop_rc = journal_stop(handle);
+
+	/* An error already recorded on the handle takes precedence. */
+	if (!status)
+		status = stop_rc;
+	if (status)
+		__ext3_std_error(sb, where, status);
+	return status;
+}
+
+void ext3_journal_abort_handle(const char *caller, const char *err_fn,
+		struct buffer_head *bh, handle_t *handle, int err)
+{
+	char nbuf[16];
+	const char *errstr = ext3_decode_error(NULL, err, nbuf);
+
+	if (bh)
+		BUFFER_TRACE(bh, "abort");
+
+	if (!handle->h_err)
+		handle->h_err = err;
+
+	if (is_handle_aborted(handle))
+		return;
+
+	printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
+	       caller, errstr, err_fn);
+
+	journal_abort_handle(handle);
+}
+
+/* Deal with the reporting of failure conditions on a filesystem such as
+ * inconsistencies detected or read IO failures.
+ *
+ * On ext2, we can store the error state of the filesystem in the
+ * superblock.  That is not possible on ext3, because we may have other
+ * write ordering constraints on the superblock which prevent us from
+ * writing it out straight away; and given that the journal is about to
+ * be aborted, we can't rely on the current, or future, transactions to
+ * write out the superblock safely.
+ *
+ * We'll just use the journal_abort() error code to record an error in
+ * the journal instead.  On recovery, the journal will complain about
+ * that error until we've noted it down and cleared it.
+ */
+
+static void ext3_handle_error(struct super_block *sb)
+{
+	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+
+	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+	es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
+	/* With MS_RDONLY already set, only the flags above are recorded. */
+	if (sb->s_flags & MS_RDONLY)
+		return;
+	/* Unless errors=continue: flag the mount aborted, abort the journal. */
+	if (!test_opt (sb, ERRORS_CONT)) {
+		journal_t *journal = EXT3_SB(sb)->s_journal;
+
+		EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
+		if (journal)
+			journal_abort(journal, -EIO);
+	}
+	if (test_opt (sb, ERRORS_RO)) {
+		printk (KERN_CRIT "Remounting filesystem read-only\n");
+		sb->s_flags |= MS_RDONLY;
+	}
+	ext3_commit_super(sb, es, 1);
+	if (test_opt(sb, ERRORS_PANIC))
+		panic("EXT3-fs (device %s): panic forced after error\n",
+			sb->s_id);
+}
+
+void ext3_error (struct super_block * sb, const char * function,
+		 const char * fmt, ...)
+{
+	va_list args;
+	/* Log "EXT3-fs error (device <id>): <function>: <message>". */
+	va_start(args, fmt);
+	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+	/* Then apply the configured error behaviour for @sb. */
+	ext3_handle_error(sb);
+}
+
+/*
+ * Translate an error code into a printable string.
+ *
+ * -EIO and -ENOMEM map to fixed texts.  -EROFS yields "Journal has
+ * aborted" when @sb is NULL or its journal has JFS_ABORT set, and
+ * "Readonly filesystem" otherwise.  Any other code is formatted into
+ * @nbuf as "error <n>"; if @nbuf is NULL the result is NULL instead.
+ *
+ * The returned pointer is a string literal, @nbuf itself, or NULL.
+ */
+static const char *ext3_decode_error(struct super_block * sb, int errno,
+				     char nbuf[16])
+{
+	const char *text = NULL;
+
+	if (errno == -EIO) {
+		text = "IO failure";
+	} else if (errno == -ENOMEM) {
+		text = "Out of memory";
+	} else if (errno == -EROFS) {
+		if (sb && !(EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT))
+			text = "Readonly filesystem";
+		else
+			text = "Journal has aborted";
+	} else if (nbuf) {
+		/* Check for truncated error codes... */
+		if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
+			text = nbuf;
+	}
+
+	return text;
+}
+
+/* __ext3_std_error decodes expected errors from journaling functions
+ * automatically and invokes the appropriate error response: the decoded
+ * text is logged and ext3_handle_error() is then called for @sb. */
+void __ext3_std_error (struct super_block * sb, const char * function,
+		       int errno)
+{
+	char nbuf[16];
+	const char *errstr;
+
+	/* Special case: if the error is EROFS, and we're not already
+	 * inside a transaction, then there's really no point in logging
+	 * an error. */
+	if (errno == -EROFS && journal_current_handle() == NULL &&
+	    (sb->s_flags & MS_RDONLY))
+		return;
+
+	errstr = ext3_decode_error(sb, errno, nbuf);
+	printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n",
+		sb->s_id, function, errstr);
+
+	ext3_handle_error(sb);
+}
+
+/*
+ * ext3_abort is a much stronger failure handler than ext3_error.  The
+ * abort function may be used to deal with unrecoverable failures such
+ * as journal IO errors or ENOMEM at a critical moment in log management.
+ *
+ * We unconditionally force the filesystem into an ABORT|READONLY state,
+ * unless the error response on the fs has been set to panic in which
+ * case we take the easy way out and panic immediately.
+ */
+
+void ext3_abort (struct super_block * sb, const char * function,
+		 const char * fmt, ...)
+{
+	va_list args;
+
+	printk (KERN_CRIT "ext3_abort called.\n");
+	/* Log the caller-supplied message with the usual error prefix. */
+	va_start(args, fmt);
+	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+	/* errors=panic is honoured before anything else is changed. */
+	if (test_opt(sb, ERRORS_PANIC))
+		panic("EXT3-fs panic from previous error\n");
+	/* MS_RDONLY already set: nothing further is forced. */
+	if (sb->s_flags & MS_RDONLY)
+		return;
+	/* Force read-only, flag the error and abort, then abort the journal. */
+	printk(KERN_CRIT "Remounting filesystem read-only\n");
+	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+	sb->s_flags |= MS_RDONLY;
+	EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
+	journal_abort(EXT3_SB(sb)->s_journal, -EIO);
+}
+
+void ext3_warning (struct super_block * sb, const char * function,
+		   const char * fmt, ...)
+{
+	va_list args;
+	/* Log only: unlike ext3_error(), ext3_handle_error() is not called. */
+	va_start(args, fmt);
+	printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ",
+	       sb->s_id, function);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+}
+
+void ext3_update_dynamic_rev(struct super_block *sb)
+{
+	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+	/* Nothing to do if s_rev_level is already above EXT3_GOOD_OLD_REV. */
+	if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
+		return;
+	/* Warn, then set the first-inode, inode-size and revision fields. */
+	ext3_warning(sb, __FUNCTION__,
+		     "updating to rev %d because of new feature flag, "
+		     "running e2fsck is recommended",
+		     EXT3_DYNAMIC_REV);
+
+	es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
+	es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
+	es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
+	/* leave es->s_feature_*compat flags alone */
+	/* es->s_uuid will be set by e2fsck if empty */
+
+	/*
+	 * The rest of the superblock fields should be zero, and if not it
+	 * means they are likely already in use, so leave them alone.  We
+	 * can leave it up to e2fsck to clean up any inconsistencies there.
+	 */
+}
+
+/*
+ * Open the external journal device @dev for reading and writing.
+ * Returns the block device, or NULL (after logging) if the open failed.
+ */
+static struct block_device *ext3_blkdev_get(dev_t dev)
+{
+	char name[BDEVNAME_SIZE];
+	struct block_device *jdev;
+
+	jdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+	if (!IS_ERR(jdev))
+		return jdev;
+
+	/* jdev holds an error pointer here; report it, hand back NULL. */
+	printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n",
+			__bdevname(dev, name), PTR_ERR(jdev));
+	return NULL;
+}
+
+/*
+ * Release the journal device; returns whatever blkdev_put() returns.
+ */
+static int ext3_blkdev_put(struct block_device *bdev)
+{
+	bd_release(bdev);
+	return blkdev_put(bdev);
+}
+
+/* Put @sbi's journal device, if any, and clear the pointer to it. */
+static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
+{
+	struct block_device *jdev = sbi->journal_bdev;
+	int status;
+
+	if (!jdev)
+		return -ENODEV;	/* no journal device recorded */
+	status = ext3_blkdev_put(jdev);
+	sbi->journal_bdev = NULL;
+	return status;
+}
+
+static inline struct inode *orphan_list_entry(struct list_head *l)
+{	/* @l is an i_orphan link; return the vfs_inode of its owner */
+	return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
+}
+
+static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
+{
+	struct list_head *l;
+	/* Print the superblock's s_last_orphan value first... */
+	printk(KERN_ERR "sb orphan head is %d\n",
+	       le32_to_cpu(sbi->s_es->s_last_orphan));
+	/* ...then one line per inode on the sbi->s_orphan list. */
+	printk(KERN_ERR "sb_info orphan list:\n");
+	list_for_each(l, &sbi->s_orphan) {
+		struct inode *inode = orphan_list_entry(l);
+		printk(KERN_ERR "  "
+		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
+		       inode->i_sb->s_id, inode->i_ino, inode,
+		       inode->i_mode, inode->i_nlink,
+		       NEXT_ORPHAN(inode));
+	}
+}
+
+static void ext3_put_super (struct super_block * sb)
+{
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext3_super_block *es = sbi->s_es;
+	int i;
+	/* Shut down xattr state and the journal first. */
+	ext3_xattr_put_super(sb);
+	journal_destroy(sbi->s_journal);
+	if (!(sb->s_flags & MS_RDONLY)) {
+		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+		es->s_state = cpu_to_le16(sbi->s_mount_state);
+		BUFFER_TRACE(sbi->s_sbh, "marking dirty");
+		mark_buffer_dirty(sbi->s_sbh);
+		ext3_commit_super(sb, es, 1);
+	}
+	/* Release group descriptor buffers, counters and the sb buffer. */
+	for (i = 0; i < sbi->s_gdb_count; i++)
+		brelse(sbi->s_group_desc[i]);
+	kfree(sbi->s_group_desc);
+	percpu_counter_destroy(&sbi->s_freeblocks_counter);
+	percpu_counter_destroy(&sbi->s_freeinodes_counter);
+	percpu_counter_destroy(&sbi->s_dirs_counter);
+	brelse(sbi->s_sbh);
+#ifdef CONFIG_QUOTA
+	for (i = 0; i < MAXQUOTAS; i++)
+		kfree(sbi->s_qf_names[i]);
+#endif
+
+	/* Debugging code just in case the in-memory inode orphan list
+	 * isn't empty.  The on-disk one can be non-empty if we've
+	 * detected an error and taken the fs readonly, but the
+	 * in-memory list had better be clean by this point. */
+	if (!list_empty(&sbi->s_orphan))
+		dump_orphan_list(sb, sbi);
+	J_ASSERT(list_empty(&sbi->s_orphan));
+	/* Invalidate the fs device and, if separate, the journal device. */
+	invalidate_bdev(sb->s_bdev, 0);
+	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
+		/*
+		 * Invalidate the journal device's buffers.  We don't want them
+		 * floating about in memory - the physical journal device may
+		 * be hotswapped, and it breaks the `ro-after' testing code.
+		 */
+		sync_blockdev(sbi->journal_bdev);
+		invalidate_bdev(sbi->journal_bdev, 0);
+		ext3_blkdev_remove(sbi);
+	}
+	sb->s_fs_info = NULL;
+	kfree(sbi);
+	return;
+}
+
+static kmem_cache_t *ext3_inode_cachep;	/* set up by init_inodecache() */
+
+/* Allocate an ext3_inode_info from the slab cache and return its embedded
+ * VFS inode, or NULL if the allocation fails.
+ * Called inside transaction, so use GFP_NOFS (SLAB_NOFS below). */
+static struct inode *ext3_alloc_inode(struct super_block *sb)
+{
+	struct ext3_inode_info *ei;
+
+	ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS);
+	if (!ei)
+		return NULL;
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
+	ei->i_acl = EXT3_ACL_NOT_CACHED;
+	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
+#endif
+	ei->i_block_alloc_info = NULL;
+	ei->vfs_inode.i_version = 1;
+	return &ei->vfs_inode;
+}
+
+static void ext3_destroy_inode(struct inode *inode)
+{	/* give the enclosing ext3_inode_info back to the slab cache */
+	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+}
+
+static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
+	/* Act only if SLAB_CTOR_CONSTRUCTOR is set without SLAB_CTOR_VERIFY. */
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR) {
+		INIT_LIST_HEAD(&ei->i_orphan);
+#ifdef CONFIG_EXT3_FS_XATTR
+		init_rwsem(&ei->xattr_sem);
+#endif
+		mutex_init(&ei->truncate_mutex);
+		inode_init_once(&ei->vfs_inode);
+	}
+}
+
+/* Create the ext3_inode_info slab cache; 0 on success, -ENOMEM if not. */
+static int init_inodecache(void)
+{
+	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
+					     sizeof(struct ext3_inode_info),
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
+					     init_once, NULL);
+
+	return ext3_inode_cachep ? 0 : -ENOMEM;
+}
+
+static void destroy_inodecache(void)
+{	/* counterpart of init_inodecache() */
+	kmem_cache_destroy(ext3_inode_cachep);
+}
+
+/* Release cached ACLs (if configured), the block reservation and its info. */
+static void ext3_clear_inode(struct inode *inode)
+{
+	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext3_block_alloc_info *rsv = ei->i_block_alloc_info;
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
+	if (ei->i_acl && ei->i_acl != EXT3_ACL_NOT_CACHED) {
+		posix_acl_release(ei->i_acl);
+		ei->i_acl = EXT3_ACL_NOT_CACHED;
+	}
+	if (ei->i_default_acl && ei->i_default_acl != EXT3_ACL_NOT_CACHED) {
+		posix_acl_release(ei->i_default_acl);
+		ei->i_default_acl = EXT3_ACL_NOT_CACHED;
+	}
+#endif
+	ext3_discard_reservation(inode);
+	ei->i_block_alloc_info = NULL;
+	if (unlikely(rsv))
+		kfree(rsv);
+}
+
+static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
+{
+#if defined(CONFIG_QUOTA)
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	/* Journalled-quota format and file names, then plain quota flags. */
+	if (sbi->s_jquota_fmt)
+		seq_printf(seq, ",jqfmt=%s",
+		(sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
+
+	if (sbi->s_qf_names[USRQUOTA])
+		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
+
+	if (sbi->s_qf_names[GRPQUOTA])
+		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
+
+	if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA)
+		seq_puts(seq, ",usrquota");
+
+	if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)
+		seq_puts(seq, ",grpquota");
+#endif
+}
+
+static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+	struct super_block *sb = vfs->mnt_sb;
+	/* Append the data= mode (if one is set), then the quota options. */
+	if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
+		seq_puts(seq, ",data=journal");
+	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
+		seq_puts(seq, ",data=ordered");
+	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
+		seq_puts(seq, ",data=writeback");
+
+	ext3_show_quota_options(seq, sb);
+
+	return 0;
+}
+
+
+static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp)
+{
+	__u32 *objp = vobjp;
+	unsigned long ino = objp[0];
+	__u32 generation = objp[1];
+	struct inode *inode;
+	struct dentry *result;
+	/* objp[0] is the inode number, objp[1] the generation to match. */
+	if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
+		return ERR_PTR(-ESTALE);
+	if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
+		return ERR_PTR(-ESTALE);
+
+	/* iget isn't really right if the inode is currently unallocated!!
+	 *
+	 * ext3_read_inode will return a bad_inode if the inode had been
+	 * deleted, so we should be safe.
+	 *
+	 * Currently we don't know the generation for parent directory, so
+	 * a generation of 0 means "accept any"
+	 */
+	inode = iget(sb, ino);
+	if (inode == NULL)
+		return ERR_PTR(-ENOMEM);
+	if (is_bad_inode(inode) ||
+	    (generation && inode->i_generation != generation)) {
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+	/* now to find a dentry.
+	 * If possible, get a well-connected one
+	 */
+	result = d_alloc_anon(inode);
+	if (!result) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	return result;
+}
+
+#ifdef CONFIG_QUOTA
+#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")	/* label for type t */
+#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+
+static int ext3_dquot_initialize(struct inode *inode, int type);
+static int ext3_dquot_drop(struct inode *inode);
+static int ext3_write_dquot(struct dquot *dquot);
+static int ext3_acquire_dquot(struct dquot *dquot);
+static int ext3_release_dquot(struct dquot *dquot);
+static int ext3_mark_dquot_dirty(struct dquot *dquot);
+static int ext3_write_info(struct super_block *sb, int type);
+static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path);
+static int ext3_quota_on_mount(struct super_block *sb, int type);
+static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
+			       size_t len, loff_t off);
+static ssize_t ext3_quota_write(struct super_block *sb, int type,
+				const char *data, size_t len, loff_t off);
+
+static struct dquot_operations ext3_quota_operations = {
+	.initialize	= ext3_dquot_initialize, /* ext3_*: static, this file */
+	.drop		= ext3_dquot_drop,
+	.alloc_space	= dquot_alloc_space, /* dquot_*: no ext3 wrapper used */
+	.alloc_inode	= dquot_alloc_inode,
+	.free_space	= dquot_free_space,
+	.free_inode	= dquot_free_inode,
+	.transfer	= dquot_transfer,
+	.write_dquot	= ext3_write_dquot,
+	.acquire_dquot	= ext3_acquire_dquot,
+	.release_dquot	= ext3_release_dquot,
+	.mark_dirty	= ext3_mark_dquot_dirty,
+	.write_info	= ext3_write_info
+};
+
+static struct quotactl_ops ext3_qctl_operations = {
+	.quota_on	= ext3_quota_on,	/* the only ext3_* entry here */
+	.quota_off	= vfs_quota_off,
+	.quota_sync	= vfs_quota_sync,
+	.get_info	= vfs_get_dqinfo,
+	.set_info	= vfs_set_dqinfo,
+	.get_dqblk	= vfs_get_dqblk,
+	.set_dqblk	= vfs_set_dqblk
+};
+#endif
+
+static struct super_operations ext3_sops = { /* ext3 superblock methods */
+	.alloc_inode	= ext3_alloc_inode,
+	.destroy_inode	= ext3_destroy_inode,
+	.read_inode	= ext3_read_inode,
+	.write_inode	= ext3_write_inode,
+	.dirty_inode	= ext3_dirty_inode,
+	.delete_inode	= ext3_delete_inode,
+	.put_super	= ext3_put_super,
+	.write_super	= ext3_write_super,
+	.sync_fs	= ext3_sync_fs,
+	.write_super_lockfs = ext3_write_super_lockfs,
+	.unlockfs	= ext3_unlockfs,
+	.statfs		= ext3_statfs,
+	.remount_fs	= ext3_remount,
+	.clear_inode	= ext3_clear_inode,
+	.show_options	= ext3_show_options,
+#ifdef CONFIG_QUOTA
+	.quota_read	= ext3_quota_read,	/* only with CONFIG_QUOTA */
+	.quota_write	= ext3_quota_write,	/* only with CONFIG_QUOTA */
+#endif
+};
+
+static struct export_operations ext3_export_ops = {
+	.get_parent = ext3_get_parent,	/* presumably declared in namei.h */
+	.get_dentry = ext3_get_dentry,	/* static, defined in this file */
+};
+
+enum {	/* mount option token ids, paired with patterns in tokens[] */
+	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
+	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
+	Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
+	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
+	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
+	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
+	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
+	Opt_grpquota
+};
+
+static match_table_t tokens = {
+	{Opt_bsd_df, "bsddf"},
+	{Opt_minix_df, "minixdf"},
+	{Opt_grpid, "grpid"},
+	{Opt_grpid, "bsdgroups"},
+	{Opt_nogrpid, "nogrpid"},
+	{Opt_nogrpid, "sysvgroups"},
+	{Opt_resgid, "resgid=%u"},
+	{Opt_resuid, "resuid=%u"},
+	{Opt_sb, "sb=%u"},
+	{Opt_err_cont, "errors=continue"},
+	{Opt_err_panic, "errors=panic"},
+	{Opt_err_ro, "errors=remount-ro"},
+	{Opt_nouid32, "nouid32"},
+	{Opt_nocheck, "nocheck"},
+	{Opt_nocheck, "check=none"},
+	{Opt_debug, "debug"},
+	{Opt_oldalloc, "oldalloc"},
+	{Opt_orlov, "orlov"},
+	{Opt_user_xattr, "user_xattr"},
+	{Opt_nouser_xattr, "nouser_xattr"},
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
+	{Opt_reservation, "reservation"},
+	{Opt_noreservation, "noreservation"},
+	{Opt_noload, "noload"},
+	{Opt_nobh, "nobh"},
+	{Opt_bh, "bh"},
+	{Opt_commit, "commit=%u"},
+	{Opt_journal_update, "journal=update"},
+	{Opt_journal_inum, "journal=%u"},
+	{Opt_journal_dev, "journal_dev=%u"},
+	{Opt_abort, "abort"},
+	{Opt_data_journal, "data=journal"},
+	{Opt_data_ordered, "data=ordered"},
+	{Opt_data_writeback, "data=writeback"},
+	{Opt_offusrjquota, "usrjquota="},
+	{Opt_usrjquota, "usrjquota=%s"},
+	{Opt_offgrpjquota, "grpjquota="},
+	{Opt_grpjquota, "grpjquota=%s"},
+	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
+	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
+	{Opt_grpquota, "grpquota"},
+	{Opt_noquota, "noquota"},
+	{Opt_quota, "quota"},
+	{Opt_usrquota, "usrquota"},
+	{Opt_barrier, "barrier=%u"},
+	{Opt_resize, "resize"},
+	{Opt_err, NULL},	/* NULL pattern ends the table: keep last */
+};
+
+static ext3_fsblk_t get_sb_block(void **data)
+{
+	ext3_fsblk_t	sb_block;
+	char		*options = (char *) *data;
+	/* Only a leading "sb=" is recognised; otherwise use block 1. */
+	if (!options || strncmp(options, "sb=", 3) != 0)
+		return 1;	/* Default location */
+	options += 3;
+	/*todo: use simple_strtoll with >32bit ext3 */
+	sb_block = simple_strtoul(options, &options, 0);
+	if (*options && *options != ',') {
+		printk("EXT3-fs: Invalid sb specification: %s\n",
+		       (char *) *data);
+		return 1;
+	}
+	if (*options == ',')
+		options++;
+	*data = (void *) options;
+	return sb_block;
+}
+
+static int parse_options (char *options, struct super_block *sb,
+			  unsigned int *inum, unsigned long *journal_devnum,
+			  ext3_fsblk_t *n_blocks_count, int is_remount)
+{
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	char * p;
+	substring_t args[MAX_OPT_ARGS];
+	int data_opt = 0;
+	int option;
+#ifdef CONFIG_QUOTA
+	int qtype;
+	char *qname;
+#endif
+
+	if (!options)
+		return 1;
+
+	while ((p = strsep (&options, ",")) != NULL) {
+		int token;
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_bsd_df:
+			clear_opt (sbi->s_mount_opt, MINIX_DF);
+			break;
+		case Opt_minix_df:
+			set_opt (sbi->s_mount_opt, MINIX_DF);
+			break;
+		case Opt_grpid:
+			set_opt (sbi->s_mount_opt, GRPID);
+			break;
+		case Opt_nogrpid:
+			clear_opt (sbi->s_mount_opt, GRPID);
+			break;
+		case Opt_resuid:
+			if (match_int(&args[0], &option))
+				return 0;
+			sbi->s_resuid = option;
+			break;
+		case Opt_resgid:
+			if (match_int(&args[0], &option))
+				return 0;
+			sbi->s_resgid = option;
+			break;
+		case Opt_sb:
+			/* handled by get_sb_block() instead of here */
+			/* *sb_block = match_int(&args[0]); */
+			break;
+		case Opt_err_panic:
+			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt (sbi->s_mount_opt, ERRORS_RO);
+			set_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			break;
+		case Opt_err_ro:
+			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt (sbi->s_mount_opt, ERRORS_RO);
+			break;
+		case Opt_err_cont:
+			clear_opt (sbi->s_mount_opt, ERRORS_RO);
+			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt (sbi->s_mount_opt, ERRORS_CONT);
+			break;
+		case Opt_nouid32:
+			set_opt (sbi->s_mount_opt, NO_UID32);
+			break;
+		case Opt_nocheck:
+			clear_opt (sbi->s_mount_opt, CHECK);
+			break;
+		case Opt_debug:
+			set_opt (sbi->s_mount_opt, DEBUG);
+			break;
+		case Opt_oldalloc:
+			set_opt (sbi->s_mount_opt, OLDALLOC);
+			break;
+		case Opt_orlov:
+			clear_opt (sbi->s_mount_opt, OLDALLOC);
+			break;
+#ifdef CONFIG_EXT3_FS_XATTR
+		case Opt_user_xattr:
+			set_opt (sbi->s_mount_opt, XATTR_USER);
+			break;
+		case Opt_nouser_xattr:
+			clear_opt (sbi->s_mount_opt, XATTR_USER);
+			break;
+#else
+		case Opt_user_xattr:
+		case Opt_nouser_xattr:
+			printk("EXT3 (no)user_xattr options not supported\n");
+			break;
+#endif
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
+		case Opt_acl:
+			set_opt(sbi->s_mount_opt, POSIX_ACL);
+			break;
+		case Opt_noacl:
+			clear_opt(sbi->s_mount_opt, POSIX_ACL);
+			break;
+#else
+		case Opt_acl:
+		case Opt_noacl:
+			printk("EXT3 (no)acl options not supported\n");
+			break;
+#endif
+		case Opt_reservation:
+			set_opt(sbi->s_mount_opt, RESERVATION);
+			break;
+		case Opt_noreservation:
+			clear_opt(sbi->s_mount_opt, RESERVATION);
+			break;
+		case Opt_journal_update:
+			/* @@@ FIXME */
+			/* Eventually we will want to be able to create
+			   a journal file here.  For now, only allow the
+			   user to specify an existing inode to be the
+			   journal file. */
+			if (is_remount) {
+				printk(KERN_ERR "EXT3-fs: cannot specify "
+				       "journal on remount\n");
+				return 0;
+			}
+			set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
+			break;
+		case Opt_journal_inum:
+			if (is_remount) {
+				printk(KERN_ERR "EXT3-fs: cannot specify "
+				       "journal on remount\n");
+				return 0;
+			}
+			if (match_int(&args[0], &option))
+				return 0;
+			*inum = option;
+			break;
+		case Opt_journal_dev:
+			if (is_remount) {
+				printk(KERN_ERR "EXT3-fs: cannot specify "
+				       "journal on remount\n");
+				return 0;
+			}
+			if (match_int(&args[0], &option))
+				return 0;
+			*journal_devnum = option;
+			break;
+		case Opt_noload:
+			set_opt (sbi->s_mount_opt, NOLOAD);
+			break;
+		case Opt_commit:
+			if (match_int(&args[0], &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			if (option == 0)
+				option = JBD_DEFAULT_MAX_COMMIT_AGE;
+			sbi->s_commit_interval = HZ * option;
+			break;
+		case Opt_data_journal:
+			data_opt = EXT3_MOUNT_JOURNAL_DATA;
+			goto datacheck;
+		case Opt_data_ordered:
+			data_opt = EXT3_MOUNT_ORDERED_DATA;
+			goto datacheck;
+		case Opt_data_writeback:
+			data_opt = EXT3_MOUNT_WRITEBACK_DATA;
+		datacheck:
+			if (is_remount) {
+				if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
+						!= data_opt) {
+					printk(KERN_ERR
+						"EXT3-fs: cannot change data "
+						"mode on remount\n");
+					return 0;
+				}
+			} else {
+				sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
+				sbi->s_mount_opt |= data_opt;
+			}
+			break;
+#ifdef CONFIG_QUOTA
+		case Opt_usrjquota:
+			qtype = USRQUOTA;
+			goto set_qf_name;
+		case Opt_grpjquota:
+			qtype = GRPQUOTA;
+set_qf_name:
+			if (sb_any_quota_enabled(sb)) {
+				printk(KERN_ERR
+					"EXT3-fs: Cannot change journalled "
+					"quota options when quota turned on.\n");
+				return 0;
+			}
+			qname = match_strdup(&args[0]);
+			if (!qname) {
+				printk(KERN_ERR
+					"EXT3-fs: not enough memory for "
+					"storing quotafile name.\n");
+				return 0;
+			}
+			if (sbi->s_qf_names[qtype] &&
+			    strcmp(sbi->s_qf_names[qtype], qname)) {
+				printk(KERN_ERR
+					"EXT3-fs: %s quota file already "
+					"specified.\n", QTYPE2NAME(qtype));
+				kfree(qname);
+				return 0;
+			}
+			sbi->s_qf_names[qtype] = qname;
+			if (strchr(sbi->s_qf_names[qtype], '/')) {
+				printk(KERN_ERR
+					"EXT3-fs: quotafile must be on "
+					"filesystem root.\n");
+				kfree(sbi->s_qf_names[qtype]);
+				sbi->s_qf_names[qtype] = NULL;
+				return 0;
+			}
+			set_opt(sbi->s_mount_opt, QUOTA);
+			break;
+		case Opt_offusrjquota:
+			qtype = USRQUOTA;
+			goto clear_qf_name;
+		case Opt_offgrpjquota:
+			qtype = GRPQUOTA;
+clear_qf_name:
+			if (sb_any_quota_enabled(sb)) {
+				printk(KERN_ERR "EXT3-fs: Cannot change "
+					"journalled quota options when "
+					"quota turned on.\n");
+				return 0;
+			}
+			/*
+			 * The space will be released later when all options
+			 * are confirmed to be correct
+			 */
+			sbi->s_qf_names[qtype] = NULL;
+			break;
+		case Opt_jqfmt_vfsold:
+			sbi->s_jquota_fmt = QFMT_VFS_OLD;
+			break;
+		case Opt_jqfmt_vfsv0:
+			sbi->s_jquota_fmt = QFMT_VFS_V0;
+			break;
+		case Opt_quota:
+		case Opt_usrquota:
+			set_opt(sbi->s_mount_opt, QUOTA);
+			set_opt(sbi->s_mount_opt, USRQUOTA);
+			break;
+		case Opt_grpquota:
+			set_opt(sbi->s_mount_opt, QUOTA);
+			set_opt(sbi->s_mount_opt, GRPQUOTA);
+			break;
+		case Opt_noquota:
+			if (sb_any_quota_enabled(sb)) {
+				printk(KERN_ERR "EXT3-fs: Cannot change quota "
+					"options when quota turned on.\n");
+				return 0;
+			}
+			clear_opt(sbi->s_mount_opt, QUOTA);
+			clear_opt(sbi->s_mount_opt, USRQUOTA);
+			clear_opt(sbi->s_mount_opt, GRPQUOTA);
+			break;
+#else
+		case Opt_quota:
+		case Opt_usrquota:
+		case Opt_grpquota:
+		case Opt_usrjquota:
+		case Opt_grpjquota:
+		case Opt_offusrjquota:
+		case Opt_offgrpjquota:
+		case Opt_jqfmt_vfsold:
+		case Opt_jqfmt_vfsv0:
+			printk(KERN_ERR
+				"EXT3-fs: journalled quota options not "
+				"supported.\n");
+			break;
+		case Opt_noquota:
+			break;
+#endif
+		case Opt_abort:
+			set_opt(sbi->s_mount_opt, ABORT);
+			break;
+		case Opt_barrier:
+			if (match_int(&args[0], &option))
+				return 0;
+			if (option)
+				set_opt(sbi->s_mount_opt, BARRIER);
+			else
+				clear_opt(sbi->s_mount_opt, BARRIER);
+			break;
+		case Opt_ignore:
+			break;
+		case Opt_resize:
+			if (!is_remount) {
+				printk("EXT3-fs: resize option only available "
+					"for remount\n");
+				return 0;
+			}
+			if (match_int(&args[0], &option) != 0)
+				return 0;
+			*n_blocks_count = option;
+			break;
+		case Opt_nobh:
+			set_opt(sbi->s_mount_opt, NOBH);
+			break;
+		case Opt_bh:
+			clear_opt(sbi->s_mount_opt, NOBH);
+			break;
+		default:
+			printk (KERN_ERR
+				"EXT3-fs: Unrecognized mount option \"%s\" "
+				"or missing value\n", p);
+			return 0;
+		}
+	}
+#ifdef CONFIG_QUOTA
+	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+		if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) &&
+		     sbi->s_qf_names[USRQUOTA])
+			clear_opt(sbi->s_mount_opt, USRQUOTA);
+
+		if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
+		     sbi->s_qf_names[GRPQUOTA])
+			clear_opt(sbi->s_mount_opt, GRPQUOTA);
+
+		if ((sbi->s_qf_names[USRQUOTA] &&
+				(sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
+		    (sbi->s_qf_names[GRPQUOTA] &&
+				(sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
+			printk(KERN_ERR "EXT3-fs: old and new quota "
+					"format mixing.\n");
+			return 0;
+		}
+
+		if (!sbi->s_jquota_fmt) {
+			printk(KERN_ERR "EXT3-fs: journalled quota format "
+					"not specified.\n");
+			return 0;
+		}
+	} else {
+		if (sbi->s_jquota_fmt) {
+			printk(KERN_ERR "EXT3-fs: journalled quota format "
+					"specified with no journalling "
+					"enabled.\n");
+			return 0;
+		}
+	}
+#endif
+	return 1;
+}
+
+static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
+			    int read_only)	/* nonzero: return before any on-disk update */
+{	/* Warn about the fs state at mount, then bump mount count and flag recovery */
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	int res = 0;	/* return value: MS_RDONLY if the fs revision is too new, else 0 */
+
+	if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
+		printk (KERN_ERR "EXT3-fs warning: revision level too high, "
+			"forcing read-only mode\n");
+		res = MS_RDONLY;
+	}
+	if (read_only)	/* everything below modifies or reports on a writable mount */
+		return res;
+	if (!(sbi->s_mount_state & EXT3_VALID_FS))	/* else-if chain: at most one warning */
+		printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, "
+			"running e2fsck is recommended\n");
+	else if ((sbi->s_mount_state & EXT3_ERROR_FS))
+		printk (KERN_WARNING
+			"EXT3-fs warning: mounting fs with errors, "
+			"running e2fsck is recommended\n");
+	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&	/* negative max: check skipped */
+		 le16_to_cpu(es->s_mnt_count) >=
+		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
+		printk (KERN_WARNING
+			"EXT3-fs warning: maximal mount count reached, "
+			"running e2fsck is recommended\n");
+	else if (le32_to_cpu(es->s_checkinterval) &&	/* zero interval: check skipped */
+		(le32_to_cpu(es->s_lastcheck) +
+			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
+		printk (KERN_WARNING
+			"EXT3-fs warning: checktime reached, "
+			"running e2fsck is recommended\n");
+#if 0
+		/* @@@ We _will_ want to clear the valid bit if we find
+		   inconsistencies, to force a fsck at reboot.  But for
+		   a plain journaled filesystem we can keep it set as
+		   valid forever! :) */
+	es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS);
+#endif
+	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))	/* 0: install the default maximum */
+		es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
+	es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
+	es->s_mtime = cpu_to_le32(get_seconds());	/* record the time of this mount */
+	ext3_update_dynamic_rev(sb);
+	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+
+	ext3_commit_super(sb, es, 1);
+	if (test_opt(sb, DEBUG))	/* geometry dump, only with the debug mount option */
+		printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, "
+				"bpg=%lu, ipg=%lu, mo=%04lx]\n",
+			sb->s_blocksize,
+			sbi->s_groups_count,
+			EXT3_BLOCKS_PER_GROUP(sb),
+			EXT3_INODES_PER_GROUP(sb),
+			sbi->s_mount_opt);
+
+	printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id);	/* line is completed by one printk below */
+	if (EXT3_SB(sb)->s_journal->j_inode == NULL) {	/* no journal inode: report j_dev instead */
+		char b[BDEVNAME_SIZE];
+
+		printk("external journal on %s\n",
+			bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
+	} else {
+		printk("internal journal\n");
+	}
+	return res;
+}
+
+/* Called at mount-time, super-block is locked.  Returns 1 if every group's metadata lies inside the group, else 0 */
+static int ext3_check_descriptors (struct super_block * sb)
+{
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
+	ext3_fsblk_t last_block;	/* last block of the group being checked */
+	struct ext3_group_desc * gdp = NULL;
+	int desc_block = 0;	/* index of the next descriptor buffer to map */
+	int i;
+
+	ext3_debug ("Checking group descriptors");
+
+	for (i = 0; i < sbi->s_groups_count; i++)
+	{
+		if (i == sbi->s_groups_count - 1)	/* last group ends where the fs ends */
+			last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
+		else
+			last_block = first_block +
+				(EXT3_BLOCKS_PER_GROUP(sb) - 1);
+
+		if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0)	/* crossed into the next descriptor block */
+			gdp = (struct ext3_group_desc *)
+					sbi->s_group_desc[desc_block++]->b_data;
+		if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
+		    le32_to_cpu(gdp->bg_block_bitmap) > last_block)
+		{
+			ext3_error (sb, "ext3_check_descriptors",
+				    "Block bitmap for group %d"
+				    " not in group (block %lu)!",
+				    i, (unsigned long)
+					le32_to_cpu(gdp->bg_block_bitmap));
+			return 0;
+		}
+		if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
+		    le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
+		{
+			ext3_error (sb, "ext3_check_descriptors",
+				    "Inode bitmap for group %d"
+				    " not in group (block %lu)!",
+				    i, (unsigned long)
+					le32_to_cpu(gdp->bg_inode_bitmap));
+			return 0;
+		}
+		if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
+		    le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
+		    last_block)
+		{	/* "- 1": the table's final block may coincide with last_block */
+			ext3_error (sb, "ext3_check_descriptors",
+				    "Inode table for group %d"
+				    " not in group (block %lu)!",
+				    i, (unsigned long)
+					le32_to_cpu(gdp->bg_inode_table));
+			return 0;
+		}
+		first_block += EXT3_BLOCKS_PER_GROUP(sb);
+		gdp++;
+	}
+
+	sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));	/* refresh sb totals */
+	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
+	return 1;
+}
+
+
+/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
+ * the superblock) which were deleted from all directories, but held open by
+ * a process at the time of a crash.  We walk the list and try to delete these
+ * inodes at recovery time (only with a read-write filesystem).
+ *
+ * In order to keep the orphan inode chain consistent during traversal (in
+ * case of crash during recovery), we link each inode into the superblock
+ * orphan list_head and handle it the same way as an inode deletion during
+ * normal operation (which journals the operations for us).
+ *
+ * We only do an iget() and an iput() on each inode, which is very safe if we
+ * accidentally point at an in-use or already deleted inode.  The worst that
+ * can happen in this case is that we get a "bit already cleared" message from
+ * ext3_free_inode().  The only reason we would point at a wrong inode is if
+ * e2fsck was run on this filesystem, and it must have already done the orphan
+ * inode cleanup for us, so we can safely abort without any further action.
+ */
+static void ext3_orphan_cleanup (struct super_block * sb,
+				 struct ext3_super_block * es)
+{
+	unsigned int s_flags = sb->s_flags;	/* saved: the flags are modified below and restored on exit */
+	int nr_orphans = 0, nr_truncates = 0;	/* counters for the summary messages */
+#ifdef CONFIG_QUOTA
+	int i;
+#endif
+	if (!es->s_last_orphan) {
+		jbd_debug(4, "no orphan inodes to clean up\n");
+		return;
+	}
+
+	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
+		if (es->s_last_orphan)	/* always true here: the zero case returned above */
+			jbd_debug(1, "Errors on filesystem, "
+				  "clearing orphan list.\n");
+		es->s_last_orphan = 0;	/* drop the list instead of processing it */
+		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
+		return;
+	}
+
+	if (s_flags & MS_RDONLY) {
+		printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n",
+		       sb->s_id);
+		sb->s_flags &= ~MS_RDONLY;	/* writable for the cleanup only; restored at the end */
+	}
+#ifdef CONFIG_QUOTA
+	/* Needed for iput() to work correctly and not trash data */
+	sb->s_flags |= MS_ACTIVE;
+	/* Turn on quotas so that they are updated correctly */
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (EXT3_SB(sb)->s_qf_names[i]) {
+			int ret = ext3_quota_on_mount(sb, i);
+			if (ret < 0)	/* reported only; cleanup continues without this quota */
+				printk(KERN_ERR
+					"EXT3-fs: Cannot turn on journalled "
+					"quota: error %d\n", ret);
+		}
+	}
+#endif
+
+	while (es->s_last_orphan) {
+		struct inode *inode;
+
+		if (!(inode =
+		      ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) {
+			es->s_last_orphan = 0;	/* unreadable entry: abandon the rest of the list */
+			break;
+		}
+
+		list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
+		DQUOT_INIT(inode);
+		if (inode->i_nlink) {	/* still linked: truncate it rather than delete it */
+			printk(KERN_DEBUG
+				"%s: truncating inode %lu to %Ld bytes\n",
+				__FUNCTION__, inode->i_ino, inode->i_size);
+			jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
+				  inode->i_ino, inode->i_size);
+			ext3_truncate(inode);
+			nr_truncates++;
+		} else {
+			printk(KERN_DEBUG
+				"%s: deleting unreferenced inode %lu\n",
+				__FUNCTION__, inode->i_ino);
+			jbd_debug(2, "deleting unreferenced inode %lu\n",
+				  inode->i_ino);
+			nr_orphans++;
+		}
+		iput(inode);  /* The delete magic happens here! */
+	}
+
+#define PLURAL(x) (x), ((x)==1) ? "" : "s"	/* expands to TWO printk arguments: count, suffix */
+
+	if (nr_orphans)
+		printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n",
+		       sb->s_id, PLURAL(nr_orphans));
+	if (nr_truncates)
+		printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n",
+		       sb->s_id, PLURAL(nr_truncates));
+#ifdef CONFIG_QUOTA
+	/* Turn quotas off */
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (sb_dqopt(sb)->files[i])
+			vfs_quota_off(sb, i);
+	}
+#endif
+	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+}
+
+#define log2(n) ffz(~(n))	/* NOTE(review): first zero bit of ~n, i.e. lowest set bit of n; a true log2 only when n is a power of two - confirm callers */
+
+/*
+ * Maximal file size.  A file is bounded by its direct plus single-, double-
+ * and triple-indirect blocks, and by the (2^32 - 1) 512-byte sector limit of
+ * i_blocks; we must stay 1 filesystem block below that 2^32 sector limit.
+ */
+static loff_t ext3_max_size(int bits)
+{
+	/* Largest size of a dense, 4k-blocksize file whose sector count,
+	 * data and all indirect blocks included, does not exceed 2^32. */
+	const loff_t sector_cap = 0x1ff7fffd000LL;
+	const int ptr_bits = bits - 2;	/* log2(block size / 4) */
+	loff_t size = EXT3_NDIR_BLOCKS;	/* a block count until the final shift */
+
+	size += 1LL << ptr_bits;		/* single indirect */
+	size += 1LL << (2 * ptr_bits);		/* double indirect */
+	size += 1LL << (3 * ptr_bits);		/* triple indirect */
+	size <<= bits;				/* blocks -> bytes */
+
+	if (size > sector_cap)
+		return sector_cap;
+	return size;
+}
+
+static ext3_fsblk_t descriptor_loc(struct super_block *sb,
+				    ext3_fsblk_t logic_sb_block,
+				    int nr)
+{
+	/* Block number that holds group-descriptor block 'nr' */
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	unsigned long first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
+	unsigned long group;
+	int sb_backup;
+
+	/* No META_BG, or below its first group: descriptors follow the sb */
+	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
+	    nr < first_meta_bg)
+		return logic_sb_block + nr + 1;
+	group = sbi->s_desc_per_block * nr;
+	sb_backup = ext3_bg_has_super(sb, group) ? 1 : 0;
+	return sb_backup + ext3_group_first_block_no(sb, group);
+}
+
+/* Mount-time setup: read and validate the superblock, parse options, read group descriptors, load the journal, get the root inode */
+static int ext3_fill_super (struct super_block *sb, void *data, int silent)
+{	/* returns 0 on success, -ENOMEM if sbi cannot be allocated, -EINVAL on any other failure */
+	struct buffer_head * bh;
+	struct ext3_super_block *es = NULL;
+	struct ext3_sb_info *sbi;
+	ext3_fsblk_t block;
+	ext3_fsblk_t sb_block = get_sb_block(&data);	/* scaled by EXT3_MIN_BLOCK_SIZE below */
+	ext3_fsblk_t logic_sb_block;
+	unsigned long offset = 0;	/* byte offset of the superblock inside its buffer */
+	unsigned int journal_inum = 0;
+	unsigned long journal_devnum = 0;
+	unsigned long def_mount_opts;
+	struct inode *root;
+	int blocksize;
+	int hblock;
+	int db_count;	/* number of group-descriptor blocks */
+	int i;
+	int needs_recovery;
+	__le32 features;
+
+	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+	if (!sbi)
+		return -ENOMEM;
+	sb->s_fs_info = sbi;
+	sbi->s_mount_opt = 0;
+	sbi->s_resuid = EXT3_DEF_RESUID;
+	sbi->s_resgid = EXT3_DEF_RESGID;
+
+	unlock_kernel();	/* re-taken by lock_kernel() before both returns below */
+
+	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
+	if (!blocksize) {
+		printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
+		goto out_fail;
+	}
+
+	/*
+	 * The ext3 superblock will not be buffer aligned for other than 1kB
+	 * block sizes.  We need to calculate the offset from buffer start.
+	 */
+	if (blocksize != EXT3_MIN_BLOCK_SIZE) {
+		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
+		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
+	} else {
+		logic_sb_block = sb_block;
+	}
+
+	if (!(bh = sb_bread(sb, logic_sb_block))) {
+		printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
+		goto out_fail;
+	}
+	/*
+	 * Note: s_es must be initialized as soon as possible because
+	 *       some ext3 macro-instructions depend on its value
+	 */
+	es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
+	sbi->s_es = es;
+	sb->s_magic = le16_to_cpu(es->s_magic);
+	if (sb->s_magic != EXT3_SUPER_MAGIC)
+		goto cantfind_ext3;
+
+	/* Set defaults before we parse the mount options */
+	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
+	if (def_mount_opts & EXT3_DEFM_DEBUG)
+		set_opt(sbi->s_mount_opt, DEBUG);
+	if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
+		set_opt(sbi->s_mount_opt, GRPID);
+	if (def_mount_opts & EXT3_DEFM_UID16)
+		set_opt(sbi->s_mount_opt, NO_UID32);
+	if (def_mount_opts & EXT3_DEFM_XATTR_USER)
+		set_opt(sbi->s_mount_opt, XATTR_USER);
+	if (def_mount_opts & EXT3_DEFM_ACL)
+		set_opt(sbi->s_mount_opt, POSIX_ACL);
+	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
+		sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
+	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
+		sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA;
+	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
+		sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA;
+
+	if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
+		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
+		set_opt(sbi->s_mount_opt, ERRORS_RO);
+
+	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);	/* replaces the EXT3_DEF_* values set above */
+	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+
+	set_opt(sbi->s_mount_opt, RESERVATION);
+
+	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
+			    NULL, 0))
+		goto failed_mount;
+
+	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+		((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+
+	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
+	    (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
+	     EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
+	     EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
+		printk(KERN_WARNING
+		       "EXT3-fs warning: feature flags set on rev 0 fs, "
+		       "running e2fsck is recommended\n");
+	/*
+	 * Check feature flags regardless of the revision level, since we
+	 * previously didn't change the revision level when setting the flags,
+	 * so there is a chance incompat flags are set on a rev 0 filesystem.
+	 */
+	features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
+	if (features) {
+		printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
+		       "unsupported optional features (%x).\n",
+		       sb->s_id, le32_to_cpu(features));
+		goto failed_mount;
+	}
+	features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
+	if (!(sb->s_flags & MS_RDONLY) && features) {	/* unknown ro-compat bits only block rw mounts */
+		printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
+		       "unsupported optional features (%x).\n",
+		       sb->s_id, le32_to_cpu(features));
+		goto failed_mount;
+	}
+	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+
+	if (blocksize < EXT3_MIN_BLOCK_SIZE ||
+	    blocksize > EXT3_MAX_BLOCK_SIZE) {
+		printk(KERN_ERR
+		       "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
+		       blocksize, sb->s_id);
+		goto failed_mount;
+	}
+
+	hblock = bdev_hardsect_size(sb->s_bdev);
+	if (sb->s_blocksize != blocksize) {
+		/*
+		 * Make sure the blocksize for the filesystem is not smaller
+		 * than the hardware sectorsize for the machine.
+		 */
+		if (blocksize < hblock) {
+			printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
+			       "device blocksize %d.\n", blocksize, hblock);
+			goto failed_mount;
+		}
+
+		brelse (bh);	/* re-read the superblock with the filesystem's own block size */
+		sb_set_blocksize(sb, blocksize);
+		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
+		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
+		bh = sb_bread(sb, logic_sb_block);
+		if (!bh) {
+			printk(KERN_ERR
+			       "EXT3-fs: Can't read superblock on 2nd try.\n");
+			goto failed_mount;
+		}
+		es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
+		sbi->s_es = es;
+		if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
+			printk (KERN_ERR
+				"EXT3-fs: Magic mismatch, very weird !\n");
+			goto failed_mount;
+		}
+	}
+
+	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
+
+	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
+		sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
+		sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
+	} else {
+		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
+		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
+		if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
+		    (sbi->s_inode_size & (sbi->s_inode_size - 1)) ||	/* not a power of two */
+		    (sbi->s_inode_size > blocksize)) {
+			printk (KERN_ERR
+				"EXT3-fs: unsupported inode size: %d\n",
+				sbi->s_inode_size);
+			goto failed_mount;
+		}
+	}
+	sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
+				   le32_to_cpu(es->s_log_frag_size);
+	if (blocksize != sbi->s_frag_size) {
+		printk(KERN_ERR
+		       "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
+		       sbi->s_frag_size, blocksize);
+		goto failed_mount;
+	}
+	sbi->s_frags_per_block = 1;
+	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
+	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
+	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
+	if (EXT3_INODE_SIZE(sb) == 0)	/* guards the division just below */
+		goto cantfind_ext3;
+	sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
+	if (sbi->s_inodes_per_block == 0)	/* guards the division just below */
+		goto cantfind_ext3;
+	sbi->s_itb_per_group = sbi->s_inodes_per_group /
+					sbi->s_inodes_per_block;
+	sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
+	sbi->s_sbh = bh;
+	sbi->s_mount_state = le16_to_cpu(es->s_state);
+	sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb));
+	sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb));
+	for (i=0; i < 4; i++)
+		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
+	sbi->s_def_hash_version = es->s_def_hash_version;
+
+	if (sbi->s_blocks_per_group > blocksize * 8) {	/* blocksize * 8 = bits in one block */
+		printk (KERN_ERR
+			"EXT3-fs: #blocks per group too big: %lu\n",
+			sbi->s_blocks_per_group);
+		goto failed_mount;
+	}
+	if (sbi->s_frags_per_group > blocksize * 8) {
+		printk (KERN_ERR
+			"EXT3-fs: #fragments per group too big: %lu\n",
+			sbi->s_frags_per_group);
+		goto failed_mount;
+	}
+	if (sbi->s_inodes_per_group > blocksize * 8) {
+		printk (KERN_ERR
+			"EXT3-fs: #inodes per group too big: %lu\n",
+			sbi->s_inodes_per_group);
+		goto failed_mount;
+	}
+
+	if (le32_to_cpu(es->s_blocks_count) >
+		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
+		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
+			" too large to mount safely\n", sb->s_id);
+		if (sizeof(sector_t) < 8)
+			printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not "
+					"enabled\n");
+		goto failed_mount;
+	}
+
+	if (EXT3_BLOCKS_PER_GROUP(sb) == 0)	/* guards the division just below */
+		goto cantfind_ext3;
+	sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
+			       le32_to_cpu(es->s_first_data_block) - 1)
+				       / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
+	db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
+		   EXT3_DESC_PER_BLOCK(sb);
+	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
+				    GFP_KERNEL);
+	if (sbi->s_group_desc == NULL) {
+		printk (KERN_ERR "EXT3-fs: not enough memory\n");
+		goto failed_mount;
+	}
+
+	bgl_lock_init(&sbi->s_blockgroup_lock);
+
+	for (i = 0; i < db_count; i++) {
+		block = descriptor_loc(sb, logic_sb_block, i);
+		sbi->s_group_desc[i] = sb_bread(sb, block);
+		if (!sbi->s_group_desc[i]) {
+			printk (KERN_ERR "EXT3-fs: "
+				"can't read group descriptor %d\n", i);
+			db_count = i;	/* failed_mount2 then releases only the buffers read so far */
+			goto failed_mount2;
+		}
+	}
+	if (!ext3_check_descriptors (sb)) {
+		printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n");
+		goto failed_mount2;
+	}
+	sbi->s_gdb_count = db_count;
+	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
+	spin_lock_init(&sbi->s_next_gen_lock);
+
+	percpu_counter_init(&sbi->s_freeblocks_counter,
+		ext3_count_free_blocks(sb));
+	percpu_counter_init(&sbi->s_freeinodes_counter,
+		ext3_count_free_inodes(sb));
+	percpu_counter_init(&sbi->s_dirs_counter,
+		ext3_count_dirs(sb));
+
+	/* per filesystem reservation list head & lock */
+	spin_lock_init(&sbi->s_rsv_window_lock);
+	sbi->s_rsv_window_root = RB_ROOT;
+	/* Add a single, static dummy reservation to the start of the
+	 * reservation window list --- it gives us a placeholder for
+	 * append-at-start-of-list which makes the allocation logic
+	 * _much_ simpler. */
+	sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+	sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
+	sbi->s_rsv_window_head.rsv_goal_size = 0;
+	ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
+
+	/*
+	 * set up enough so that it can read an inode
+	 */
+	sb->s_op = &ext3_sops;
+	sb->s_export_op = &ext3_export_ops;
+	sb->s_xattr = ext3_xattr_handlers;
+#ifdef CONFIG_QUOTA
+	sb->s_qcop = &ext3_qctl_operations;
+	sb->dq_op = &ext3_quota_operations;
+#endif
+	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+
+	sb->s_root = NULL;
+
+	needs_recovery = (es->s_last_orphan != 0 ||
+			  EXT3_HAS_INCOMPAT_FEATURE(sb,
+				    EXT3_FEATURE_INCOMPAT_RECOVER));
+
+	/*
+	 * The first inode we look at is the journal inode.  Don't try
+	 * root first: it may be modified in the journal!
+	 */
+	if (!test_opt(sb, NOLOAD) &&
+	    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
+		if (ext3_load_journal(sb, es, journal_devnum))
+			goto failed_mount3;
+	} else if (journal_inum) {
+		if (ext3_create_journal(sb, es, journal_inum))
+			goto failed_mount3;
+	} else {
+		if (!silent)
+			printk (KERN_ERR
+				"ext3: No journal on filesystem on %s\n",
+				sb->s_id);
+		goto failed_mount3;
+	}
+
+	/* We have now updated the journal if required, so we can
+	 * validate the data journaling mode. */
+	switch (test_opt(sb, DATA_FLAGS)) {
+	case 0:
+		/* No mode set, assume a default based on the journal
+		   capabilities: ORDERED_DATA if the journal can
+		   cope, else JOURNAL_DATA */
+		if (journal_check_available_features
+		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
+			set_opt(sbi->s_mount_opt, ORDERED_DATA);
+		else
+			set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+		break;
+
+	case EXT3_MOUNT_ORDERED_DATA:
+	case EXT3_MOUNT_WRITEBACK_DATA:
+		if (!journal_check_available_features
+		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
+			printk(KERN_ERR "EXT3-fs: Journal does not support "
+			       "requested data journaling mode\n");
+			goto failed_mount4;
+		}	/* fall through */
+	default:
+		break;
+	}
+
+	if (test_opt(sb, NOBH)) {
+		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
+			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
+				"its supported only with writeback mode\n");
+			clear_opt(sbi->s_mount_opt, NOBH);
+		}
+	}
+	/*
+	 * The journal_load will have done any necessary log recovery,
+	 * so we can safely mount the rest of the filesystem now.
+	 */
+
+	root = iget(sb, EXT3_ROOT_INO);
+	sb->s_root = d_alloc_root(root);
+	if (!sb->s_root) {
+		printk(KERN_ERR "EXT3-fs: get root inode failed\n");
+		iput(root);
+		goto failed_mount4;
+	}
+	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+		dput(sb->s_root);
+		sb->s_root = NULL;
+		printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
+		goto failed_mount4;
+	}
+
+	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);	/* its return value is not used here */
+	/*
+	 * akpm: core read_super() calls in here with the superblock locked.
+	 * That deadlocks, because orphan cleanup needs to lock the superblock
+	 * in numerous places.  Here we just pop the lock - it's relatively
+	 * harmless, because we are now ready to accept write_super() requests,
+	 * and aviro says that's the only reason for hanging onto the
+	 * superblock lock.
+	 */
+	EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;	/* set only while orphan cleanup runs */
+	ext3_orphan_cleanup(sb, es);
+	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
+	if (needs_recovery)
+		printk (KERN_INFO "EXT3-fs: recovery complete.\n");
+	ext3_mark_recovery_complete(sb, es);
+	printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
+		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
+		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
+		"writeback");
+
+	lock_kernel();
+	return 0;
+
+cantfind_ext3:	/* not an ext3 superblock, or one with unusable geometry */
+	if (!silent)
+		printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n",
+		       sb->s_id);
+	goto failed_mount;
+
+failed_mount4:	/* journal was set up: destroy it, then unwind the earlier stages */
+	journal_destroy(sbi->s_journal);
+failed_mount3:	/* per-cpu counters were initialised */
+	percpu_counter_destroy(&sbi->s_freeblocks_counter);
+	percpu_counter_destroy(&sbi->s_freeinodes_counter);
+	percpu_counter_destroy(&sbi->s_dirs_counter);
+failed_mount2:	/* release the db_count descriptor buffers and their array */
+	for (i = 0; i < db_count; i++)
+		brelse(sbi->s_group_desc[i]);
+	kfree(sbi->s_group_desc);
+failed_mount:	/* superblock buffer was read (bh may be NULL after a failed re-read) */
+#ifdef CONFIG_QUOTA
+	for (i = 0; i < MAXQUOTAS; i++)
+		kfree(sbi->s_qf_names[i]);
+#endif
+	ext3_blkdev_remove(sbi);
+	brelse(bh);
+out_fail:	/* only sbi exists at this point */
+	sb->s_fs_info = NULL;
+	kfree(sbi);
+	lock_kernel();
+	return -EINVAL;
+}
+
+/*
+ * Setup any per-fs journal parameters now.  We'll do this both on
+ * initial mount, once the journal has been initialised but before we've
+ * done any recovery; and again on any subsequent remount.
+ */
+static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
+{
+	struct ext3_sb_info *info = EXT3_SB(sb);
+
+	/* A zero commit interval means "not configured": the journal's
+	 * current value (the jbd default) is left untouched.  An
+	 * ext3-specific default could be installed here instead. */
+	if (info->s_commit_interval)
+		journal->j_commit_interval = info->s_commit_interval;
+
+	spin_lock(&journal->j_state_lock);
+	if (!test_opt(sb, BARRIER))
+		journal->j_flags &= ~JFS_BARRIER;
+	else
+		journal->j_flags |= JFS_BARRIER;
+	spin_unlock(&journal->j_state_lock);
+}
+
+static journal_t *ext3_get_journal(struct super_block *sb,
+				   unsigned int journal_inum)
+{	/* Set up a journal_t on inode journal_inum of sb; returns NULL on any failure */
+	struct inode *journal_inode;
+	journal_t *journal;
+
+	/* First, test for the existence of a valid inode on disk.  Bad
+	 * things happen if we iget() an unused inode, as the subsequent
+	 * iput() will try to delete it. */
+
+	journal_inode = iget(sb, journal_inum);
+	if (!journal_inode) {
+		printk(KERN_ERR "EXT3-fs: no journal found.\n");
+		return NULL;
+	}
+	if (!journal_inode->i_nlink) {
+		make_bad_inode(journal_inode);	/* presumably so the iput() below does not try to delete it */
+		iput(journal_inode);
+		printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n");
+		return NULL;
+	}
+
+	jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
+		  journal_inode, journal_inode->i_size);
+	if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) {	/* must be a regular file */
+		printk(KERN_ERR "EXT3-fs: invalid journal inode.\n");
+		iput(journal_inode);
+		return NULL;
+	}
+
+	journal = journal_init_inode(journal_inode);
+	if (!journal) {
+		printk(KERN_ERR "EXT3-fs: Could not load journal inode\n");
+		iput(journal_inode);
+		return NULL;
+	}
+	journal->j_private = sb;	/* back-pointer from the journal to its superblock */
+	ext3_init_journal_params(sb, journal);
+	return journal;	/* success path: no iput(), the inode reference is not dropped here */
+}
+/* Open the external journal on device j_dev; returns NULL on any failure. */
+static journal_t *ext3_get_dev_journal(struct super_block *sb,
+				       dev_t j_dev)
+{
+	struct buffer_head * bh;
+	journal_t *journal;
+	ext3_fsblk_t start;
+	ext3_fsblk_t len;
+	int hblock, blocksize;
+	ext3_fsblk_t sb_block;
+	unsigned long offset;
+	struct ext3_super_block * es;
+	struct block_device *bdev;
+
+	bdev = ext3_blkdev_get(j_dev);
+	if (bdev == NULL)
+		return NULL;
+
+	if (bd_claim(bdev, sb)) {
+		printk(KERN_ERR
+		        "EXT3: failed to claim external journal device.\n");
+		blkdev_put(bdev);	/* claim failed: plain blkdev_put() */
+		return NULL;
+	}
+
+	blocksize = sb->s_blocksize;
+	hblock = bdev_hardsect_size(bdev);
+	if (blocksize < hblock) {
+		printk(KERN_ERR
+			"EXT3-fs: blocksize too small for journal device.\n");
+		goto out_bdev;
+	}
+	/* The device's superblock sits at byte offset EXT3_MIN_BLOCK_SIZE. */
+	sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
+	offset = EXT3_MIN_BLOCK_SIZE % blocksize;
+	set_blocksize(bdev, blocksize);
+	if (!(bh = __bread(bdev, sb_block, blocksize))) {
+		printk(KERN_ERR "EXT3-fs: couldn't read superblock of "
+		       "external journal\n");
+		goto out_bdev;
+	}
+
+	es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
+	if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
+	    !(le32_to_cpu(es->s_feature_incompat) &
+	      EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
+		printk(KERN_ERR "EXT3-fs: external journal has "
+					"bad superblock\n");
+		brelse(bh);
+		goto out_bdev;
+	}
+
+	if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
+		printk(KERN_ERR "EXT3-fs: journal UUID does not match\n");
+		brelse(bh);
+		goto out_bdev;
+	}
+
+	len = le32_to_cpu(es->s_blocks_count);
+	start = sb_block + 1;	/* journal area follows the sb block */
+	brelse(bh);	/* we're done with the superblock */
+
+	journal = journal_init_dev(bdev, sb->s_bdev,
+					start, len, blocksize);
+	if (!journal) {
+		printk(KERN_ERR "EXT3-fs: failed to create device journal\n");
+		goto out_bdev;
+	}
+	journal->j_private = sb;
+	ll_rw_block(READ, 1, &journal->j_sb_buffer);
+	wait_on_buffer(journal->j_sb_buffer);
+	if (!buffer_uptodate(journal->j_sb_buffer)) {
+		printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
+		goto out_journal;
+	}
+	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
+		printk(KERN_ERR "EXT3-fs: External journal has more than one "
+					"user (unsupported) - %d\n",
+			be32_to_cpu(journal->j_superblock->s_nr_users));
+		goto out_journal;
+	}
+	EXT3_SB(sb)->journal_bdev = bdev;	/* remembered in the sb info */
+	ext3_init_journal_params(sb, journal);
+	return journal;
+out_journal:
+	journal_destroy(journal);	/* then fall through to drop bdev */
+out_bdev:
+	ext3_blkdev_put(bdev);
+	return NULL;
+}
+/* Find, check and load the fs journal into sbi->s_journal; 0 or -errno. */
+static int ext3_load_journal(struct super_block *sb,
+			     struct ext3_super_block *es,
+			     unsigned long journal_devnum)
+{
+	journal_t *journal;
+	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
+	dev_t journal_dev;
+	int err = 0;
+	int really_read_only;
+
+	if (journal_devnum &&
+	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+		printk(KERN_INFO "EXT3-fs: external journal device major/minor "
+			"numbers have changed\n");
+		journal_dev = new_decode_dev(journal_devnum);
+	} else
+		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
+
+	really_read_only = bdev_read_only(sb->s_bdev);
+
+	/*
+	 * Are we loading a blank journal or performing recovery after a
+	 * crash?  For recovery, we need to check in advance whether we
+	 * can get read-write access to the device.
+	 */
+
+	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
+		if (sb->s_flags & MS_RDONLY) {
+			printk(KERN_INFO "EXT3-fs: INFO: recovery "
+					"required on readonly filesystem.\n");
+			if (really_read_only) {
+				printk(KERN_ERR "EXT3-fs: write access "
+					"unavailable, cannot proceed.\n");
+				return -EROFS;
+			}
+			printk (KERN_INFO "EXT3-fs: write access will "
+					"be enabled during recovery.\n");
+		}
+	}
+
+	if (journal_inum && journal_dev) {	/* exactly one may be set */
+		printk(KERN_ERR "EXT3-fs: filesystem has both journal "
+		       "and inode journals!\n");
+		return -EINVAL;
+	}
+
+	if (journal_inum) {
+		if (!(journal = ext3_get_journal(sb, journal_inum)))
+			return -EINVAL;
+	} else {
+		if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
+			return -EINVAL;
+	}
+
+	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
+		err = journal_update_format(journal);
+		if (err)  {
+			printk(KERN_ERR "EXT3-fs: error updating journal.\n");
+			journal_destroy(journal);
+			return err;
+		}
+	}
+
+	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
+		err = journal_wipe(journal, !really_read_only);
+	if (!err)
+		err = journal_load(journal);
+
+	if (err) {
+		printk(KERN_ERR "EXT3-fs: error loading journal.\n");
+		journal_destroy(journal);
+		return err;
+	}
+
+	EXT3_SB(sb)->s_journal = journal;
+	ext3_clear_journal_err(sb, es);
+
+	if (journal_devnum &&
+	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+		es->s_journal_dev = cpu_to_le32(journal_devnum);
+		sb->s_dirt = 1;
+
+		/* Make sure the new journal device number reaches the disk. */
+		ext3_commit_super(sb, es, 1);
+	}
+
+	return 0;
+}
+/* Create a new journal on inode journal_inum and record it in the sb. */
+static int ext3_create_journal(struct super_block * sb,
+			       struct ext3_super_block * es,
+			       unsigned int journal_inum)
+{
+	journal_t *journal;
+
+	if (sb->s_flags & MS_RDONLY) {
+		printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
+				"create journal.\n");
+		return -EROFS;
+	}
+
+	if (!(journal = ext3_get_journal(sb, journal_inum)))
+		return -EINVAL;
+
+	printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
+	       journal_inum);
+
+	if (journal_create(journal)) {
+		printk(KERN_ERR "EXT3-fs: error creating journal.\n");
+		journal_destroy(journal);
+		return -EIO;	/* journal_create()'s own code is not passed on */
+	}
+
+	EXT3_SB(sb)->s_journal = journal;
+
+	ext3_update_dynamic_rev(sb);
+	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
+
+	es->s_journal_inum = cpu_to_le32(journal_inum);
+	sb->s_dirt = 1;
+
+	/* Make sure we flush the recovery flag to disk. */
+	ext3_commit_super(sb, es, 1);
+
+	return 0;
+}
+/* Refresh wtime and free counts in es, dirty the sb buffer; sync if asked. */
+static void ext3_commit_super (struct super_block * sb,
+			       struct ext3_super_block * es,
+			       int sync)
+{
+	struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
+
+	if (!sbh)	/* no superblock buffer: nothing to write */
+		return;
+	es->s_wtime = cpu_to_le32(get_seconds());
+	es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
+	es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
+	BUFFER_TRACE(sbh, "marking dirty");
+	mark_buffer_dirty(sbh);
+	if (sync)	/* non-zero: write the buffer out synchronously */
+		sync_dirty_buffer(sbh);
+}
+
+
+/*
+ * Have we just finished recovery?  If so, and if we are mounting (or
+ * remounting) the filesystem readonly, then we will end up with a
+ * consistent fs on disk.  Record that fact.
+ */
+static void ext3_mark_recovery_complete(struct super_block * sb,
+					struct ext3_super_block * es)
+{
+	journal_t *journal = EXT3_SB(sb)->s_journal;
+
+	journal_lock_updates(journal);	/* held until the unlock below */
+	journal_flush(journal);
+	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
+	    sb->s_flags & MS_RDONLY) {
+		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+		sb->s_dirt = 0;
+		ext3_commit_super(sb, es, 1);	/* synchronous write */
+	}
+	journal_unlock_updates(journal);
+}
+
+/*
+ * If we are mounting (or read-write remounting) a filesystem whose journal
+ * has recorded an error from a previous lifetime, move that error to the
+ * main filesystem now.
+ */
+static void ext3_clear_journal_err(struct super_block * sb,
+				   struct ext3_super_block * es)
+{
+	journal_t *journal;
+	int j_errno;
+	const char *errstr;
+
+	journal = EXT3_SB(sb)->s_journal;
+
+	/*
+	 * Now check for any error status which may have been recorded in the
+	 * journal by a prior ext3_error() or ext3_abort()
+	 */
+
+	j_errno = journal_errno(journal);
+	if (j_errno) {
+		char nbuf[16];	/* scratch buffer for ext3_decode_error() */
+
+		errstr = ext3_decode_error(sb, j_errno, nbuf);
+		ext3_warning(sb, __FUNCTION__, "Filesystem error recorded "
+			     "from previous mount: %s", errstr);
+		ext3_warning(sb, __FUNCTION__, "Marking fs in need of "
+			     "filesystem check.");
+
+		EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+		es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
+		ext3_commit_super (sb, es, 1);
+		/* The error is in the superblock now; clear the journal copy. */
+		journal_clear_err(journal);
+	}
+}
+
+/*
+ * Commit the running and committing transactions and wait for the
+ * commit.  A read-only mount returns 0 at once; otherwise s_dirt is
+ * cleared and ext3_journal_force_commit()'s result is returned.
+ */
+int ext3_force_commit(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY)) {
+		journal_t *jnl = EXT3_SB(sb)->s_journal;
+
+		sb->s_dirt = 0;
+		return ext3_journal_force_commit(jnl);
+	}
+
+	/* Read-only: there is nothing to commit. */
+	return 0;
+}
+
+/*
+ * Ext3 always journals updates to the superblock itself, so we don't
+ * have to propagate any other updates to the superblock on disk at this
+ * point.  Just start an async writeback to get the buffers on their way
+ * to the disk.
+ *
+ * This implicitly triggers the writebehind on sync().
+ */
+
+static void ext3_write_super (struct super_block * sb)
+{
+	if (mutex_trylock(&sb->s_lock) != 0)	/* got it: s_lock wasn't held */
+		BUG();
+	sb->s_dirt = 0;	/* the only state this function changes */
+}
+
+static int ext3_sync_fs(struct super_block *sb, int wait)
+{
+	tid_t tid;
+
+	sb->s_dirt = 0;
+	/* Kick off a commit; wait for it only if one started and wait is set. */
+	if (!journal_start_commit(EXT3_SB(sb)->s_journal, &tid) || !wait)
+		return 0;
+	log_wait_commit(EXT3_SB(sb)->s_journal, tid);
+	return 0;
+}
+
+/*
+ * LVM calls this function before a (read-only) snapshot is created.  This
+ * gives us a chance to flush the journal completely and mark the fs clean.
+ */
+static void ext3_write_super_lockfs(struct super_block *sb)
+{
+	sb->s_dirt = 0;
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		journal_t *journal = EXT3_SB(sb)->s_journal;
+
+		/* Now we set up the journal barrier. */
+		journal_lock_updates(journal);	/* undone by ext3_unlockfs() */
+		journal_flush(journal);
+
+		/* Journal blocked and flushed, clear needs_recovery flag. */
+		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+		ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
+	}
+}
+
+/*
+ * Called by LVM after the snapshot is done.  We need to reset the RECOVER
+ * flag here, even though the filesystem is not technically dirty yet.
+ */
+static void ext3_unlockfs(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY)) {
+		lock_super(sb);
+		/* Reset the needs_recovery flag before the fs is unlocked. */
+		EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+		ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
+		unlock_super(sb);
+		journal_unlock_updates(EXT3_SB(sb)->s_journal);
+	}
+}
+/* Apply remount options and flags; on failure restore the saved settings. */
+static int ext3_remount (struct super_block * sb, int * flags, char * data)
+{
+	struct ext3_super_block * es;
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	ext3_fsblk_t n_blocks_count = 0;
+	unsigned long old_sb_flags;
+	struct ext3_mount_options old_opts;
+	int err;
+#ifdef CONFIG_QUOTA
+	int i;
+#endif
+
+	/* Store the original options */
+	old_sb_flags = sb->s_flags;
+	old_opts.s_mount_opt = sbi->s_mount_opt;
+	old_opts.s_resuid = sbi->s_resuid;
+	old_opts.s_resgid = sbi->s_resgid;
+	old_opts.s_commit_interval = sbi->s_commit_interval;
+#ifdef CONFIG_QUOTA
+	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
+	for (i = 0; i < MAXQUOTAS; i++)
+		old_opts.s_qf_names[i] = sbi->s_qf_names[i];
+#endif
+
+	/*
+	 * Allow the "check" option to be passed as a remount option.
+	 */
+	if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
+		err = -EINVAL;
+		goto restore_opts;
+	}
+
+	if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
+		ext3_abort(sb, __FUNCTION__, "Abort forced by user");
+
+	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+		((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+
+	es = sbi->s_es;
+
+	ext3_init_journal_params(sb, sbi->s_journal);
+
+	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
+		n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
+		if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) {
+			err = -EROFS;
+			goto restore_opts;
+		}
+
+		if (*flags & MS_RDONLY) {
+			/*
+			 * First of all, the unconditional stuff we have to do
+			 * to disable replay of the journal when we next remount
+			 */
+			sb->s_flags |= MS_RDONLY;
+
+			/*
+			 * OK, test if we are remounting a valid rw partition
+			 * readonly, and if so set the rdonly flag and then
+			 * mark the partition as valid again.
+			 */
+			if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
+			    (sbi->s_mount_state & EXT3_VALID_FS))
+				es->s_state = cpu_to_le16(sbi->s_mount_state);
+
+			ext3_mark_recovery_complete(sb, es);
+		} else {
+			__le32 ret;
+			if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
+					~EXT3_FEATURE_RO_COMPAT_SUPP))) {
+				printk(KERN_WARNING "EXT3-fs: %s: couldn't "
+				       "remount RDWR because of unsupported "
+				       "optional features (%x).\n",
+				       sb->s_id, le32_to_cpu(ret));
+				err = -EROFS;
+				goto restore_opts;
+			}
+			/*
+			 * Mounting a RDONLY partition read-write, so reread
+			 * and store the current valid flag.  (It may have
+			 * been changed by e2fsck since we originally mounted
+			 * the partition.)
+			 */
+			ext3_clear_journal_err(sb, es);
+			sbi->s_mount_state = le16_to_cpu(es->s_state);
+			if ((err = ext3_group_extend(sb, es, n_blocks_count)))
+				goto restore_opts;
+			if (!ext3_setup_super (sb, es, 0))
+				sb->s_flags &= ~MS_RDONLY;
+		}
+	}
+#ifdef CONFIG_QUOTA
+	/* Release old quota file names */
+	for (i = 0; i < MAXQUOTAS; i++)
+		if (old_opts.s_qf_names[i] &&
+		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
+			kfree(old_opts.s_qf_names[i]);	/* superseded name */
+#endif
+	return 0;
+restore_opts:
+	sb->s_flags = old_sb_flags;
+	sbi->s_mount_opt = old_opts.s_mount_opt;
+	sbi->s_resuid = old_opts.s_resuid;
+	sbi->s_resgid = old_opts.s_resgid;
+	sbi->s_commit_interval = old_opts.s_commit_interval;
+#ifdef CONFIG_QUOTA
+	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (sbi->s_qf_names[i] &&
+		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
+			kfree(sbi->s_qf_names[i]);	/* drop the new name */
+		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
+	}
+#endif
+	return err;
+}
+/* Fill *buf with block and inode usage figures for the fs; returns 0. */
+static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext3_super_block *es = sbi->s_es;
+	ext3_fsblk_t overhead;
+	int i;
+
+	if (test_opt (sb, MINIX_DF))	/* minixdf: report raw block count */
+		overhead = 0;
+	else {
+		unsigned long ngroups;
+		ngroups = EXT3_SB(sb)->s_groups_count;
+		smp_rmb();
+
+		/*
+		 * Compute the overhead (FS structures)
+		 */
+
+		/*
+		 * All of the blocks before first_data_block are
+		 * overhead
+		 */
+		overhead = le32_to_cpu(es->s_first_data_block);
+
+		/*
+		 * Add the overhead attributed to the superblock and
+		 * block group descriptors.  If the sparse superblocks
+		 * feature is turned on, then not all groups have this.
+		 */
+		for (i = 0; i < ngroups; i++) {
+			overhead += ext3_bg_has_super(sb, i) +
+				ext3_bg_num_gdb(sb, i);
+			cond_resched();
+		}
+
+		/*
+		 * Every block group has an inode bitmap, a block
+		 * bitmap, and an inode table.
+		 */
+		overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group));
+	}
+
+	buf->f_type = EXT3_SUPER_MAGIC;
+	buf->f_bsize = sb->s_blocksize;
+	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
+	buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
+	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
+	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
+		buf->f_bavail = 0;	/* fewer free than reserved blocks */
+	buf->f_files = le32_to_cpu(es->s_inodes_count);
+	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
+	buf->f_namelen = EXT3_NAME_LEN;
+	return 0;
+}
+
+/* Helper functions for writing quotas on sync - we need to start the transaction before the quota file
+ * is locked for write. Otherwise there are possible deadlocks:
+ * Process 1                         Process 2
+ * ext3_create()                     quota_sync()
+ *   journal_start()                   write_dquot()
+ *   DQUOT_INIT()                        down(dqio_mutex)
+ *     down(dqio_mutex)                    journal_start()
+ *
+ */
+
+#ifdef CONFIG_QUOTA
+/* Quota file inode for this dquot's type on the superblock it belongs to. */
+static inline struct inode *dquot_to_inode(struct dquot *dquot)
+{
+	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+}
+
+static int ext3_dquot_initialize(struct inode *inode, int type)
+{
+	handle_t *h;
+	int rc, stop_rc;
+
+	/* Initialising may create a quota structure: reserve blocks for it */
+	h = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	rc = dquot_initialize(inode, type);
+	stop_rc = ext3_journal_stop(h);
+	/* The quota error wins; the stop status matters only on success */
+	return rc ? rc : stop_rc;
+}
+
+static int ext3_dquot_drop(struct inode *inode)
+{
+	handle_t *h;
+	int rc, stop_rc;
+
+	/* Dropping may delete a quota structure: reserve blocks for that */
+	h = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	rc = dquot_drop(inode);
+	stop_rc = ext3_journal_stop(h);
+	/* The quota error wins; the stop status matters only on success */
+	return rc ? rc : stop_rc;
+}
+
+static int ext3_write_dquot(struct dquot *dquot)
+{
+	struct inode *qf_inode = dquot_to_inode(dquot);
+	handle_t *h;
+	int rc, stop_rc;
+
+	/* The transaction is started before dquot_commit() is entered */
+	h = ext3_journal_start(qf_inode,
+				EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	rc = dquot_commit(dquot);
+	stop_rc = ext3_journal_stop(h);
+	/* The commit error wins; the stop status matters only on success */
+	return rc ? rc : stop_rc;
+}
+
+static int ext3_acquire_dquot(struct dquot *dquot)
+{
+	handle_t *h;
+	int rc, stop_rc;
+
+	/* Run dquot_acquire() inside a handle sized by QUOTA_INIT_BLOCKS */
+	h = ext3_journal_start(dquot_to_inode(dquot),
+				EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+	rc = dquot_acquire(dquot);
+	stop_rc = ext3_journal_stop(h);
+	/* The quota error wins; the stop status matters only on success */
+	return rc ? rc : stop_rc;
+}
+
+static int ext3_release_dquot(struct dquot *dquot)
+{
+	handle_t *h;
+	int rc, stop_rc;
+
+	/* Run dquot_release() inside a handle sized by QUOTA_DEL_BLOCKS */
+	h = ext3_journal_start(dquot_to_inode(dquot),
+				EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+	rc = dquot_release(dquot);
+	stop_rc = ext3_journal_stop(h);
+	/* The quota error wins; the stop status matters only on success */
+	return rc ? rc : stop_rc;
+}
+
+static int ext3_mark_dquot_dirty(struct dquot *dquot)
+{
+	struct ext3_sb_info *sbi = EXT3_SB(dquot->dq_sb);
+
+	/* No journalled quota file names set: marking it dirty is enough. */
+	if (!sbi->s_qf_names[USRQUOTA] && !sbi->s_qf_names[GRPQUOTA])
+		return dquot_mark_dquot_dirty(dquot);
+	/* Journalled quota: mark it dirty, then write it out straight away. */
+	dquot_mark_dquot_dirty(dquot);
+	return ext3_write_dquot(dquot);
+}
+
+static int ext3_write_info(struct super_block *sb, int type)
+{
+	struct inode *root = sb->s_root->d_inode;
+	handle_t *h;
+	int rc, stop_rc;
+
+	/* Two credits: one data block plus the inode block */
+	h = ext3_journal_start(root, 2);
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+	rc = dquot_commit_info(sb, type);
+	stop_rc = ext3_journal_stop(h);
+	/* The quota error wins; the stop status matters only on success */
+	return rc ? rc : stop_rc;
+}
+
+/*
+ * Turn on quotas during mount time, using the quota file name and the
+ * journalled quota format stored in the sb info for this quota type.
+ */
+static int ext3_quota_on_mount(struct super_block *sb, int type)
+{
+	return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
+			EXT3_SB(sb)->s_jquota_fmt, type);
+}
+
+/*
+ * Standard function to be called on quota_on
+ */
+static int ext3_quota_on(struct super_block *sb, int type, int format_id,
+			 char *path)
+{
+	int err;
+	struct nameidata nd;
+
+	if (!test_opt(sb, QUOTA))
+		return -EINVAL;
+	/* Not journalling quota? */
+	if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
+	    !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
+		return vfs_quota_on(sb, type, format_id, path);
+	err = path_lookup(path, LOOKUP_FOLLOW, &nd);
+	if (err)
+		return err;
+	/* Quotafile not on the same filesystem? */
+	if (nd.mnt->mnt_sb != sb) {
+		path_release(&nd);
+		return -EXDEV;
+	}
+	/* Quotafile not in the fs root?  Only warn; quota_on still proceeds. */
+	if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
+		printk(KERN_WARNING
+			"EXT3-fs: Quota file not on filesystem root. "
+			"Journalled quota will not work.\n");
+	path_release(&nd);	/* the lookup was only needed for the checks */
+	return vfs_quota_on(sb, type, format_id, path);
+}
+
+/* Read data from quotafile - avoid pagecache and such because we cannot afford
+ * acquiring the locks... As quota files are never truncated and quota code
+ * itself serializes the operations (and no one else should touch the files)
+ * we don't have to be afraid of races */
+static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
+			       size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);	/* first block */
+	int err = 0;
+	int offset = off & (sb->s_blocksize - 1);	/* offset inside it */
+	int tocopy;
+	size_t toread;
+	struct buffer_head *bh;
+	loff_t i_size = i_size_read(inode);
+
+	if (off > i_size)
+		return 0;
+	if (off+len > i_size)
+		len = i_size-off;	/* clamp the read to end of file */
+	toread = len;
+	while (toread > 0) {
+		tocopy = sb->s_blocksize - offset < toread ?
+				sb->s_blocksize - offset : toread;
+		bh = ext3_bread(NULL, inode, blk, 0, &err);
+		if (err)
+			return err;
+		if (!bh)	/* A hole? */
+			memset(data, 0, tocopy);
+		else
+			memcpy(data, bh->b_data+offset, tocopy);
+		brelse(bh);
+		offset = 0;	/* later blocks are read from their start */
+		toread -= tocopy;
+		data += tocopy;
+		blk++;
+	}
+	return len;
+}
+
+/* Write to quotafile (the transaction is already started with enough credits).
+ * Returns bytes written, or err if none; always drops i_mutex before return. */
+static ssize_t ext3_quota_write(struct super_block *sb, int type,
+				const char *data, size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
+	int err = 0;
+	int offset = off & (sb->s_blocksize - 1);
+	int tocopy;
+	int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
+	size_t towrite = len;
+	struct buffer_head *bh;
+	handle_t *handle = journal_current_handle();
+
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
+	while (towrite > 0) {
+		tocopy = sb->s_blocksize - offset < towrite ?
+				sb->s_blocksize - offset : towrite;
+		bh = ext3_bread(handle, inode, blk, 1, &err);
+		if (!bh)
+			goto out;
+		if (journal_quota) {
+			err = ext3_journal_get_write_access(handle, bh);
+			if (err) {
+				brelse(bh);
+				goto out;
+			}
+		}
+		lock_buffer(bh);
+		memcpy(bh->b_data+offset, data, tocopy);
+		flush_dcache_page(bh->b_page);
+		unlock_buffer(bh);
+		if (journal_quota)
+			err = ext3_journal_dirty_metadata(handle, bh);
+		else {
+			/* Always do at least ordered writes for quotas */
+			err = ext3_journal_dirty_data(handle, bh);
+			mark_buffer_dirty(bh);
+		}
+		brelse(bh);
+		if (err)
+			goto out;
+		offset = 0;	/* later blocks are written from their start */
+		towrite -= tocopy;
+		data += tocopy;
+		blk++;
+	}
+out:
+	if (len != towrite) {	/* something was written: update the inode */
+		if (inode->i_size < off+len-towrite) {
+			i_size_write(inode, off+len-towrite);
+			EXT3_I(inode)->i_disksize = inode->i_size;
+		}
+		inode->i_version++;
+		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		ext3_mark_inode_dirty(handle, inode);
+	}
+	mutex_unlock(&inode->i_mutex);	/* also when nothing was written */
+	return len == towrite ? (ssize_t)err : (ssize_t)(len - towrite);
+}
+
+#endif
+/* .get_sb hook: hands off to get_sb_bdev() with ext3_fill_super as filler. */
+static int ext3_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
+}
+/* Filesystem type descriptor (un)registered by init/exit_ext3_fs(). */
+static struct file_system_type ext3_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "ext3",
+	.get_sb		= ext3_get_sb,
+	.kill_sb	= kill_block_super,
+	.fs_flags	= FS_REQUIRES_DEV,
+};
+
+static int __init init_ext3_fs(void)
+{
+	int rc;
+
+	/* Set up xattrs, inode cache, fs type; unwind in reverse on failure. */
+	rc = init_ext3_xattr();
+	if (rc)
+		return rc;
+	rc = init_inodecache();
+	if (!rc) {
+		rc = register_filesystem(&ext3_fs_type);
+		if (!rc)
+			return 0;
+		destroy_inodecache();
+	}
+	exit_ext3_xattr();
+	return rc;
+}
+/* Module exit: undo init_ext3_fs() in the reverse order of its setup. */
+static void __exit exit_ext3_fs(void)
+{
+	unregister_filesystem(&ext3_fs_type);
+	destroy_inodecache();
+	exit_ext3_xattr();
+}
+
+MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+MODULE_LICENSE("GPL");
+module_init(init_ext3_fs)
+module_exit(exit_ext3_fs)
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
new file mode 100644
index 000000000000..4f79122cde67
--- /dev/null
+++ b/fs/ext4/symlink.c
@@ -0,0 +1,54 @@
+/*
+ *  linux/fs/ext3/symlink.c
+ *
+ * Only fast symlinks left here - the rest is done by generic code. AV, 1999
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  from
+ *
+ *  linux/fs/minix/symlink.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  ext3 symlink handling code
+ */
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/namei.h>
+#include "xattr.h"
+/* Fast symlink: hand the target string held in the inode's i_data to nd. */
+static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
+	nd_set_link(nd, (char*)ei->i_data);
+	return NULL;
+}
+/* Symlink ops that use the generic page_follow_link_light/page_put_link. */
+struct inode_operations ext3_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= page_follow_link_light,
+	.put_link	= page_put_link,
+#ifdef CONFIG_EXT3_FS_XATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= ext3_listxattr,
+	.removexattr	= generic_removexattr,
+#endif
+};
+/* Fast-symlink ops: follow_link is ext3_follow_link(); no put_link is set. */
+struct inode_operations ext3_fast_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= ext3_follow_link,
+#ifdef CONFIG_EXT3_FS_XATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= ext3_listxattr,
+	.removexattr	= generic_removexattr,
+#endif
+};
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
new file mode 100644
index 000000000000..f86f2482f01d
--- /dev/null
+++ b/fs/ext4/xattr.c
@@ -0,0 +1,1317 @@
+/*
+ * linux/fs/ext3/xattr.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ *
+ * Fix by Harrison Xing <harrison@mountainviewdata.com>.
+ * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
+ * Extended attributes for symlinks and special files added per
+ *  suggestion of Luka Renko <luka.renko@hermes.si>.
+ * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
+ *  Red Hat Inc.
+ * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
+ *  and Andreas Gruenbacher <agruen@suse.de>.
+ */
+
+/*
+ * Extended attributes are stored directly in inodes (on file systems with
+ * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
+ * field contains the block number if an inode uses an additional block. All
+ * attributes must fit in the inode and one additional block. Blocks that
+ * contain the identical set of attributes may be shared among several inodes.
+ * Identical blocks are detected by keeping a cache of blocks that have
+ * recently been accessed.
+ *
+ * The attributes in inodes and on blocks have a different header; the entries
+ * are stored in the same format:
+ *
+ *   +------------------+
+ *   | header           |
+ *   | entry 1          | |
+ *   | entry 2          | | growing downwards
+ *   | entry 3          | v
+ *   | four null bytes  |
+ *   | . . .            |
+ *   | value 1          | ^
+ *   | value 3          | | growing upwards
+ *   | value 2          | |
+ *   +------------------+
+ *
+ * The header is followed by multiple entry descriptors. In disk blocks, the
+ * entry descriptors are kept sorted. In inodes, they are unsorted. The
+ * attribute values are aligned to the end of the block in no specific order.
+ *
+ * Locking strategy
+ * ----------------
+ * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
+ * EA blocks are only changed if they are exclusive to an inode, so
+ * holding xattr_sem also means that nothing but the EA block's reference
+ * count can change. Multiple writers to the same block are synchronized
+ * by the buffer lock.
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/ext3_jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/mbcache.h>
+#include <linux/quotaops.h>
+#include <linux/rwsem.h>
+#include "xattr.h"
+#include "acl.h"
+/* Accessors for the xattr header and entry table of an EA block or inode. */
+#define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
+#define BFIRST(bh) ENTRY(BHDR(bh)+1)
+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
+/* In-inode header: EXT3_GOOD_OLD_INODE_SIZE + i_extra_isize bytes in. */
+#define IHDR(inode, raw_inode) \
+	((struct ext3_xattr_ibody_header *) \
+		((void *)raw_inode + \
+		 EXT3_GOOD_OLD_INODE_SIZE + \
+		 EXT3_I(inode)->i_extra_isize))
+#define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1))
+/* Debug tracing; both macros expand to nothing without EXT3_XATTR_DEBUG. */
+#ifdef EXT3_XATTR_DEBUG
+# define ea_idebug(inode, f...) do { \
+		printk(KERN_DEBUG "inode %s:%lu: ", \
+			inode->i_sb->s_id, inode->i_ino); \
+		printk(f); \
+		printk("\n"); \
+	} while (0)
+# define ea_bdebug(bh, f...) do { \
+		char b[BDEVNAME_SIZE]; \
+		printk(KERN_DEBUG "block %s:%lu: ", \
+			bdevname(bh->b_bdev, b), \
+			(unsigned long) bh->b_blocknr); \
+		printk(f); \
+		printk("\n"); \
+	} while (0)
+#else
+# define ea_idebug(f...)
+# define ea_bdebug(f...)
+#endif
+
+static void ext3_xattr_cache_insert(struct buffer_head *);
+static struct buffer_head *ext3_xattr_cache_find(struct inode *,
+						 struct ext3_xattr_header *,
+						 struct mb_cache_entry **);
+static void ext3_xattr_rehash(struct ext3_xattr_header *,
+			      struct ext3_xattr_entry *);
+
+static struct mb_cache *ext3_xattr_cache;
+/* Handler table indexed by EXT3_XATTR_INDEX_*; unlisted slots are NULL. */
+static struct xattr_handler *ext3_xattr_handler_map[] = {
+	[EXT3_XATTR_INDEX_USER]		     = &ext3_xattr_user_handler,
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
+	[EXT3_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext3_xattr_acl_access_handler,
+	[EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext3_xattr_acl_default_handler,
+#endif
+	[EXT3_XATTR_INDEX_TRUSTED]	     = &ext3_xattr_trusted_handler,
+#ifdef CONFIG_EXT3_FS_SECURITY
+	[EXT3_XATTR_INDEX_SECURITY]	     = &ext3_xattr_security_handler,
+#endif
+};
+/* NULL-terminated list of the xattr handlers compiled into this build. */
+struct xattr_handler *ext3_xattr_handlers[] = {
+	&ext3_xattr_user_handler,
+	&ext3_xattr_trusted_handler,
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
+	&ext3_xattr_acl_access_handler,
+	&ext3_xattr_acl_default_handler,
+#endif
+#ifdef CONFIG_EXT3_FS_SECURITY
+	&ext3_xattr_security_handler,
+#endif
+	NULL
+};
+
+static inline struct xattr_handler *
+ext3_xattr_handler(int name_index)
+{
+	/* Index 0 and anything past the end of the map have no handler;
+	 * unpopulated slots inside the map are NULL as well. */
+	if (name_index <= 0 || name_index >= ARRAY_SIZE(ext3_xattr_handler_map))
+		return NULL;
+	return ext3_xattr_handler_map[name_index];
+}
+
+/*
+ * Inode operation listxattr(): forwards to ext3_xattr_list() on the inode.
+ *
+ * dentry->d_inode->i_mutex: don't care
+ */
+ssize_t
+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	return ext3_xattr_list(dentry->d_inode, buffer, size);
+}
+
+static int
+ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end)
+{
+	/* Walk the entry table; each successor must start before 'end'. */
+	for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+		if ((void *)EXT3_XATTR_NEXT(entry) >= end)
+			return -EIO;
+	}
+
+	return 0;
+}
+
+static inline int
+ext3_xattr_check_block(struct buffer_head *bh)
+{
+	struct ext3_xattr_header *hdr = BHDR(bh);
+
+	/* The header must carry the xattr magic and an h_blocks count of 1. */
+	if (hdr->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
+	    hdr->h_blocks != cpu_to_le32(1))
+		return -EIO;
+	return ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+}
+
+static inline int
+ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size)
+{
+	size_t vlen = le32_to_cpu(entry->e_value_size);
+
+	if (entry->e_value_block == 0 && vlen <= size &&
+	    le16_to_cpu(entry->e_value_offs) + vlen <= size)
+		return 0;
+	return -EIO;	/* e_value_block set, or the value overruns 'size' */
+}
+/* Find (name_index, name) from *pentry on; 0, -ENODATA, -EIO or -EINVAL. */
+static int
+ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index,
+		      const char *name, size_t size, int sorted)
+{
+	struct ext3_xattr_entry *entry;
+	size_t name_len;
+	int cmp = 1;	/* stays non-zero ("not found") on an empty list */
+
+	if (name == NULL)
+		return -EINVAL;
+	name_len = strlen(name);
+	entry = *pentry;
+	for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+		cmp = name_index - entry->e_name_index;
+		if (!cmp)
+			cmp = name_len - entry->e_name_len;
+		if (!cmp)
+			cmp = memcmp(name, entry->e_name, name_len);
+		if (cmp <= 0 && (sorted || cmp == 0))	/* hit, or past it */
+			break;
+	}
+	*pentry = entry;	/* the match, or where the search stopped */
+	if (!cmp && ext3_xattr_check_entry(entry, size))
+			return -EIO;
+	return cmp ? -ENODATA : 0;
+}
+/* Look the attribute up in the inode's EA block; value size or -errno. */
+static int
+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
+		     void *buffer, size_t buffer_size)
+{
+	struct buffer_head *bh = NULL;
+	struct ext3_xattr_entry *entry;
+	size_t size;
+	int error;
+
+	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
+		  name_index, name, buffer, (long)buffer_size);
+
+	error = -ENODATA;
+	if (!EXT3_I(inode)->i_file_acl)
+		goto cleanup;
+	ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
+	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
+	if (!bh)	/* NOTE(review): a failed read returns -ENODATA */
+		goto cleanup;
+	ea_bdebug(bh, "b_count=%d, refcount=%d",
+		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
+	if (ext3_xattr_check_block(bh)) {
+bad_block:	ext3_error(inode->i_sb, __FUNCTION__,
+			   "inode %lu: bad block "E3FSBLK, inode->i_ino,
+			   EXT3_I(inode)->i_file_acl);
+		error = -EIO;
+		goto cleanup;
+	}
+	ext3_xattr_cache_insert(bh);
+	entry = BFIRST(bh);
+	error = ext3_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
+	if (error == -EIO)	/* bad entry: report via the block above */
+		goto bad_block;
+	if (error)
+		goto cleanup;
+	size = le32_to_cpu(entry->e_value_size);
+	if (buffer) {	/* with a NULL buffer only the size is returned */
+		error = -ERANGE;
+		if (size > buffer_size)
+			goto cleanup;
+		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
+		       size);
+	}
+	error = size;
+
+cleanup:
+	brelse(bh);
+	return error;
+}
+
+static int
+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
+		     void *buffer, size_t buffer_size)
+{
+	struct ext3_xattr_ibody_header *header;
+	struct ext3_xattr_entry *entry;
+	struct ext3_inode *raw_inode;
+	struct ext3_iloc iloc;
+	size_t size;
+	void *end;
+	int error;
+	/* Fetch attribute (name_index, name) from the in-inode xattr area. */
+	if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+		return -ENODATA;	/* inode not flagged as holding xattrs */
+	error = ext3_get_inode_loc(inode, &iloc);
+	if (error)
+		return error;
+	raw_inode = ext3_raw_inode(&iloc);
+	header = IHDR(inode, raw_inode);
+	entry = IFIRST(header);
+	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+	error = ext3_xattr_check_names(entry, end); /* list ends before @end */
+	if (error)
+		goto cleanup;
+	error = ext3_xattr_find_entry(&entry, name_index, name,
+				      end - (void *)entry, 0);
+	if (error)
+		goto cleanup;
+	size = le32_to_cpu(entry->e_value_size);
+	if (buffer) {	/* NULL buffer: only the size is reported */
+		error = -ERANGE;
+		if (size > buffer_size)
+			goto cleanup;
+		memcpy(buffer, (void *)IFIRST(header) +
+		       le16_to_cpu(entry->e_value_offs), size);
+	}
+	error = size;	/* success: length of the value */
+
+cleanup:
+	brelse(iloc.bh);	/* iloc was filled in by ext3_get_inode_loc() */
+	return error;
+}
+
+/*
+ * ext3_xattr_get()
+ *
+ * Copy an extended attribute into the buffer provided, or compute the
+ * buffer size required (buffer is NULL in that case). The in-inode
+ * attributes are searched first, then the external attribute block.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+int
+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
+	       void *buffer, size_t buffer_size)
+{
+	int error;
+
+	down_read(&EXT3_I(inode)->xattr_sem);	/* held for reading */
+	error = ext3_xattr_ibody_get(inode, name_index, name, buffer,
+				     buffer_size);
+	if (error == -ENODATA)	/* not in the inode body: try the block */
+		error = ext3_xattr_block_get(inode, name_index, name, buffer,
+					     buffer_size);
+	up_read(&EXT3_I(inode)->xattr_sem);
+	return error;
+}
+
+static int
+ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
+			char *buffer, size_t buffer_size)
+{
+	size_t left = buffer_size;
+
+	for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+		struct xattr_handler *h;
+		size_t len;
+
+		h = ext3_xattr_handler(entry->e_name_index);
+		if (!h)
+			continue;	/* no handler for this name index */
+		/* The handler's return value is the space this name takes. */
+		len = h->list(inode, buffer, left,
+			      entry->e_name, entry->e_name_len);
+		if (buffer && len > left)
+			return -ERANGE;
+		if (buffer)
+			buffer += len;
+		left -= len;
+	}
+	return buffer_size - left;
+}
+
+static int
+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
+{
+	struct buffer_head *bh = NULL;
+	int error;
+	/* List the attribute names stored in the inode's xattr block. */
+	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
+		  buffer, (long)buffer_size);
+
+	error = 0;	/* no xattr block: nothing to list */
+	if (!EXT3_I(inode)->i_file_acl)
+		goto cleanup;
+	ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
+	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
+	error = -EIO;	/* result if sb_bread() returned NULL */
+	if (!bh)
+		goto cleanup;
+	ea_bdebug(bh, "b_count=%d, refcount=%d",
+		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
+	if (ext3_xattr_check_block(bh)) {
+		ext3_error(inode->i_sb, __FUNCTION__,
+			   "inode %lu: bad block "E3FSBLK, inode->i_ino,
+			   EXT3_I(inode)->i_file_acl);
+		error = -EIO;
+		goto cleanup;
+	}
+	ext3_xattr_cache_insert(bh);
+	error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
+
+cleanup:
+	brelse(bh);	/* bh is still NULL on the early exits above */
+
+	return error;
+}
+
+static int
+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+{
+	struct ext3_xattr_ibody_header *header;
+	struct ext3_inode *raw_inode;
+	struct ext3_iloc iloc;
+	void *end;
+	int error;
+	/* List the attribute names stored in the in-inode xattr area. */
+	if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+		return 0;	/* inode not flagged as holding xattrs */
+	error = ext3_get_inode_loc(inode, &iloc);
+	if (error)
+		return error;
+	raw_inode = ext3_raw_inode(&iloc);
+	header = IHDR(inode, raw_inode);
+	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+	error = ext3_xattr_check_names(IFIRST(header), end);
+	if (error)
+		goto cleanup;
+	error = ext3_xattr_list_entries(inode, IFIRST(header),
+					buffer, buffer_size);
+
+cleanup:
+	brelse(iloc.bh);	/* iloc was filled in by ext3_get_inode_loc() */
+	return error;
+}
+
+/*
+ * ext3_xattr_list()
+ *
+ * Copy a list of attribute names into the buffer provided, or compute
+ * the buffer size required (buffer is NULL in that case). Names from
+ * the inode body come first, followed by those of the attribute block.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+int
+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+{
+	int i_error, b_error;
+
+	down_read(&EXT3_I(inode)->xattr_sem);
+	i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size);
+	if (i_error < 0) {
+		b_error = 0;	/* the sum below is then just i_error */
+	} else {
+		if (buffer) {	/* continue behind the in-inode names */
+			buffer += i_error;
+			buffer_size -= i_error;
+		}
+		b_error = ext3_xattr_block_list(inode, buffer, buffer_size);
+		if (b_error < 0)
+			i_error = 0;	/* the sum below is then just b_error */
+	}
+	up_read(&EXT3_I(inode)->xattr_sem);
+	return i_error + b_error;	/* total size, or the single error */
+}
+
+/*
+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
+ * not set, set it (journalled through @handle, under lock_super()).
+ */
+static void ext3_xattr_update_super_block(handle_t *handle,
+					  struct super_block *sb)
+{
+	if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
+		return;	/* already set: nothing to do */
+
+	lock_super(sb);
+	if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
+		EXT3_SB(sb)->s_es->s_feature_compat |=
+			cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
+		sb->s_dirt = 1;
+		ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+	}	/* without write access the flag is silently left unset */
+	unlock_super(sb);
+}
+
+/*
+ * Release the xattr block BH: if h_refcount is 1, free the block (and its
+ * mbcache entry); otherwise decrement h_refcount under the buffer lock.
+ */
+static void
+ext3_xattr_release_block(handle_t *handle, struct inode *inode,
+			 struct buffer_head *bh)
+{
+	struct mb_cache_entry *ce = NULL;
+
+	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
+	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
+		ea_bdebug(bh, "refcount now=0; freeing");
+		if (ce)
+			mb_cache_entry_free(ce);
+		ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
+		get_bh(bh);	/* NOTE(review): presumably for ext3_forget() */
+		ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
+	} else {
+		if (ext3_journal_get_write_access(handle, bh) == 0) {
+			lock_buffer(bh);
+			BHDR(bh)->h_refcount = cpu_to_le32(
+				le32_to_cpu(BHDR(bh)->h_refcount) - 1);
+			ext3_journal_dirty_metadata(handle, bh);
+			if (IS_SYNC(inode))
+				handle->h_sync = 1;
+			DQUOT_FREE_BLOCK(inode, 1);
+			unlock_buffer(bh);
+			ea_bdebug(bh, "refcount now=%d; releasing",
+				  le32_to_cpu(BHDR(bh)->h_refcount));
+		}	/* no write access: refcount is left untouched */
+		if (ce)
+			mb_cache_entry_release(ce);
+	}
+}
+
+struct ext3_xattr_info {
+	int name_index;		/* compared against e_name_index */
+	const char *name;	/* NUL-terminated attribute name */
+	const void *value;	/* new value; NULL removes the attribute */
+	size_t value_len;	/* length of @value in bytes */
+};
+
+struct ext3_xattr_search {
+	struct ext3_xattr_entry *first;	/* first entry of the list */
+	void *base;		/* address that e_value_offs is relative to */
+	void *end;		/* end of the area holding names and values */
+	struct ext3_xattr_entry *here;	/* match, or the insert position */
+	int not_found;		/* 0 if @here matches, else -ENODATA */
+};
+
+static int
+ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
+{
+	struct ext3_xattr_entry *last;
+	size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
+	/* Create, replace or remove attribute @i at position s->here. */
+	/* Compute min_offs (lowest value offset) and last (end of list). */
+	last = s->first;
+	for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
+		if (!last->e_value_block && last->e_value_size) {
+			size_t offs = le16_to_cpu(last->e_value_offs);
+			if (offs < min_offs)
+				min_offs = offs;
+		}
+	}
+	free = min_offs - ((void *)last - s->base) - sizeof(__u32);
+	if (!s->not_found) {	/* the old entry's space is reusable */
+		if (!s->here->e_value_block && s->here->e_value_size) {
+			size_t size = le32_to_cpu(s->here->e_value_size);
+			free += EXT3_XATTR_SIZE(size);
+		}
+		free += EXT3_XATTR_LEN(name_len);
+	}
+	if (i->value) {
+		if (free < EXT3_XATTR_SIZE(i->value_len) ||
+		    free < EXT3_XATTR_LEN(name_len) +
+			   EXT3_XATTR_SIZE(i->value_len))
+			return -ENOSPC;	/* name plus value do not fit */
+	}
+
+	if (i->value && s->not_found) {
+		/* Insert the new name. */
+		size_t size = EXT3_XATTR_LEN(name_len);
+		size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
+		memmove((void *)s->here + size, s->here, rest);
+		memset(s->here, 0, size);
+		s->here->e_name_index = i->name_index;
+		s->here->e_name_len = name_len;
+		memcpy(s->here->e_name, i->name, name_len);
+	} else {
+		if (!s->here->e_value_block && s->here->e_value_size) {
+			void *first_val = s->base + min_offs;
+			size_t offs = le16_to_cpu(s->here->e_value_offs);
+			void *val = s->base + offs;
+			size_t size = EXT3_XATTR_SIZE(
+				le32_to_cpu(s->here->e_value_size));
+
+			if (i->value && size == EXT3_XATTR_SIZE(i->value_len)) {
+				/* The old and the new value have the same
+				   size. Just replace. */
+				s->here->e_value_size =
+					cpu_to_le32(i->value_len);
+				memset(val + size - EXT3_XATTR_PAD, 0,
+				       EXT3_XATTR_PAD); /* Clear pad bytes. */
+				memcpy(val, i->value, i->value_len);
+				return 0;
+			}
+
+			/* Remove the old value. */
+			memmove(first_val + size, first_val, val - first_val);
+			memset(first_val, 0, size);
+			s->here->e_value_size = 0;
+			s->here->e_value_offs = 0;
+			min_offs += size;
+
+			/* Adjust all value offsets. */
+			last = s->first;
+			while (!IS_LAST_ENTRY(last)) {
+				size_t o = le16_to_cpu(last->e_value_offs);
+				if (!last->e_value_block &&
+				    last->e_value_size && o < offs)
+					last->e_value_offs =
+						cpu_to_le16(o + size);
+				last = EXT3_XATTR_NEXT(last);
+			}
+		}
+		if (!i->value) {
+			/* Remove the old name. */
+			size_t size = EXT3_XATTR_LEN(name_len);
+			last = ENTRY((void *)last - size); /* new list end */
+			memmove(s->here, (void *)s->here + size,
+				(void *)last - (void *)s->here + sizeof(__u32));
+			memset(last, 0, size);
+		}
+	}
+
+	if (i->value) {
+		/* Insert the new value. */
+		s->here->e_value_size = cpu_to_le32(i->value_len);
+		if (i->value_len) {
+			size_t size = EXT3_XATTR_SIZE(i->value_len);
+			void *val = s->base + min_offs - size;
+			s->here->e_value_offs = cpu_to_le16(min_offs - size);
+			memset(val + size - EXT3_XATTR_PAD, 0,
+			       EXT3_XATTR_PAD); /* Clear the pad bytes. */
+			memcpy(val, i->value, i->value_len);
+		}
+	}
+	return 0;
+}
+
+struct ext3_xattr_block_find {
+	struct ext3_xattr_search s;	/* search state within the block */
+	struct buffer_head *bh;		/* the xattr block, once read */
+};
+
+static int
+ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
+		      struct ext3_xattr_block_find *bs)
+{
+	struct super_block *sb = inode->i_sb;
+	int error;
+	/* Read the inode's xattr block, if any, and look up @i in it. */
+	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
+		  i->name_index, i->name, i->value, (long)i->value_len);
+
+	if (EXT3_I(inode)->i_file_acl) {
+		/* The inode already has an extended attribute block. */
+		bs->bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
+		error = -EIO;
+		if (!bs->bh)
+			goto cleanup;
+		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
+			atomic_read(&(bs->bh->b_count)),
+			le32_to_cpu(BHDR(bs->bh)->h_refcount));
+		if (ext3_xattr_check_block(bs->bh)) {
+			ext3_error(sb, __FUNCTION__,
+				"inode %lu: bad block "E3FSBLK, inode->i_ino,
+				EXT3_I(inode)->i_file_acl);
+			error = -EIO;
+			goto cleanup;
+		}
+		/* Find the named attribute. */
+		bs->s.base = BHDR(bs->bh);
+		bs->s.first = BFIRST(bs->bh);
+		bs->s.end = bs->bh->b_data + bs->bh->b_size;
+		bs->s.here = bs->s.first;
+		error = ext3_xattr_find_entry(&bs->s.here, i->name_index,
+					      i->name, bs->bh->b_size, 1);
+		if (error && error != -ENODATA)
+			goto cleanup;
+		bs->s.not_found = error;	/* 0 or -ENODATA */
+	}
+	error = 0;	/* "no block" and "not in the block" are not errors */
+
+cleanup:
+	return error;	/* bs->bh is not released here */
+}
+
+static int
+ext3_xattr_block_set(handle_t *handle, struct inode *inode,
+		     struct ext3_xattr_info *i,
+		     struct ext3_xattr_block_find *bs)
+{
+	struct super_block *sb = inode->i_sb;
+	struct buffer_head *new_bh = NULL;
+	struct ext3_xattr_search *s = &bs->s;
+	struct mb_cache_entry *ce = NULL;
+	int error;
+	/* Apply @i to the xattr block: in place, on a clone, or a new one. */
+#define header(x) ((struct ext3_xattr_header *)(x))
+
+	if (i->value && i->value_len > sb->s_blocksize)
+		return -ENOSPC;
+	if (s->base) {
+		ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev,
+					bs->bh->b_blocknr);
+		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
+			if (ce) {
+				mb_cache_entry_free(ce);
+				ce = NULL;
+			}
+			ea_bdebug(bs->bh, "modifying in-place");
+			error = ext3_journal_get_write_access(handle, bs->bh);
+			if (error)
+				goto cleanup;
+			lock_buffer(bs->bh);
+			error = ext3_xattr_set_entry(i, s);
+			if (!error) {
+				if (!IS_LAST_ENTRY(s->first))
+					ext3_xattr_rehash(header(s->base),
+							  s->here);
+				ext3_xattr_cache_insert(bs->bh);
+			}
+			unlock_buffer(bs->bh);
+			if (error == -EIO)
+				goto bad_block;
+			if (!error)
+				error = ext3_journal_dirty_metadata(handle,
+								    bs->bh);
+			if (error)
+				goto cleanup;
+			goto inserted;
+		} else {
+			int offset = (char *)s->here - bs->bh->b_data;
+
+			if (ce) {
+				mb_cache_entry_release(ce);
+				ce = NULL;
+			}
+			ea_bdebug(bs->bh, "cloning");
+			s->base = kmalloc(bs->bh->b_size, GFP_KERNEL);
+			error = -ENOMEM;
+			if (s->base == NULL)
+				goto cleanup;
+			memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
+			s->first = ENTRY(header(s->base)+1);
+			header(s->base)->h_refcount = cpu_to_le32(1);
+			s->here = ENTRY(s->base + offset);
+			s->end = s->base + bs->bh->b_size;
+		}
+	} else {
+		/* Allocate a buffer where we construct the new block. */
+		s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+		/* assert(header == s->base) */
+		error = -ENOMEM;
+		if (s->base == NULL)
+			goto cleanup;
+		memset(s->base, 0, sb->s_blocksize);
+		header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
+		header(s->base)->h_blocks = cpu_to_le32(1);
+		header(s->base)->h_refcount = cpu_to_le32(1);
+		s->first = ENTRY(header(s->base)+1);
+		s->here = ENTRY(header(s->base)+1);
+		s->end = s->base + sb->s_blocksize;
+	}
+
+	error = ext3_xattr_set_entry(i, s);
+	if (error == -EIO)
+		goto bad_block;
+	if (error)
+		goto cleanup;
+	if (!IS_LAST_ENTRY(s->first))
+		ext3_xattr_rehash(header(s->base), s->here);
+
+inserted:
+	if (!IS_LAST_ENTRY(s->first)) {
+		new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce);
+		if (new_bh) {
+			/* We found an identical block in the cache. */
+			if (new_bh == bs->bh)
+				ea_bdebug(new_bh, "keeping");
+			else {
+				/* The old block is released after updating
+				   the inode. */
+				error = -EDQUOT;
+				if (DQUOT_ALLOC_BLOCK(inode, 1))
+					goto cleanup;
+				error = ext3_journal_get_write_access(handle,
+								      new_bh);
+				if (error)
+					goto cleanup_dquot;
+				lock_buffer(new_bh);
+				BHDR(new_bh)->h_refcount = cpu_to_le32(1 +
+					le32_to_cpu(BHDR(new_bh)->h_refcount));
+				ea_bdebug(new_bh, "reusing; refcount now=%d",
+					le32_to_cpu(BHDR(new_bh)->h_refcount));
+				unlock_buffer(new_bh);
+				error = ext3_journal_dirty_metadata(handle,
+								    new_bh);
+				if (error)
+					goto cleanup_dquot;
+			}
+			mb_cache_entry_release(ce);
+			ce = NULL;
+		} else if (bs->bh && s->base == bs->bh->b_data) {
+			/* We were modifying this block in-place. */
+			ea_bdebug(bs->bh, "keeping this block");
+			new_bh = bs->bh;
+			get_bh(new_bh);
+		} else {
+			/* We need to allocate a new block */
+			ext3_fsblk_t goal = le32_to_cpu(
+					EXT3_SB(sb)->s_es->s_first_data_block) +
+				(ext3_fsblk_t)EXT3_I(inode)->i_block_group *
+				EXT3_BLOCKS_PER_GROUP(sb);
+			ext3_fsblk_t block = ext3_new_block(handle, inode,
+							goal, &error);
+			if (error)
+				goto cleanup;
+			ea_idebug(inode, "creating block "E3FSBLK, block);
+
+			new_bh = sb_getblk(sb, block);
+			if (!new_bh) {
+getblk_failed:
+				ext3_free_blocks(handle, inode, block, 1);
+				error = -EIO;
+				goto cleanup;
+			}
+			lock_buffer(new_bh);
+			error = ext3_journal_get_create_access(handle, new_bh);
+			if (error) {
+				unlock_buffer(new_bh);
+				goto getblk_failed;
+			}
+			memcpy(new_bh->b_data, s->base, new_bh->b_size);
+			set_buffer_uptodate(new_bh);
+			unlock_buffer(new_bh);
+			ext3_xattr_cache_insert(new_bh);
+			error = ext3_journal_dirty_metadata(handle, new_bh);
+			if (error)
+				goto cleanup;
+		}
+	}
+
+	/* Update the inode; 0 when the last attribute has been removed. */
+	EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
+
+	/* Drop the previous xattr block. */
+	if (bs->bh && bs->bh != new_bh)
+		ext3_xattr_release_block(handle, inode, bs->bh);
+	error = 0;
+
+cleanup:
+	if (ce)
+		mb_cache_entry_release(ce);
+	brelse(new_bh);
+	if (!(bs->bh && s->base == bs->bh->b_data))
+		kfree(s->base);	/* only a kmalloc()ed copy is freed */
+
+	return error;
+
+cleanup_dquot:
+	DQUOT_FREE_BLOCK(inode, 1);	/* undo DQUOT_ALLOC_BLOCK() above */
+	goto cleanup;
+
+bad_block:
+	ext3_error(inode->i_sb, __FUNCTION__,
+		   "inode %lu: bad block "E3FSBLK, inode->i_ino,
+		   EXT3_I(inode)->i_file_acl);
+	goto cleanup;
+
+#undef header
+}
+
+struct ext3_xattr_ibody_find {
+	struct ext3_xattr_search s;	/* search state within the inode */
+	struct ext3_iloc iloc;		/* location of the on-disk inode */
+};
+
+static int
+ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
+		      struct ext3_xattr_ibody_find *is)
+{
+	struct ext3_xattr_ibody_header *header;
+	struct ext3_inode *raw_inode;
+	int error;
+	/* Set up is->s for the in-inode area and look up @i in it. */
+	if (EXT3_I(inode)->i_extra_isize == 0)
+		return 0;	/* is->s stays as the caller initialised it */
+	raw_inode = ext3_raw_inode(&is->iloc);
+	header = IHDR(inode, raw_inode);
+	is->s.base = is->s.first = IFIRST(header);
+	is->s.here = is->s.first;
+	is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+	if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) {
+		error = ext3_xattr_check_names(IFIRST(header), is->s.end);
+		if (error)
+			return error;
+		/* Find the named attribute. */
+		error = ext3_xattr_find_entry(&is->s.here, i->name_index,
+					      i->name, is->s.end -
+					      (void *)is->s.base, 0);
+		if (error && error != -ENODATA)
+			return error;
+		is->s.not_found = error;	/* 0 or -ENODATA */
+	}
+	return 0;
+}
+
+static int
+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
+		     struct ext3_xattr_info *i,
+		     struct ext3_xattr_ibody_find *is)
+{
+	struct ext3_xattr_ibody_header *header;
+	struct ext3_xattr_search *s = &is->s;
+	int error;
+	/* Apply @i to the in-inode area, then sync the magic and state flag. */
+	if (EXT3_I(inode)->i_extra_isize == 0)
+		return -ENOSPC;
+	error = ext3_xattr_set_entry(i, s);
+	if (error)
+		return error;
+	header = IHDR(inode, ext3_raw_inode(&is->iloc));
+	if (!IS_LAST_ENTRY(s->first)) {	/* at least one entry remains */
+		header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
+		EXT3_I(inode)->i_state |= EXT3_STATE_XATTR;
+	} else {	/* list is empty now */
+		header->h_magic = cpu_to_le32(0);
+		EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR;
+	}
+	return 0;
+}
+
+/*
+ * ext3_xattr_set_handle()
+ *
+ * Create, replace or remove an extended attribute for this inode. @value
+ * is NULL to remove an existing extended attribute, and non-NULL to
+ * either replace an existing extended attribute, or create a new extended
+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE specify that the
+ * attribute must exist (else -ENODATA) and must not exist (else -EEXIST)
+ * previous to the call, respectively.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+int
+ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
+		      const char *name, const void *value, size_t value_len,
+		      int flags)
+{
+	struct ext3_xattr_info i = {
+		.name_index = name_index,
+		.name = name,
+		.value = value,
+		.value_len = value_len,
+
+	};
+	struct ext3_xattr_ibody_find is = {
+		.s = { .not_found = -ENODATA, },
+	};
+	struct ext3_xattr_block_find bs = {
+		.s = { .not_found = -ENODATA, },
+	};
+	int error;
+
+	if (!name)
+		return -EINVAL;
+	if (strlen(name) > 255)
+		return -ERANGE;
+	down_write(&EXT3_I(inode)->xattr_sem);
+	error = ext3_get_inode_loc(inode, &is.iloc);
+	if (error)
+		goto cleanup;
+
+	if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {
+		struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
+		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
+		EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW;
+	}
+
+	error = ext3_xattr_ibody_find(inode, &i, &is);
+	if (error)
+		goto cleanup;
+	if (is.s.not_found)	/* not in the inode body: check the block */
+		error = ext3_xattr_block_find(inode, &i, &bs);
+	if (error)
+		goto cleanup;
+	if (is.s.not_found && bs.s.not_found) {
+		error = -ENODATA;
+		if (flags & XATTR_REPLACE)
+			goto cleanup;
+		error = 0;
+		if (!value)
+			goto cleanup;	/* removing a missing attr: no-op */
+	} else {
+		error = -EEXIST;
+		if (flags & XATTR_CREATE)
+			goto cleanup;
+	}
+	error = ext3_journal_get_write_access(handle, is.iloc.bh);
+	if (error)
+		goto cleanup;
+	if (!value) {	/* removal, from wherever the attribute was found */
+		if (!is.s.not_found)
+			error = ext3_xattr_ibody_set(handle, inode, &i, &is);
+		else if (!bs.s.not_found)
+			error = ext3_xattr_block_set(handle, inode, &i, &bs);
+	} else {	/* create or replace: the inode body is tried first */
+		error = ext3_xattr_ibody_set(handle, inode, &i, &is);
+		if (!error && !bs.s.not_found) {
+			i.value = NULL;	/* drop the old copy in the block */
+			error = ext3_xattr_block_set(handle, inode, &i, &bs);
+		} else if (error == -ENOSPC) {
+			error = ext3_xattr_block_set(handle, inode, &i, &bs);
+			if (error)
+				goto cleanup;
+			if (!is.s.not_found) {
+				i.value = NULL;	/* drop the in-inode copy */
+				error = ext3_xattr_ibody_set(handle, inode, &i,
+							     &is);
+			}
+		}
+	}
+	if (!error) {
+		ext3_xattr_update_super_block(handle, inode->i_sb);
+		inode->i_ctime = CURRENT_TIME_SEC;
+		error = ext3_mark_iloc_dirty(handle, inode, &is.iloc);
+		/*
+		 * The bh is consumed by ext3_mark_iloc_dirty, even with
+		 * error != 0.
+		 */
+		is.iloc.bh = NULL;
+		if (IS_SYNC(inode))
+			handle->h_sync = 1;
+	}
+
+cleanup:
+	brelse(is.iloc.bh);
+	brelse(bs.bh);
+	up_write(&EXT3_I(inode)->xattr_sem);
+	return error;
+}
+
+/*
+ * ext3_xattr_set()
+ *
+ * Like ext3_xattr_set_handle, but start from an inode. This extended
+ * attribute modification is a filesystem transaction by itself.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+int
+ext3_xattr_set(struct inode *inode, int name_index, const char *name,
+	       const void *value, size_t value_len, int flags)
+{
+	handle_t *handle;
+	int error, retries = 0;
+
+retry:
+	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
+	if (IS_ERR(handle)) {
+		error = PTR_ERR(handle);
+	} else {
+		int error2;	/* result of stopping the transaction */
+
+		error = ext3_xattr_set_handle(handle, inode, name_index, name,
+					      value, value_len, flags);
+		error2 = ext3_journal_stop(handle);
+		if (error == -ENOSPC &&
+		    ext3_should_retry_alloc(inode->i_sb, &retries))
+			goto retry;	/* start over with a new handle */
+		if (error == 0)
+			error = error2;	/* the first failure wins */
+	}
+
+	return error;
+}
+
+/*
+ * ext3_xattr_delete_inode()
+ *
+ * Free extended attribute resources associated with this inode. This
+ * is called immediately before an inode is freed. We have exclusive
+ * access to the inode. Read errors and bad blocks are only logged.
+ */
+void
+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
+{
+	struct buffer_head *bh = NULL;
+
+	if (!EXT3_I(inode)->i_file_acl)
+		goto cleanup;	/* no xattr block to release */
+	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
+	if (!bh) {
+		ext3_error(inode->i_sb, __FUNCTION__,
+			"inode %lu: block "E3FSBLK" read error", inode->i_ino,
+			EXT3_I(inode)->i_file_acl);
+		goto cleanup;
+	}
+	if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
+	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
+		ext3_error(inode->i_sb, __FUNCTION__,
+			"inode %lu: bad block "E3FSBLK, inode->i_ino,
+			EXT3_I(inode)->i_file_acl);
+		goto cleanup;	/* i_file_acl is left as it was */
+	}
+	ext3_xattr_release_block(handle, inode, bh);
+	EXT3_I(inode)->i_file_acl = 0;
+
+cleanup:
+	brelse(bh);
+}
+
+/*
+ * ext3_xattr_put_super()
+ *
+ * This is called when a file system is unmounted; it calls
+ * mb_cache_shrink() for the file system's block device.
+ */
+void
+ext3_xattr_put_super(struct super_block *sb)
+{
+	mb_cache_shrink(sb->s_bdev);
+}
+
+/*
+ * ext3_xattr_cache_insert()
+ *
+ * Create a new entry in the extended attribute cache, and insert
+ * it unless such an entry is already in the cache.
+ *
+ * Returns nothing; failures are only reported through ea_bdebug().
+ */
+static void
+ext3_xattr_cache_insert(struct buffer_head *bh)
+{
+	__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
+	struct mb_cache_entry *ce;
+	int error;
+
+	ce = mb_cache_entry_alloc(ext3_xattr_cache);
+	if (!ce) {
+		ea_bdebug(bh, "out of memory");
+		return;
+	}
+	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
+	if (error) {
+		mb_cache_entry_free(ce);
+		if (error == -EBUSY) {
+			ea_bdebug(bh, "already in cache");
+			error = 0;	/* dead store: error is not read again */
+		}
+	} else {
+		ea_bdebug(bh, "inserting [%x]", (int)hash);
+		mb_cache_entry_release(ce);
+	}
+}
+
+/*
+ * ext3_xattr_cmp()
+ *
+ * Compare two extended attribute blocks for equality, entry by entry
+ * and in list order.
+ * Returns 0 if the blocks are equal, 1 if they differ, and
+ * a negative error number on errors.
+ */
+static int
+ext3_xattr_cmp(struct ext3_xattr_header *header1,
+	       struct ext3_xattr_header *header2)
+{
+	struct ext3_xattr_entry *entry1, *entry2;
+
+	entry1 = ENTRY(header1+1);	/* entries start behind the header */
+	entry2 = ENTRY(header2+1);
+	while (!IS_LAST_ENTRY(entry1)) {
+		if (IS_LAST_ENTRY(entry2))
+			return 1;	/* block 2 has fewer entries */
+		if (entry1->e_hash != entry2->e_hash ||
+		    entry1->e_name_index != entry2->e_name_index ||
+		    entry1->e_name_len != entry2->e_name_len ||
+		    entry1->e_value_size != entry2->e_value_size ||
+		    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
+			return 1;
+		if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
+			return -EIO;	/* values outside the block: error */
+		if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
+			   (char *)header2 + le16_to_cpu(entry2->e_value_offs),
+			   le32_to_cpu(entry1->e_value_size)))
+			return 1;
+
+		entry1 = EXT3_XATTR_NEXT(entry1);
+		entry2 = EXT3_XATTR_NEXT(entry2);
+	}
+	if (!IS_LAST_ENTRY(entry2))
+		return 1;	/* block 2 has more entries */
+	return 0;
+}
+
+/*
+ * ext3_xattr_cache_find()
+ *
+ * Find an identical extended attribute block among the cached blocks.
+ *
+ * Returns the buffer of the block found (its cache entry is stored in
+ * *pce), or NULL if such a block was not found or an error occurred.
+ */
+static struct buffer_head *
+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
+		      struct mb_cache_entry **pce)
+{
+	__u32 hash = le32_to_cpu(header->h_hash);
+	struct mb_cache_entry *ce;
+
+	if (!header->h_hash)
+		return NULL;  /* never share */
+	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
+again:
+	ce = mb_cache_entry_find_first(ext3_xattr_cache, 0,
+				       inode->i_sb->s_bdev, hash);
+	while (ce) {
+		struct buffer_head *bh;
+
+		if (IS_ERR(ce)) {
+			if (PTR_ERR(ce) == -EAGAIN)
+				goto again;	/* restart the whole lookup */
+			break;
+		}
+		bh = sb_bread(inode->i_sb, ce->e_block);
+		if (!bh) {
+			ext3_error(inode->i_sb, __FUNCTION__,
+				"inode %lu: block %lu read error",
+				inode->i_ino, (unsigned long) ce->e_block);
+		} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
+				EXT3_XATTR_REFCOUNT_MAX) {
+			ea_idebug(inode, "block %lu refcount %d>=%d",
+				  (unsigned long) ce->e_block,
+				  le32_to_cpu(BHDR(bh)->h_refcount),
+					  EXT3_XATTR_REFCOUNT_MAX);
+		} else if (ext3_xattr_cmp(header, BHDR(bh)) == 0) {
+			*pce = ce;	/* handed to the caller along with bh */
+			return bh;
+		}
+		brelse(bh);	/* candidate rejected (bh may be NULL) */
+		ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
+	}
+	return NULL;
+}
+
+#define NAME_HASH_SHIFT 5	/* rotate amount per name byte */
+#define VALUE_HASH_SHIFT 16	/* rotate amount per 32-bit value word */
+
+/*
+ * ext3_xattr_hash_entry()
+ *
+ * Compute the hash of an extended attribute and store it in e_hash.
+ */
+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
+					 struct ext3_xattr_entry *entry)
+{
+	__u32 hash = 0;
+	char *name = entry->e_name;
+	int n;
+
+	for (n=0; n < entry->e_name_len; n++) {
+		hash = (hash << NAME_HASH_SHIFT) ^
+		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
+		       *name++;
+	}
+	/* Values are mixed in only if stored in this block and non-empty. */
+	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
+		__le32 *value = (__le32 *)((char *)header +
+			le16_to_cpu(entry->e_value_offs));
+		for (n = (le32_to_cpu(entry->e_value_size) +
+		     EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
+			hash = (hash << VALUE_HASH_SHIFT) ^
+			       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
+			       le32_to_cpu(*value++);
+		}
+	}
+	entry->e_hash = cpu_to_le32(hash);
+}
+
+#undef NAME_HASH_SHIFT
+#undef VALUE_HASH_SHIFT
+
+#define BLOCK_HASH_SHIFT 16	/* rotate amount per entry hash */
+
+/*
+ * ext3_xattr_rehash()
+ *
+ * Re-compute the extended attribute hash value after an entry has changed.
+ */
+static void ext3_xattr_rehash(struct ext3_xattr_header *header,
+			      struct ext3_xattr_entry *entry)
+{
+	struct ext3_xattr_entry *here;
+	__u32 hash = 0;
+
+	ext3_xattr_hash_entry(header, entry);	/* refresh @entry's e_hash */
+	here = ENTRY(header+1);
+	while (!IS_LAST_ENTRY(here)) {
+		if (!here->e_hash) {
+			/* Block is not shared if an entry's hash value == 0 */
+			hash = 0;
+			break;
+		}
+		hash = (hash << BLOCK_HASH_SHIFT) ^
+		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
+		       le32_to_cpu(here->e_hash);
+		here = EXT3_XATTR_NEXT(here);
+	}
+	header->h_hash = cpu_to_le32(hash);	/* 0 when the list is empty */
+}
+
+#undef BLOCK_HASH_SHIFT
+
+int __init
+init_ext3_xattr(void)
+{
+	/* Entry size: the base struct plus one e_indexes[] slot. */
+	ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
+		sizeof(struct mb_cache_entry) +
+		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
+
+	return ext3_xattr_cache ? 0 : -ENOMEM;
+}
+
+void
+exit_ext3_xattr(void)
+{
+	if (ext3_xattr_cache)	/* NULL if it was never created */
+		mb_cache_destroy(ext3_xattr_cache);
+	ext3_xattr_cache = NULL;	/* makes a repeated call a no-op */
+}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
new file mode 100644
index 000000000000..6b1ae1c6182c
--- /dev/null
+++ b/fs/ext4/xattr.h
@@ -0,0 +1,145 @@
+/*
+  File: fs/ext3/xattr.h
+
+  On-disk format of extended attributes for the ext3 filesystem.
+
+  (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+*/
+
+#include <linux/xattr.h>
+
+/* Magic value in attribute blocks */
+#define EXT3_XATTR_MAGIC		0xEA020000
+
+/* Maximum number of references to one attribute block */
+#define EXT3_XATTR_REFCOUNT_MAX		1024
+
+/* Name indexes */
+#define EXT3_XATTR_INDEX_USER			1
+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS	2
+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT	3
+#define EXT3_XATTR_INDEX_TRUSTED		4
+#define	EXT3_XATTR_INDEX_LUSTRE			5
+#define EXT3_XATTR_INDEX_SECURITY	        6
+
+struct ext3_xattr_header {
+	__le32	h_magic;	/* magic number for identification */
+	__le32	h_refcount;	/* reference count */
+	__le32	h_blocks;	/* number of disk blocks used */
+	__le32	h_hash;		/* hash value of all attributes */
+	__u32	h_reserved[4];	/* zero right now */
+};
+
+struct ext3_xattr_ibody_header {
+	__le32	h_magic;	/* magic number for identification */
+};
+
+struct ext3_xattr_entry {
+	__u8	e_name_len;	/* length of name */
+	__u8	e_name_index;	/* attribute name index */
+	__le16	e_value_offs;	/* offset in disk block of value */
+	__le32	e_value_block;	/* disk block attribute is stored on (n/i) */
+	__le32	e_value_size;	/* size of attribute value */
+	__le32	e_hash;		/* hash value of name and value */
+	char	e_name[0];	/* attribute name */
+};
+
+#define EXT3_XATTR_PAD_BITS		2
+#define EXT3_XATTR_PAD		(1<<EXT3_XATTR_PAD_BITS)
+#define EXT3_XATTR_ROUND		(EXT3_XATTR_PAD-1)
+#define EXT3_XATTR_LEN(name_len) \
+	(((name_len) + EXT3_XATTR_ROUND + \
+	sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
+#define EXT3_XATTR_NEXT(entry) \
+	( (struct ext3_xattr_entry *)( \
+	  (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
+#define EXT3_XATTR_SIZE(size) \
+	(((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
+
+# ifdef CONFIG_EXT3_FS_XATTR
+
+extern struct xattr_handler ext3_xattr_user_handler;
+extern struct xattr_handler ext3_xattr_trusted_handler;
+extern struct xattr_handler ext3_xattr_acl_access_handler;
+extern struct xattr_handler ext3_xattr_acl_default_handler;
+extern struct xattr_handler ext3_xattr_security_handler;
+
+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
+
+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
+extern int ext3_xattr_list(struct inode *, char *, size_t);
+extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
+extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
+
+extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
+extern void ext3_xattr_put_super(struct super_block *);
+
+extern int init_ext3_xattr(void);
+extern void exit_ext3_xattr(void);
+
+extern struct xattr_handler *ext3_xattr_handlers[];
+
+# else  /* CONFIG_EXT3_FS_XATTR */
+
+static inline int
+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
+	       void *buffer, size_t size, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ext3_xattr_list(struct inode *inode, void *buffer, size_t size)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ext3_xattr_set(struct inode *inode, int name_index, const char *name,
+	       const void *value, size_t size, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
+	       const char *name, const void *value, size_t size, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void
+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
+{
+}
+
+static inline void
+ext3_xattr_put_super(struct super_block *sb)
+{
+}
+
+static inline int
+init_ext3_xattr(void)
+{
+	return 0;
+}
+
+static inline void
+exit_ext3_xattr(void)
+{
+}
+
+#define ext3_xattr_handlers	NULL
+
+# endif  /* CONFIG_EXT3_FS_XATTR */
+
+#ifdef CONFIG_EXT3_FS_SECURITY
+extern int ext3_init_security(handle_t *handle, struct inode *inode,
+				struct inode *dir);
+#else
+static inline int ext3_init_security(handle_t *handle, struct inode *inode,
+				struct inode *dir)
+{
+	return 0;
+}
+#endif
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
new file mode 100644
index 000000000000..b9c40c15647b
--- /dev/null
+++ b/fs/ext4/xattr_security.c
@@ -0,0 +1,77 @@
+/*
+ * linux/fs/ext3/xattr_security.c
+ * Handler for storing security labels as extended attributes.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/smp_lock.h>
+#include <linux/ext3_jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/security.h>
+#include "xattr.h"
+
+static size_t
+ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
+			 const char *name, size_t name_len)
+{
+	const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+	const size_t total_len = prefix_len + name_len + 1;
+
+
+	if (list && total_len <= list_size) {
+		memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
+		memcpy(list+prefix_len, name, name_len);
+		list[prefix_len + name_len] = '\0';
+	}
+	return total_len;
+}
+
+static int
+ext3_xattr_security_get(struct inode *inode, const char *name,
+		       void *buffer, size_t size)
+{
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name,
+			      buffer, size);
+}
+
+static int
+ext3_xattr_security_set(struct inode *inode, const char *name,
+		       const void *value, size_t size, int flags)
+{
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name,
+			      value, size, flags);
+}
+
+int
+ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
+{
+	int err;
+	size_t len;
+	void *value;
+	char *name;
+
+	err = security_inode_init_security(inode, dir, &name, &value, &len);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+	err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
+				    name, value, len, 0);
+	kfree(name);
+	kfree(value);
+	return err;
+}
+
+struct xattr_handler ext3_xattr_security_handler = {
+	.prefix	= XATTR_SECURITY_PREFIX,
+	.list	= ext3_xattr_security_list,
+	.get	= ext3_xattr_security_get,
+	.set	= ext3_xattr_security_set,
+};
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
new file mode 100644
index 000000000000..86d91f1186dc
--- /dev/null
+++ b/fs/ext4/xattr_trusted.c
@@ -0,0 +1,62 @@
+/*
+ * linux/fs/ext3/xattr_trusted.c
+ * Handler for trusted extended attributes.
+ *
+ * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/smp_lock.h>
+#include <linux/ext3_jbd.h>
+#include <linux/ext3_fs.h>
+#include "xattr.h"
+
+#define XATTR_TRUSTED_PREFIX "trusted."
+
+static size_t
+ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
+			const char *name, size_t name_len)
+{
+	const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+	const size_t total_len = prefix_len + name_len + 1;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return 0;
+
+	if (list && total_len <= list_size) {
+		memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
+		memcpy(list+prefix_len, name, name_len);
+		list[prefix_len + name_len] = '\0';
+	}
+	return total_len;
+}
+
+static int
+ext3_xattr_trusted_get(struct inode *inode, const char *name,
+		       void *buffer, size_t size)
+{
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
+			      buffer, size);
+}
+
+static int
+ext3_xattr_trusted_set(struct inode *inode, const char *name,
+		       const void *value, size_t size, int flags)
+{
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name,
+			      value, size, flags);
+}
+
+struct xattr_handler ext3_xattr_trusted_handler = {
+	.prefix	= XATTR_TRUSTED_PREFIX,
+	.list	= ext3_xattr_trusted_list,
+	.get	= ext3_xattr_trusted_get,
+	.set	= ext3_xattr_trusted_set,
+};
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
new file mode 100644
index 000000000000..a85a0a17c4fd
--- /dev/null
+++ b/fs/ext4/xattr_user.c
@@ -0,0 +1,64 @@
+/*
+ * linux/fs/ext3/xattr_user.c
+ * Handler for extended user attributes.
+ *
+ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/smp_lock.h>
+#include <linux/ext3_jbd.h>
+#include <linux/ext3_fs.h>
+#include "xattr.h"
+
+#define XATTR_USER_PREFIX "user."
+
+static size_t
+ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
+		     const char *name, size_t name_len)
+{
+	const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+	const size_t total_len = prefix_len + name_len + 1;
+
+	if (!test_opt(inode->i_sb, XATTR_USER))
+		return 0;
+
+	if (list && total_len <= list_size) {
+		memcpy(list, XATTR_USER_PREFIX, prefix_len);
+		memcpy(list+prefix_len, name, name_len);
+		list[prefix_len + name_len] = '\0';
+	}
+	return total_len;
+}
+
+static int
+ext3_xattr_user_get(struct inode *inode, const char *name,
+		    void *buffer, size_t size)
+{
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	if (!test_opt(inode->i_sb, XATTR_USER))
+		return -EOPNOTSUPP;
+	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size);
+}
+
+static int
+ext3_xattr_user_set(struct inode *inode, const char *name,
+		    const void *value, size_t size, int flags)
+{
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	if (!test_opt(inode->i_sb, XATTR_USER))
+		return -EOPNOTSUPP;
+	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name,
+			      value, size, flags);
+}
+
+struct xattr_handler ext3_xattr_user_handler = {
+	.prefix	= XATTR_USER_PREFIX,
+	.list	= ext3_xattr_user_list,
+	.get	= ext3_xattr_user_get,
+	.set	= ext3_xattr_user_set,
+};
-- 
cgit v1.2.3


From 617ba13b31fbf505cc21799826639ef24ed94af0 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 11 Oct 2006 01:20:53 -0700
Subject: [PATCH] ext4: rename ext4 symbols to avoid duplication of ext3
 symbols

Mingming Cao originally did this work, and Dave Kleikamp (Shaggy)
reproduced it using some scripts from her.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/Makefile         |   12 +-
 fs/ext4/acl.c            |  188 ++++----
 fs/ext4/acl.h            |   58 +--
 fs/ext4/balloc.c         |  536 +++++++++++-----------
 fs/ext4/bitmap.c         |   10 +-
 fs/ext4/dir.c            |  102 ++---
 fs/ext4/file.c           |   50 +--
 fs/ext4/fsync.c          |   20 +-
 fs/ext4/hash.c           |   10 +-
 fs/ext4/ialloc.c         |  230 +++++-----
 fs/ext4/inode.c          | 1020 +++++++++++++++++++++---------------------
 fs/ext4/ioctl.c          |  156 +++----
 fs/ext4/namei.c          |  830 +++++++++++++++++-----------------
 fs/ext4/namei.h          |    4 +-
 fs/ext4/resize.c         |  412 ++++++++---------
 fs/ext4/super.c          | 1114 +++++++++++++++++++++++-----------------------
 fs/ext4/symlink.c        |   24 +-
 fs/ext4/xattr.c          |  560 +++++++++++------------
 fs/ext4/xattr.h          |  110 ++---
 fs/ext4/xattr_security.c |   28 +-
 fs/ext4/xattr_trusted.c  |   24 +-
 fs/ext4/xattr_user.c     |   24 +-
 22 files changed, 2761 insertions(+), 2761 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 704cd44a40c2..09c487893e4a 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -1,12 +1,12 @@
 #
-# Makefile for the linux ext3-filesystem routines.
+# Makefile for the linux ext4-filesystem routines.
 #
 
-obj-$(CONFIG_EXT3_FS) += ext3.o
+obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
 
-ext3-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ext4dev-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 	   ioctl.o namei.o super.o symlink.o hash.o resize.o
 
-ext3-$(CONFIG_EXT3_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o
-ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
-ext3-$(CONFIG_EXT3_FS_SECURITY)	 += xattr_security.o
+ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)	+= xattr.o xattr_user.o xattr_trusted.o
+ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL)	+= acl.o
+ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY)	+= xattr_security.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 1e5038d9a01b..d143489aeb4c 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -1,5 +1,5 @@
 /*
- * linux/fs/ext3/acl.c
+ * linux/fs/ext4/acl.c
  *
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
  */
@@ -9,8 +9,8 @@
 #include <linux/slab.h>
 #include <linux/capability.h>
 #include <linux/fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include <linux/ext4_jbd.h>
+#include <linux/ext4_fs.h>
 #include "xattr.h"
 #include "acl.h"
 
@@ -18,7 +18,7 @@
  * Convert from filesystem to in-memory representation.
  */
 static struct posix_acl *
-ext3_acl_from_disk(const void *value, size_t size)
+ext4_acl_from_disk(const void *value, size_t size)
 {
 	const char *end = (char *)value + size;
 	int n, count;
@@ -26,13 +26,13 @@ ext3_acl_from_disk(const void *value, size_t size)
 
 	if (!value)
 		return NULL;
-	if (size < sizeof(ext3_acl_header))
+	if (size < sizeof(ext4_acl_header))
 		 return ERR_PTR(-EINVAL);
-	if (((ext3_acl_header *)value)->a_version !=
-	    cpu_to_le32(EXT3_ACL_VERSION))
+	if (((ext4_acl_header *)value)->a_version !=
+	    cpu_to_le32(EXT4_ACL_VERSION))
 		return ERR_PTR(-EINVAL);
-	value = (char *)value + sizeof(ext3_acl_header);
-	count = ext3_acl_count(size);
+	value = (char *)value + sizeof(ext4_acl_header);
+	count = ext4_acl_count(size);
 	if (count < 0)
 		return ERR_PTR(-EINVAL);
 	if (count == 0)
@@ -41,9 +41,9 @@ ext3_acl_from_disk(const void *value, size_t size)
 	if (!acl)
 		return ERR_PTR(-ENOMEM);
 	for (n=0; n < count; n++) {
-		ext3_acl_entry *entry =
-			(ext3_acl_entry *)value;
-		if ((char *)value + sizeof(ext3_acl_entry_short) > end)
+		ext4_acl_entry *entry =
+			(ext4_acl_entry *)value;
+		if ((char *)value + sizeof(ext4_acl_entry_short) > end)
 			goto fail;
 		acl->a_entries[n].e_tag  = le16_to_cpu(entry->e_tag);
 		acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
@@ -53,13 +53,13 @@ ext3_acl_from_disk(const void *value, size_t size)
 			case ACL_MASK:
 			case ACL_OTHER:
 				value = (char *)value +
-					sizeof(ext3_acl_entry_short);
+					sizeof(ext4_acl_entry_short);
 				acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
 				break;
 
 			case ACL_USER:
 			case ACL_GROUP:
-				value = (char *)value + sizeof(ext3_acl_entry);
+				value = (char *)value + sizeof(ext4_acl_entry);
 				if ((char *)value > end)
 					goto fail;
 				acl->a_entries[n].e_id =
@@ -83,21 +83,21 @@ fail:
  * Convert from in-memory to filesystem representation.
  */
 static void *
-ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
+ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
 {
-	ext3_acl_header *ext_acl;
+	ext4_acl_header *ext_acl;
 	char *e;
 	size_t n;
 
-	*size = ext3_acl_size(acl->a_count);
-	ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count *
-			sizeof(ext3_acl_entry), GFP_KERNEL);
+	*size = ext4_acl_size(acl->a_count);
+	ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
+			sizeof(ext4_acl_entry), GFP_KERNEL);
 	if (!ext_acl)
 		return ERR_PTR(-ENOMEM);
-	ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION);
-	e = (char *)ext_acl + sizeof(ext3_acl_header);
+	ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
+	e = (char *)ext_acl + sizeof(ext4_acl_header);
 	for (n=0; n < acl->a_count; n++) {
-		ext3_acl_entry *entry = (ext3_acl_entry *)e;
+		ext4_acl_entry *entry = (ext4_acl_entry *)e;
 		entry->e_tag  = cpu_to_le16(acl->a_entries[n].e_tag);
 		entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
 		switch(acl->a_entries[n].e_tag) {
@@ -105,14 +105,14 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
 			case ACL_GROUP:
 				entry->e_id =
 					cpu_to_le32(acl->a_entries[n].e_id);
-				e += sizeof(ext3_acl_entry);
+				e += sizeof(ext4_acl_entry);
 				break;
 
 			case ACL_USER_OBJ:
 			case ACL_GROUP_OBJ:
 			case ACL_MASK:
 			case ACL_OTHER:
-				e += sizeof(ext3_acl_entry_short);
+				e += sizeof(ext4_acl_entry_short);
 				break;
 
 			default:
@@ -127,12 +127,12 @@ fail:
 }
 
 static inline struct posix_acl *
-ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
+ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
 {
-	struct posix_acl *acl = EXT3_ACL_NOT_CACHED;
+	struct posix_acl *acl = EXT4_ACL_NOT_CACHED;
 
 	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT3_ACL_NOT_CACHED)
+	if (*i_acl != EXT4_ACL_NOT_CACHED)
 		acl = posix_acl_dup(*i_acl);
 	spin_unlock(&inode->i_lock);
 
@@ -140,11 +140,11 @@ ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
 }
 
 static inline void
-ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
+ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
                   struct posix_acl *acl)
 {
 	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT3_ACL_NOT_CACHED)
+	if (*i_acl != EXT4_ACL_NOT_CACHED)
 		posix_acl_release(*i_acl);
 	*i_acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
@@ -156,9 +156,9 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
  * inode->i_mutex: don't care
  */
 static struct posix_acl *
-ext3_get_acl(struct inode *inode, int type)
+ext4_get_acl(struct inode *inode, int type)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext4_inode_info *ei = EXT4_I(inode);
 	int name_index;
 	char *value = NULL;
 	struct posix_acl *acl;
@@ -169,31 +169,31 @@ ext3_get_acl(struct inode *inode, int type)
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			acl = ext3_iget_acl(inode, &ei->i_acl);
-			if (acl != EXT3_ACL_NOT_CACHED)
+			acl = ext4_iget_acl(inode, &ei->i_acl);
+			if (acl != EXT4_ACL_NOT_CACHED)
 				return acl;
-			name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
+			name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
 			break;
 
 		case ACL_TYPE_DEFAULT:
-			acl = ext3_iget_acl(inode, &ei->i_default_acl);
-			if (acl != EXT3_ACL_NOT_CACHED)
+			acl = ext4_iget_acl(inode, &ei->i_default_acl);
+			if (acl != EXT4_ACL_NOT_CACHED)
 				return acl;
-			name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
+			name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
 			break;
 
 		default:
 			return ERR_PTR(-EINVAL);
 	}
-	retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
+	retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
 		value = kmalloc(retval, GFP_KERNEL);
 		if (!value)
 			return ERR_PTR(-ENOMEM);
-		retval = ext3_xattr_get(inode, name_index, "", value, retval);
+		retval = ext4_xattr_get(inode, name_index, "", value, retval);
 	}
 	if (retval > 0)
-		acl = ext3_acl_from_disk(value, retval);
+		acl = ext4_acl_from_disk(value, retval);
 	else if (retval == -ENODATA || retval == -ENOSYS)
 		acl = NULL;
 	else
@@ -203,11 +203,11 @@ ext3_get_acl(struct inode *inode, int type)
 	if (!IS_ERR(acl)) {
 		switch(type) {
 			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &ei->i_acl, acl);
+				ext4_iset_acl(inode, &ei->i_acl, acl);
 				break;
 
 			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &ei->i_default_acl, acl);
+				ext4_iset_acl(inode, &ei->i_default_acl, acl);
 				break;
 		}
 	}
@@ -217,13 +217,13 @@ ext3_get_acl(struct inode *inode, int type)
 /*
  * Set the access or default ACL of an inode.
  *
- * inode->i_mutex: down unless called from ext3_new_inode
+ * inode->i_mutex: down unless called from ext4_new_inode
  */
 static int
-ext3_set_acl(handle_t *handle, struct inode *inode, int type,
+ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 	     struct posix_acl *acl)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext4_inode_info *ei = EXT4_I(inode);
 	int name_index;
 	void *value = NULL;
 	size_t size = 0;
@@ -234,7 +234,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
+			name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
 			if (acl) {
 				mode_t mode = inode->i_mode;
 				error = posix_acl_equiv_mode(acl, &mode);
@@ -242,7 +242,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 					return error;
 				else {
 					inode->i_mode = mode;
-					ext3_mark_inode_dirty(handle, inode);
+					ext4_mark_inode_dirty(handle, inode);
 					if (error == 0)
 						acl = NULL;
 				}
@@ -250,7 +250,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 			break;
 
 		case ACL_TYPE_DEFAULT:
-			name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
+			name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
 			if (!S_ISDIR(inode->i_mode))
 				return acl ? -EACCES : 0;
 			break;
@@ -259,23 +259,23 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 			return -EINVAL;
 	}
 	if (acl) {
-		value = ext3_acl_to_disk(acl, &size);
+		value = ext4_acl_to_disk(acl, &size);
 		if (IS_ERR(value))
 			return (int)PTR_ERR(value);
 	}
 
-	error = ext3_xattr_set_handle(handle, inode, name_index, "",
+	error = ext4_xattr_set_handle(handle, inode, name_index, "",
 				      value, size, 0);
 
 	kfree(value);
 	if (!error) {
 		switch(type) {
 			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &ei->i_acl, acl);
+				ext4_iset_acl(inode, &ei->i_acl, acl);
 				break;
 
 			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &ei->i_default_acl, acl);
+				ext4_iset_acl(inode, &ei->i_default_acl, acl);
 				break;
 		}
 	}
@@ -283,9 +283,9 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 }
 
 static int
-ext3_check_acl(struct inode *inode, int mask)
+ext4_check_acl(struct inode *inode, int mask)
 {
-	struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
+	struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
 
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
@@ -299,26 +299,26 @@ ext3_check_acl(struct inode *inode, int mask)
 }
 
 int
-ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext4_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
-	return generic_permission(inode, mask, ext3_check_acl);
+	return generic_permission(inode, mask, ext4_check_acl);
 }
 
 /*
- * Initialize the ACLs of a new inode. Called from ext3_new_inode.
+ * Initialize the ACLs of a new inode. Called from ext4_new_inode.
  *
  * dir->i_mutex: down
  * inode->i_mutex: up (access to inode is still exclusive)
  */
 int
-ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
+ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
 {
 	struct posix_acl *acl = NULL;
 	int error = 0;
 
 	if (!S_ISLNK(inode->i_mode)) {
 		if (test_opt(dir->i_sb, POSIX_ACL)) {
-			acl = ext3_get_acl(dir, ACL_TYPE_DEFAULT);
+			acl = ext4_get_acl(dir, ACL_TYPE_DEFAULT);
 			if (IS_ERR(acl))
 				return PTR_ERR(acl);
 		}
@@ -330,7 +330,7 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
 		mode_t mode;
 
 		if (S_ISDIR(inode->i_mode)) {
-			error = ext3_set_acl(handle, inode,
+			error = ext4_set_acl(handle, inode,
 					     ACL_TYPE_DEFAULT, acl);
 			if (error)
 				goto cleanup;
@@ -346,7 +346,7 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
 			inode->i_mode = mode;
 			if (error > 0) {
 				/* This is an extended ACL */
-				error = ext3_set_acl(handle, inode,
+				error = ext4_set_acl(handle, inode,
 						     ACL_TYPE_ACCESS, clone);
 			}
 		}
@@ -372,7 +372,7 @@ cleanup:
  * inode->i_mutex: down
  */
 int
-ext3_acl_chmod(struct inode *inode)
+ext4_acl_chmod(struct inode *inode)
 {
 	struct posix_acl *acl, *clone;
         int error;
@@ -381,7 +381,7 @@ ext3_acl_chmod(struct inode *inode)
 		return -EOPNOTSUPP;
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
-	acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
+	acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
 	if (IS_ERR(acl) || !acl)
 		return PTR_ERR(acl);
 	clone = posix_acl_clone(acl, GFP_KERNEL);
@@ -394,17 +394,17 @@ ext3_acl_chmod(struct inode *inode)
 		int retries = 0;
 
 	retry:
-		handle = ext3_journal_start(inode,
-				EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
+		handle = ext4_journal_start(inode,
+				EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
-			ext3_std_error(inode->i_sb, error);
+			ext4_std_error(inode->i_sb, error);
 			goto out;
 		}
-		error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone);
-		ext3_journal_stop(handle);
+		error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, clone);
+		ext4_journal_stop(handle);
 		if (error == -ENOSPC &&
-		    ext3_should_retry_alloc(inode->i_sb, &retries))
+		    ext4_should_retry_alloc(inode->i_sb, &retries))
 			goto retry;
 	}
 out:
@@ -416,7 +416,7 @@ out:
  * Extended attribute handlers
  */
 static size_t
-ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
+ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
 			   const char *name, size_t name_len)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
@@ -429,7 +429,7 @@ ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
 }
 
 static size_t
-ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
+ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
 			    const char *name, size_t name_len)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
@@ -442,7 +442,7 @@ ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
 }
 
 static int
-ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
 {
 	struct posix_acl *acl;
 	int error;
@@ -450,7 +450,7 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 
-	acl = ext3_get_acl(inode, type);
+	acl = ext4_get_acl(inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -462,25 +462,25 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
 }
 
 static int
-ext3_xattr_get_acl_access(struct inode *inode, const char *name,
+ext4_xattr_get_acl_access(struct inode *inode, const char *name,
 			  void *buffer, size_t size)
 {
 	if (strcmp(name, "") != 0)
 		return -EINVAL;
-	return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
+	return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
 }
 
 static int
-ext3_xattr_get_acl_default(struct inode *inode, const char *name,
+ext4_xattr_get_acl_default(struct inode *inode, const char *name,
 			   void *buffer, size_t size)
 {
 	if (strcmp(name, "") != 0)
 		return -EINVAL;
-	return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
+	return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
 }
 
 static int
-ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
+ext4_xattr_set_acl(struct inode *inode, int type, const void *value,
 		   size_t size)
 {
 	handle_t *handle;
@@ -505,12 +505,12 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
 		acl = NULL;
 
 retry:
-	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
+	handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
-	error = ext3_set_acl(handle, inode, type, acl);
-	ext3_journal_stop(handle);
-	if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+	error = ext4_set_acl(handle, inode, type, acl);
+	ext4_journal_stop(handle);
+	if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
 release_and_out:
@@ -519,33 +519,33 @@ release_and_out:
 }
 
 static int
-ext3_xattr_set_acl_access(struct inode *inode, const char *name,
+ext4_xattr_set_acl_access(struct inode *inode, const char *name,
 			  const void *value, size_t size, int flags)
 {
 	if (strcmp(name, "") != 0)
 		return -EINVAL;
-	return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+	return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
 }
 
 static int
-ext3_xattr_set_acl_default(struct inode *inode, const char *name,
+ext4_xattr_set_acl_default(struct inode *inode, const char *name,
 			   const void *value, size_t size, int flags)
 {
 	if (strcmp(name, "") != 0)
 		return -EINVAL;
-	return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+	return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
 }
 
-struct xattr_handler ext3_xattr_acl_access_handler = {
+struct xattr_handler ext4_xattr_acl_access_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
-	.list	= ext3_xattr_list_acl_access,
-	.get	= ext3_xattr_get_acl_access,
-	.set	= ext3_xattr_set_acl_access,
+	.list	= ext4_xattr_list_acl_access,
+	.get	= ext4_xattr_get_acl_access,
+	.set	= ext4_xattr_set_acl_access,
 };
 
-struct xattr_handler ext3_xattr_acl_default_handler = {
+struct xattr_handler ext4_xattr_acl_default_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
-	.list	= ext3_xattr_list_acl_default,
-	.get	= ext3_xattr_get_acl_default,
-	.set	= ext3_xattr_set_acl_default,
+	.list	= ext4_xattr_list_acl_default,
+	.get	= ext4_xattr_get_acl_default,
+	.set	= ext4_xattr_set_acl_default,
 };
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 0d1e6279cbfd..26a5c1abf147 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -1,81 +1,81 @@
 /*
-  File: fs/ext3/acl.h
+  File: fs/ext4/acl.h
 
   (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */
 
 #include <linux/posix_acl_xattr.h>
 
-#define EXT3_ACL_VERSION	0x0001
+#define EXT4_ACL_VERSION	0x0001
 
 typedef struct {
 	__le16		e_tag;
 	__le16		e_perm;
 	__le32		e_id;
-} ext3_acl_entry;
+} ext4_acl_entry;
 
 typedef struct {
 	__le16		e_tag;
 	__le16		e_perm;
-} ext3_acl_entry_short;
+} ext4_acl_entry_short;
 
 typedef struct {
 	__le32		a_version;
-} ext3_acl_header;
+} ext4_acl_header;
 
-static inline size_t ext3_acl_size(int count)
+static inline size_t ext4_acl_size(int count)
 {
 	if (count <= 4) {
-		return sizeof(ext3_acl_header) +
-		       count * sizeof(ext3_acl_entry_short);
+		return sizeof(ext4_acl_header) +
+		       count * sizeof(ext4_acl_entry_short);
 	} else {
-		return sizeof(ext3_acl_header) +
-		       4 * sizeof(ext3_acl_entry_short) +
-		       (count - 4) * sizeof(ext3_acl_entry);
+		return sizeof(ext4_acl_header) +
+		       4 * sizeof(ext4_acl_entry_short) +
+		       (count - 4) * sizeof(ext4_acl_entry);
 	}
 }
 
-static inline int ext3_acl_count(size_t size)
+static inline int ext4_acl_count(size_t size)
 {
 	ssize_t s;
-	size -= sizeof(ext3_acl_header);
-	s = size - 4 * sizeof(ext3_acl_entry_short);
+	size -= sizeof(ext4_acl_header);
+	s = size - 4 * sizeof(ext4_acl_entry_short);
 	if (s < 0) {
-		if (size % sizeof(ext3_acl_entry_short))
+		if (size % sizeof(ext4_acl_entry_short))
 			return -1;
-		return size / sizeof(ext3_acl_entry_short);
+		return size / sizeof(ext4_acl_entry_short);
 	} else {
-		if (s % sizeof(ext3_acl_entry))
+		if (s % sizeof(ext4_acl_entry))
 			return -1;
-		return s / sizeof(ext3_acl_entry) + 4;
+		return s / sizeof(ext4_acl_entry) + 4;
 	}
 }
 
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
 
+/* Value for EXT4_I(inode)->i_acl and EXT4_I(inode)->i_default_acl
+/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
    if the ACL has not been cached */
-#define EXT3_ACL_NOT_CACHED ((void *)-1)
+#define EXT4_ACL_NOT_CACHED ((void *)-1)
 
 /* acl.c */
-extern int ext3_permission (struct inode *, int, struct nameidata *);
-extern int ext3_acl_chmod (struct inode *);
-extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
+extern int ext4_permission (struct inode *, int, struct nameidata *);
+extern int ext4_acl_chmod (struct inode *);
+extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);
 
-#else  /* CONFIG_EXT3_FS_POSIX_ACL */
+#else  /* CONFIG_EXT4DEV_FS_POSIX_ACL */
 #include <linux/sched.h>
-#define ext3_permission NULL
+#define ext4_permission NULL
 
 static inline int
-ext3_acl_chmod(struct inode *inode)
+ext4_acl_chmod(struct inode *inode)
 {
 	return 0;
 }
 
 static inline int
-ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
+ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
 {
 	return 0;
 }
-#endif  /* CONFIG_EXT3_FS_POSIX_ACL */
+#endif  /* CONFIG_EXT4DEV_FS_POSIX_ACL */
 
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b41a7d7e20f0..357e4e50374a 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/balloc.c
+ *  linux/fs/ext4/balloc.c
  *
  * Copyright (C) 1992, 1993, 1994, 1995
  * Remy Card (card@masi.ibp.fr)
@@ -15,8 +15,8 @@
 #include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
+#include <linux/ext4_fs.h>
+#include <linux/ext4_jbd.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 
@@ -32,30 +32,30 @@
  * The file system contains group descriptors which are located after the
  * super block.  Each descriptor contains the number of the bitmap block and
  * the free blocks count in the block.  The descriptors are loaded in memory
- * when a file system is mounted (see ext3_read_super).
+ * when a file system is mounted (see ext4_read_super).
  */
 
 
 #define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
 
 /**
- * ext3_get_group_desc() -- load group descriptor from disk
+ * ext4_get_group_desc() -- load group descriptor from disk
  * @sb:			super block
  * @block_group:	given block group
  * @bh:			pointer to the buffer head to store the block
  *			group descriptor
  */
-struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 					     unsigned int block_group,
 					     struct buffer_head ** bh)
 {
 	unsigned long group_desc;
 	unsigned long offset;
-	struct ext3_group_desc * desc;
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext4_group_desc * desc;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (block_group >= sbi->s_groups_count) {
-		ext3_error (sb, "ext3_get_group_desc",
+		ext4_error (sb, "ext4_get_group_desc",
 			    "block_group >= groups_count - "
 			    "block_group = %d, groups_count = %lu",
 			    block_group, sbi->s_groups_count);
@@ -64,17 +64,17 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
 	}
 	smp_rmb();
 
-	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
-	offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
+	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
+	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
 	if (!sbi->s_group_desc[group_desc]) {
-		ext3_error (sb, "ext3_get_group_desc",
+		ext4_error (sb, "ext4_get_group_desc",
 			    "Group descriptor not loaded - "
 			    "block_group = %d, group_desc = %lu, desc = %lu",
 			     block_group, group_desc, offset);
 		return NULL;
 	}
 
-	desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data;
+	desc = (struct ext4_group_desc *) sbi->s_group_desc[group_desc]->b_data;
 	if (bh)
 		*bh = sbi->s_group_desc[group_desc];
 	return desc + offset;
@@ -93,15 +93,15 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
 static struct buffer_head *
 read_block_bitmap(struct super_block *sb, unsigned int block_group)
 {
-	struct ext3_group_desc * desc;
+	struct ext4_group_desc * desc;
 	struct buffer_head * bh = NULL;
 
-	desc = ext3_get_group_desc (sb, block_group, NULL);
+	desc = ext4_get_group_desc (sb, block_group, NULL);
 	if (!desc)
 		goto error_out;
 	bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
 	if (!bh)
-		ext3_error (sb, "read_block_bitmap",
+		ext4_error (sb, "read_block_bitmap",
 			    "Cannot read block bitmap - "
 			    "block_group = %d, block_bitmap = %u",
 			    block_group, le32_to_cpu(desc->bg_block_bitmap));
@@ -134,7 +134,7 @@ static void __rsv_window_dump(struct rb_root *root, int verbose,
 			      const char *fn)
 {
 	struct rb_node *n;
-	struct ext3_reserve_window_node *rsv, *prev;
+	struct ext4_reserve_window_node *rsv, *prev;
 	int bad;
 
 restart:
@@ -144,7 +144,7 @@ restart:
 
 	printk("Block Allocation Reservation Windows Map (%s):\n", fn);
 	while (n) {
-		rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node);
+		rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node);
 		if (verbose)
 			printk("reservation window 0x%p "
 			       "start:  %lu, end:  %lu\n",
@@ -196,13 +196,13 @@ restart:
  * otherwise, return 0;
  */
 static int
-goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
+goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
 			unsigned int group, struct super_block * sb)
 {
-	ext3_fsblk_t group_first_block, group_last_block;
+	ext4_fsblk_t group_first_block, group_last_block;
 
-	group_first_block = ext3_group_first_block_no(sb, group);
-	group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+	group_first_block = ext4_group_first_block_no(sb, group);
+	group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
 
 	if ((rsv->_rsv_start > group_last_block) ||
 	    (rsv->_rsv_end < group_first_block))
@@ -222,17 +222,17 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
  * if the goal is not in any window.
  * Returns NULL if there are no windows or if all windows start after the goal.
  */
-static struct ext3_reserve_window_node *
-search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
+static struct ext4_reserve_window_node *
+search_reserve_window(struct rb_root *root, ext4_fsblk_t goal)
 {
 	struct rb_node *n = root->rb_node;
-	struct ext3_reserve_window_node *rsv;
+	struct ext4_reserve_window_node *rsv;
 
 	if (!n)
 		return NULL;
 
 	do {
-		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
+		rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
 
 		if (goal < rsv->rsv_start)
 			n = n->rb_left;
@@ -249,33 +249,33 @@ search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
 	 */
 	if (rsv->rsv_start > goal) {
 		n = rb_prev(&rsv->rsv_node);
-		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
+		rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
 	}
 	return rsv;
 }
 
 /**
- * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree.
+ * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree.
  * @sb:			super block
  * @rsv:		reservation window to add
  *
  * Must be called with rsv_lock hold.
  */
-void ext3_rsv_window_add(struct super_block *sb,
-		    struct ext3_reserve_window_node *rsv)
+void ext4_rsv_window_add(struct super_block *sb,
+		    struct ext4_reserve_window_node *rsv)
 {
-	struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root;
+	struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root;
 	struct rb_node *node = &rsv->rsv_node;
-	ext3_fsblk_t start = rsv->rsv_start;
+	ext4_fsblk_t start = rsv->rsv_start;
 
 	struct rb_node ** p = &root->rb_node;
 	struct rb_node * parent = NULL;
-	struct ext3_reserve_window_node *this;
+	struct ext4_reserve_window_node *this;
 
 	while (*p)
 	{
 		parent = *p;
-		this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node);
+		this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node);
 
 		if (start < this->rsv_start)
 			p = &(*p)->rb_left;
@@ -292,7 +292,7 @@ void ext3_rsv_window_add(struct super_block *sb,
 }
 
 /**
- * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree
+ * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree
  * @sb:			super block
  * @rsv:		reservation window to remove
  *
@@ -301,59 +301,59 @@ void ext3_rsv_window_add(struct super_block *sb,
  * rsv_lock hold.
  */
 static void rsv_window_remove(struct super_block *sb,
-			      struct ext3_reserve_window_node *rsv)
+			      struct ext4_reserve_window_node *rsv)
 {
-	rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
-	rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+	rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
+	rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
 	rsv->rsv_alloc_hit = 0;
-	rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root);
+	rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root);
 }
 
 /*
  * rsv_is_empty() -- Check if the reservation window is allocated.
  * @rsv:		given reservation window to check
  *
- * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED.
+ * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
  */
-static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
+static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
 {
 	/* a valid reservation end block could not be 0 */
-	return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+	return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
 }
 
 /**
- * ext3_init_block_alloc_info()
+ * ext4_init_block_alloc_info()
  * @inode:		file inode structure
  *
  * Allocate and initialize the	reservation window structure, and
- * link the window to the ext3 inode structure at last
+ * finally link the window to the ext4 inode structure
  *
  * The reservation window structure is only dynamically allocated
- * and linked to ext3 inode the first time the open file
- * needs a new block. So, before every ext3_new_block(s) call, for
+ * and linked to ext4 inode the first time the open file
+ * needs a new block. So, before every ext4_new_block(s) call, for
  * regular files, we should check whether the reservation window
  * structure exists or not. In the latter case, this function is called.
  * Fail to do so will result in block reservation being turned off for that
  * open file.
  *
- * This function is called from ext3_get_blocks_handle(), also called
+ * This function is called from ext4_get_blocks_handle(), also called
  * when setting the reservation window size through ioctl before the file
  * is open for write (needs block allocation).
  *
  * Needs truncate_mutex protection prior to call this function.
  */
-void ext3_init_block_alloc_info(struct inode *inode)
+void ext4_init_block_alloc_info(struct inode *inode)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
-	struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
 	struct super_block *sb = inode->i_sb;
 
 	block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
 	if (block_i) {
-		struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node;
+		struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node;
 
-		rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
-		rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+		rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
+		rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
 
 		/*
 		 * if filesystem is mounted with NORESERVATION, the goal
@@ -363,7 +363,7 @@ void ext3_init_block_alloc_info(struct inode *inode)
 		if (!test_opt(sb, RESERVATION))
 			rsv->rsv_goal_size = 0;
 		else
-			rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS;
+			rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS;
 		rsv->rsv_alloc_hit = 0;
 		block_i->last_alloc_logical_block = 0;
 		block_i->last_alloc_physical_block = 0;
@@ -372,24 +372,24 @@ void ext3_init_block_alloc_info(struct inode *inode)
 }
 
 /**
- * ext3_discard_reservation()
+ * ext4_discard_reservation()
  * @inode:		inode
  *
  * Discard(free) block reservation window on last file close, or truncate
  * or at last iput().
  *
  * It is being called in three cases:
- *	ext3_release_file(): last writer close the file
- *	ext3_clear_inode(): last iput(), when nobody link to this file.
- *	ext3_truncate(): when the block indirect map is about to change.
+ *	ext4_release_file(): last writer closes the file
+ *	ext4_clear_inode(): last iput(), when nobody links to this file.
+ *	ext4_truncate(): when the block indirect map is about to change.
  *
  */
-void ext3_discard_reservation(struct inode *inode)
+void ext4_discard_reservation(struct inode *inode)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
-	struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
-	struct ext3_reserve_window_node *rsv;
-	spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
+	struct ext4_reserve_window_node *rsv;
+	spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
 
 	if (!block_i)
 		return;
@@ -404,62 +404,62 @@ void ext3_discard_reservation(struct inode *inode)
 }
 
 /**
- * ext3_free_blocks_sb() -- Free given blocks and update quota
+ * ext4_free_blocks_sb() -- Free given blocks and update quota
  * @handle:			handle to this transaction
  * @sb:				super block
  * @block:			start physcial block to free
  * @count:			number of blocks to free
  * @pdquot_freed_blocks:	pointer to quota
  */
-void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
-			 ext3_fsblk_t block, unsigned long count,
+void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
+			 ext4_fsblk_t block, unsigned long count,
 			 unsigned long *pdquot_freed_blocks)
 {
 	struct buffer_head *bitmap_bh = NULL;
 	struct buffer_head *gd_bh;
 	unsigned long block_group;
-	ext3_grpblk_t bit;
+	ext4_grpblk_t bit;
 	unsigned long i;
 	unsigned long overflow;
-	struct ext3_group_desc * desc;
-	struct ext3_super_block * es;
-	struct ext3_sb_info *sbi;
+	struct ext4_group_desc * desc;
+	struct ext4_super_block * es;
+	struct ext4_sb_info *sbi;
 	int err = 0, ret;
-	ext3_grpblk_t group_freed;
+	ext4_grpblk_t group_freed;
 
 	*pdquot_freed_blocks = 0;
-	sbi = EXT3_SB(sb);
+	sbi = EXT4_SB(sb);
 	es = sbi->s_es;
 	if (block < le32_to_cpu(es->s_first_data_block) ||
 	    block + count < block ||
 	    block + count > le32_to_cpu(es->s_blocks_count)) {
-		ext3_error (sb, "ext3_free_blocks",
+		ext4_error (sb, "ext4_free_blocks",
 			    "Freeing blocks not in datazone - "
 			    "block = "E3FSBLK", count = %lu", block, count);
 		goto error_return;
 	}
 
-	ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
+	ext4_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
 
 do_more:
 	overflow = 0;
 	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
-		      EXT3_BLOCKS_PER_GROUP(sb);
+		      EXT4_BLOCKS_PER_GROUP(sb);
 	bit = (block - le32_to_cpu(es->s_first_data_block)) %
-		      EXT3_BLOCKS_PER_GROUP(sb);
+		      EXT4_BLOCKS_PER_GROUP(sb);
 	/*
 	 * Check to see if we are freeing blocks across a group
 	 * boundary.
 	 */
-	if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
-		overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
+	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
+		overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
 		count -= overflow;
 	}
 	brelse(bitmap_bh);
 	bitmap_bh = read_block_bitmap(sb, block_group);
 	if (!bitmap_bh)
 		goto error_return;
-	desc = ext3_get_group_desc (sb, block_group, &gd_bh);
+	desc = ext4_get_group_desc (sb, block_group, &gd_bh);
 	if (!desc)
 		goto error_return;
 
@@ -469,7 +469,7 @@ do_more:
 		      sbi->s_itb_per_group) ||
 	    in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
 		      sbi->s_itb_per_group))
-		ext3_error (sb, "ext3_free_blocks",
+		ext4_error (sb, "ext4_free_blocks",
 			    "Freeing blocks in system zones - "
 			    "Block = "E3FSBLK", count = %lu",
 			    block, count);
@@ -480,7 +480,7 @@ do_more:
 	 */
 	/* @@@ check errors */
 	BUFFER_TRACE(bitmap_bh, "getting undo access");
-	err = ext3_journal_get_undo_access(handle, bitmap_bh);
+	err = ext4_journal_get_undo_access(handle, bitmap_bh);
 	if (err)
 		goto error_return;
 
@@ -490,7 +490,7 @@ do_more:
 	 * using it
 	 */
 	BUFFER_TRACE(gd_bh, "get_write_access");
-	err = ext3_journal_get_write_access(handle, gd_bh);
+	err = ext4_journal_get_write_access(handle, gd_bh);
 	if (err)
 		goto error_return;
 
@@ -542,7 +542,7 @@ do_more:
 		BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
 		J_ASSERT_BH(bitmap_bh,
 				bh2jh(bitmap_bh)->b_committed_data != NULL);
-		ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
+		ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
 				bh2jh(bitmap_bh)->b_committed_data);
 
 		/*
@@ -551,10 +551,10 @@ do_more:
 		 * the allocator uses.
 		 */
 		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+		if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
 						bit + i, bitmap_bh->b_data)) {
 			jbd_unlock_bh_state(bitmap_bh);
-			ext3_error(sb, __FUNCTION__,
+			ext4_error(sb, __FUNCTION__,
 				"bit already cleared for block "E3FSBLK,
 				 block + i);
 			jbd_lock_bh_state(bitmap_bh);
@@ -574,11 +574,11 @@ do_more:
 
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
-	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
+	err = ext4_journal_dirty_metadata(handle, bitmap_bh);
 
 	/* And the group descriptor block */
 	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
-	ret = ext3_journal_dirty_metadata(handle, gd_bh);
+	ret = ext4_journal_dirty_metadata(handle, gd_bh);
 	if (!err) err = ret;
 	*pdquot_freed_blocks += group_freed;
 
@@ -590,40 +590,40 @@ do_more:
 	sb->s_dirt = 1;
 error_return:
 	brelse(bitmap_bh);
-	ext3_std_error(sb, err);
+	ext4_std_error(sb, err);
 	return;
 }
 
 /**
- * ext3_free_blocks() -- Free given blocks and update quota
+ * ext4_free_blocks() -- Free given blocks and update quota
  * @handle:		handle for this transaction
  * @inode:		inode
  * @block:		start physical block to free
  * @count:		number of blocks to count
  */
-void ext3_free_blocks(handle_t *handle, struct inode *inode,
-			ext3_fsblk_t block, unsigned long count)
+void ext4_free_blocks(handle_t *handle, struct inode *inode,
+			ext4_fsblk_t block, unsigned long count)
 {
 	struct super_block * sb;
 	unsigned long dquot_freed_blocks;
 
 	sb = inode->i_sb;
 	if (!sb) {
-		printk ("ext3_free_blocks: nonexistent device");
+		printk ("ext4_free_blocks: nonexistent device");
 		return;
 	}
-	ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+	ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
 	if (dquot_freed_blocks)
 		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
 	return;
 }
 
 /**
- * ext3_test_allocatable()
+ * ext4_test_allocatable()
  * @nr:			given allocation block group
  * @bh:			bufferhead contains the bitmap of the given block group
  *
- * For ext3 allocations, we must not reuse any blocks which are
+ * For ext4 allocations, we must not reuse any blocks which are
  * allocated in the bitmap buffer's "last committed data" copy.  This
  * prevents deletes from freeing up the page for reuse until we have
  * committed the delete transaction.
@@ -638,19 +638,19 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
  * data-writes at some point, and disable it for metadata allocations or
  * sync-data inodes.
  */
-static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh)
+static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
 {
 	int ret;
 	struct journal_head *jh = bh2jh(bh);
 
-	if (ext3_test_bit(nr, bh->b_data))
+	if (ext4_test_bit(nr, bh->b_data))
 		return 0;
 
 	jbd_lock_bh_state(bh);
 	if (!jh->b_committed_data)
 		ret = 1;
 	else
-		ret = !ext3_test_bit(nr, jh->b_committed_data);
+		ret = !ext4_test_bit(nr, jh->b_committed_data);
 	jbd_unlock_bh_state(bh);
 	return ret;
 }
@@ -665,22 +665,22 @@ static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh)
  * bitmap on disk and the last-committed copy in journal, until we find a
  * bit free in both bitmaps.
  */
-static ext3_grpblk_t
-bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
-					ext3_grpblk_t maxblocks)
+static ext4_grpblk_t
+bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
+					ext4_grpblk_t maxblocks)
 {
-	ext3_grpblk_t next;
+	ext4_grpblk_t next;
 	struct journal_head *jh = bh2jh(bh);
 
 	while (start < maxblocks) {
-		next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start);
+		next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start);
 		if (next >= maxblocks)
 			return -1;
-		if (ext3_test_allocatable(next, bh))
+		if (ext4_test_allocatable(next, bh))
 			return next;
 		jbd_lock_bh_state(bh);
 		if (jh->b_committed_data)
-			start = ext3_find_next_zero_bit(jh->b_committed_data,
+			start = ext4_find_next_zero_bit(jh->b_committed_data,
 							maxblocks, next);
 		jbd_unlock_bh_state(bh);
 	}
@@ -700,11 +700,11 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
  * the initial goal; then for a free byte somewhere in the bitmap; then
  * for any free bit in the bitmap.
  */
-static ext3_grpblk_t
-find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
-			ext3_grpblk_t maxblocks)
+static ext4_grpblk_t
+find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
+			ext4_grpblk_t maxblocks)
 {
-	ext3_grpblk_t here, next;
+	ext4_grpblk_t here, next;
 	char *p, *r;
 
 	if (start > 0) {
@@ -713,16 +713,16 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
 		 * block within the next XX blocks.
 		 *
 		 * end_goal is more or less random, but it has to be
-		 * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
+		 * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the
 		 * next 64-bit boundary is simple..
 		 */
-		ext3_grpblk_t end_goal = (start + 63) & ~63;
+		ext4_grpblk_t end_goal = (start + 63) & ~63;
 		if (end_goal > maxblocks)
 			end_goal = maxblocks;
-		here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
-		if (here < end_goal && ext3_test_allocatable(here, bh))
+		here = ext4_find_next_zero_bit(bh->b_data, end_goal, start);
+		if (here < end_goal && ext4_test_allocatable(here, bh))
 			return here;
-		ext3_debug("Bit not found near goal\n");
+		ext4_debug("Bit not found near goal\n");
 	}
 
 	here = start;
@@ -733,7 +733,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
 	r = memscan(p, 0, (maxblocks - here + 7) >> 3);
 	next = (r - ((char *)bh->b_data)) << 3;
 
-	if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
+	if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh))
 		return next;
 
 	/*
@@ -757,16 +757,16 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
  * zero (failure).
  */
 static inline int
-claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
+claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
 {
 	struct journal_head *jh = bh2jh(bh);
 	int ret;
 
-	if (ext3_set_bit_atomic(lock, block, bh->b_data))
+	if (ext4_set_bit_atomic(lock, block, bh->b_data))
 		return 0;
 	jbd_lock_bh_state(bh);
-	if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) {
-		ext3_clear_bit_atomic(lock, block, bh->b_data);
+	if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) {
+		ext4_clear_bit_atomic(lock, block, bh->b_data);
 		ret = 0;
 	} else {
 		ret = 1;
@@ -776,7 +776,7 @@ claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
 }
 
 /**
- * ext3_try_to_allocate()
+ * ext4_try_to_allocate()
  * @sb:			superblock
  * @handle:		handle to this transaction
  * @group:		given allocation block group
@@ -797,29 +797,29 @@ claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
  *
  * If we failed to allocate the desired block then we may end up crossing to a
  * new bitmap.  In that case we must release write access to the old one via
- * ext3_journal_release_buffer(), else we'll run out of credits.
+ * ext4_journal_release_buffer(), else we'll run out of credits.
  */
-static ext3_grpblk_t
-ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
-			struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal,
-			unsigned long *count, struct ext3_reserve_window *my_rsv)
+static ext4_grpblk_t
+ext4_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
+			struct buffer_head *bitmap_bh, ext4_grpblk_t grp_goal,
+			unsigned long *count, struct ext4_reserve_window *my_rsv)
 {
-	ext3_fsblk_t group_first_block;
-	ext3_grpblk_t start, end;
+	ext4_fsblk_t group_first_block;
+	ext4_grpblk_t start, end;
 	unsigned long num = 0;
 
 	/* we do allocation within the reservation window if we have a window */
 	if (my_rsv) {
-		group_first_block = ext3_group_first_block_no(sb, group);
+		group_first_block = ext4_group_first_block_no(sb, group);
 		if (my_rsv->_rsv_start >= group_first_block)
 			start = my_rsv->_rsv_start - group_first_block;
 		else
 			/* reservation window cross group boundary */
 			start = 0;
 		end = my_rsv->_rsv_end - group_first_block + 1;
-		if (end > EXT3_BLOCKS_PER_GROUP(sb))
+		if (end > EXT4_BLOCKS_PER_GROUP(sb))
 			/* reservation window crosses group boundary */
-			end = EXT3_BLOCKS_PER_GROUP(sb);
+			end = EXT4_BLOCKS_PER_GROUP(sb);
 		if ((start <= grp_goal) && (grp_goal < end))
 			start = grp_goal;
 		else
@@ -829,13 +829,13 @@ ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
 			start = grp_goal;
 		else
 			start = 0;
-		end = EXT3_BLOCKS_PER_GROUP(sb);
+		end = EXT4_BLOCKS_PER_GROUP(sb);
 	}
 
-	BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb));
+	BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb));
 
 repeat:
-	if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) {
+	if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) {
 		grp_goal = find_next_usable_block(start, bitmap_bh, end);
 		if (grp_goal < 0)
 			goto fail_access;
@@ -843,7 +843,7 @@ repeat:
 			int i;
 
 			for (i = 0; i < 7 && grp_goal > start &&
-					ext3_test_allocatable(grp_goal - 1,
+					ext4_test_allocatable(grp_goal - 1,
 								bitmap_bh);
 					i++, grp_goal--)
 				;
@@ -851,7 +851,7 @@ repeat:
 	}
 	start = grp_goal;
 
-	if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group),
+	if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group),
 		grp_goal, bitmap_bh)) {
 		/*
 		 * The block was allocated by another thread, or it was
@@ -866,8 +866,8 @@ repeat:
 	num++;
 	grp_goal++;
 	while (num < *count && grp_goal < end
-		&& ext3_test_allocatable(grp_goal, bitmap_bh)
-		&& claim_block(sb_bgl_lock(EXT3_SB(sb), group),
+		&& ext4_test_allocatable(grp_goal, bitmap_bh)
+		&& claim_block(sb_bgl_lock(EXT4_SB(sb), group),
 				grp_goal, bitmap_bh)) {
 		num++;
 		grp_goal++;
@@ -913,15 +913,15 @@ fail_access:
  *
  */
 static int find_next_reservable_window(
-				struct ext3_reserve_window_node *search_head,
-				struct ext3_reserve_window_node *my_rsv,
+				struct ext4_reserve_window_node *search_head,
+				struct ext4_reserve_window_node *my_rsv,
 				struct super_block * sb,
-				ext3_fsblk_t start_block,
-				ext3_fsblk_t last_block)
+				ext4_fsblk_t start_block,
+				ext4_fsblk_t last_block)
 {
 	struct rb_node *next;
-	struct ext3_reserve_window_node *rsv, *prev;
-	ext3_fsblk_t cur;
+	struct ext4_reserve_window_node *rsv, *prev;
+	ext4_fsblk_t cur;
 	int size = my_rsv->rsv_goal_size;
 
 	/* TODO: make the start of the reservation window byte-aligned */
@@ -949,7 +949,7 @@ static int find_next_reservable_window(
 
 		prev = rsv;
 		next = rb_next(&rsv->rsv_node);
-		rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node);
+		rsv = list_entry(next,struct ext4_reserve_window_node,rsv_node);
 
 		/*
 		 * Reached the last reservation, we can just append to the
@@ -992,7 +992,7 @@ static int find_next_reservable_window(
 	my_rsv->rsv_alloc_hit = 0;
 
 	if (prev != my_rsv)
-		ext3_rsv_window_add(sb, my_rsv);
+		ext4_rsv_window_add(sb, my_rsv);
 
 	return 0;
 }
@@ -1034,20 +1034,20 @@ static int find_next_reservable_window(
  *	@bitmap_bh: the block group block bitmap
  *
  */
-static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
-		ext3_grpblk_t grp_goal, struct super_block *sb,
+static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
+		ext4_grpblk_t grp_goal, struct super_block *sb,
 		unsigned int group, struct buffer_head *bitmap_bh)
 {
-	struct ext3_reserve_window_node *search_head;
-	ext3_fsblk_t group_first_block, group_end_block, start_block;
-	ext3_grpblk_t first_free_block;
-	struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
+	struct ext4_reserve_window_node *search_head;
+	ext4_fsblk_t group_first_block, group_end_block, start_block;
+	ext4_grpblk_t first_free_block;
+	struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root;
 	unsigned long size;
 	int ret;
-	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+	spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
 
-	group_first_block = ext3_group_first_block_no(sb, group);
-	group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+	group_first_block = ext4_group_first_block_no(sb, group);
+	group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
 
 	if (grp_goal < 0)
 		start_block = group_first_block;
@@ -1085,8 +1085,8 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 			 * otherwise we keep the same size window
 			 */
 			size = size * 2;
-			if (size > EXT3_MAX_RESERVE_BLOCKS)
-				size = EXT3_MAX_RESERVE_BLOCKS;
+			if (size > EXT4_MAX_RESERVE_BLOCKS)
+				size = EXT4_MAX_RESERVE_BLOCKS;
 			my_rsv->rsv_goal_size= size;
 		}
 	}
@@ -1170,20 +1170,20 @@ retry:
  * Attempt to expand the reservation window large enough to have
  * required number of free blocks
  *
- * Since ext3_try_to_allocate() will always allocate blocks within
+ * Since ext4_try_to_allocate() will always allocate blocks within
  * the reservation window range, if the window size is too small,
  * multiple blocks allocation has to stop at the end of the reservation
  * window. To make this more efficient, given the total number of
  * blocks needed and the current size of the window, we try to
  * expand the reservation window size if necessary on a best-effort
- * basis before ext3_new_blocks() tries to allocate blocks,
+ * basis before ext4_new_blocks() tries to allocate blocks.
  */
-static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
+static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
 			struct super_block *sb, int size)
 {
-	struct ext3_reserve_window_node *next_rsv;
+	struct ext4_reserve_window_node *next_rsv;
 	struct rb_node *next;
-	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+	spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
 
 	if (!spin_trylock(rsv_lock))
 		return;
@@ -1193,7 +1193,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
 	if (!next)
 		my_rsv->rsv_end += size;
 	else {
-		next_rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node);
+		next_rsv = list_entry(next, struct ext4_reserve_window_node, rsv_node);
 
 		if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
 			my_rsv->rsv_end += size;
@@ -1204,7 +1204,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
 }
 
 /**
- * ext3_try_to_allocate_with_rsv()
+ * ext4_try_to_allocate_with_rsv()
  * @sb:			superblock
  * @handle:		handle to this transaction
  * @group:		given allocation block group
@@ -1232,15 +1232,15 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
  * We use a red-black tree for the per-filesystem reservation list.
  *
  */
-static ext3_grpblk_t
-ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
+static ext4_grpblk_t
+ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			unsigned int group, struct buffer_head *bitmap_bh,
-			ext3_grpblk_t grp_goal,
-			struct ext3_reserve_window_node * my_rsv,
+			ext4_grpblk_t grp_goal,
+			struct ext4_reserve_window_node * my_rsv,
 			unsigned long *count, int *errp)
 {
-	ext3_fsblk_t group_first_block, group_last_block;
-	ext3_grpblk_t ret = 0;
+	ext4_fsblk_t group_first_block, group_last_block;
+	ext4_grpblk_t ret = 0;
 	int fatal;
 	unsigned long num = *count;
 
@@ -1252,7 +1252,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 	 * if the buffer is in BJ_Forget state in the committing transaction.
 	 */
 	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
-	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
+	fatal = ext4_journal_get_undo_access(handle, bitmap_bh);
 	if (fatal) {
 		*errp = fatal;
 		return -1;
@@ -1265,18 +1265,18 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 	 * or last attempt to allocate a block with reservation turned on failed
 	 */
 	if (my_rsv == NULL ) {
-		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+		ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
 						grp_goal, count, NULL);
 		goto out;
 	}
 	/*
 	 * grp_goal is a group relative block number (if there is a goal)
-	 * 0 < grp_goal < EXT3_BLOCKS_PER_GROUP(sb)
+	 * 0 < grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
 	 * first block is a filesystem wide block number
 	 * first block is the block number of the first block in this group
 	 */
-	group_first_block = ext3_group_first_block_no(sb, group);
-	group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+	group_first_block = ext4_group_first_block_no(sb, group);
+	group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
 
 	/*
 	 * Basically we will allocate a new block from inode's reservation
@@ -1314,10 +1314,10 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 
 		if ((my_rsv->rsv_start > group_last_block) ||
 				(my_rsv->rsv_end < group_first_block)) {
-			rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1);
+			rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1);
 			BUG();
 		}
-		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+		ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
 					   grp_goal, &num, &my_rsv->rsv_window);
 		if (ret >= 0) {
 			my_rsv->rsv_alloc_hit += num;
@@ -1330,7 +1330,7 @@ out:
 	if (ret >= 0) {
 		BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
 					"bitmap block");
-		fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
+		fatal = ext4_journal_dirty_metadata(handle, bitmap_bh);
 		if (fatal) {
 			*errp = fatal;
 			return -1;
@@ -1339,19 +1339,19 @@ out:
 	}
 
 	BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
-	ext3_journal_release_buffer(handle, bitmap_bh);
+	ext4_journal_release_buffer(handle, bitmap_bh);
 	return ret;
 }
 
 /**
- * ext3_has_free_blocks()
+ * ext4_has_free_blocks()
  * @sbi:		in-core super block structure.
  *
  * Check if filesystem has at least 1 free block available for allocation.
  */
-static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
+static int ext4_has_free_blocks(struct ext4_sb_info *sbi)
 {
-	ext3_fsblk_t free_blocks, root_blocks;
+	ext4_fsblk_t free_blocks, root_blocks;
 
 	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
 	root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
@@ -1364,63 +1364,63 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
 }
 
 /**
- * ext3_should_retry_alloc()
+ * ext4_should_retry_alloc()
  * @sb:			super block
  * @retries		number of attemps has been made
  *
- * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
+ * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
  * it is profitable to retry the operation, this function will wait
  * for the current or commiting transaction to complete, and then
  * return TRUE.
  *
  * if the total number of retries exceed three times, return FALSE.
  */
-int ext3_should_retry_alloc(struct super_block *sb, int *retries)
+int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-	if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
+	if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3)
 		return 0;
 
 	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
 
-	return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
+	return journal_force_commit_nested(EXT4_SB(sb)->s_journal);
 }
 
 /**
- * ext3_new_blocks() -- core block(s) allocation function
+ * ext4_new_blocks() -- core block(s) allocation function
  * @handle:		handle to this transaction
  * @inode:		file inode
  * @goal:		given target block(filesystem wide)
  * @count:		target number of blocks to allocate
  * @errp:		error code
  *
- * ext3_new_blocks uses a goal block to assist allocation.  It tries to
+ * ext4_new_blocks uses a goal block to assist allocation.  It tries to
  * allocate block(s) from the block group contains the goal block first. If that
  * fails, it will try to allocate block(s) from other block groups without
  * any specific goal block.
  *
  */
-ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
-			ext3_fsblk_t goal, unsigned long *count, int *errp)
+ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
+			ext4_fsblk_t goal, unsigned long *count, int *errp)
 {
 	struct buffer_head *bitmap_bh = NULL;
 	struct buffer_head *gdp_bh;
 	int group_no;
 	int goal_group;
-	ext3_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
-	ext3_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block*/
-	ext3_fsblk_t ret_block;		/* filesyetem-wide allocated block */
+	ext4_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
+	ext4_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block*/
+	ext4_fsblk_t ret_block;		/* filesyetem-wide allocated block */
 	int bgi;			/* blockgroup iteration index */
 	int fatal = 0, err;
 	int performed_allocation = 0;
-	ext3_grpblk_t free_blocks;	/* number of free blocks in a group */
+	ext4_grpblk_t free_blocks;	/* number of free blocks in a group */
 	struct super_block *sb;
-	struct ext3_group_desc *gdp;
-	struct ext3_super_block *es;
-	struct ext3_sb_info *sbi;
-	struct ext3_reserve_window_node *my_rsv = NULL;
-	struct ext3_block_alloc_info *block_i;
+	struct ext4_group_desc *gdp;
+	struct ext4_super_block *es;
+	struct ext4_sb_info *sbi;
+	struct ext4_reserve_window_node *my_rsv = NULL;
+	struct ext4_block_alloc_info *block_i;
 	unsigned short windowsz = 0;
-#ifdef EXT3FS_DEBUG
+#ifdef EXT4FS_DEBUG
 	static int goal_hits, goal_attempts;
 #endif
 	unsigned long ngroups;
@@ -1429,7 +1429,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
 	*errp = -ENOSPC;
 	sb = inode->i_sb;
 	if (!sb) {
-		printk("ext3_new_block: nonexistent device");
+		printk("ext4_new_block: nonexistent device");
 		return 0;
 	}
 
@@ -1441,22 +1441,22 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
 		return 0;
 	}
 
-	sbi = EXT3_SB(sb);
-	es = EXT3_SB(sb)->s_es;
-	ext3_debug("goal=%lu.\n", goal);
+	sbi = EXT4_SB(sb);
+	es = EXT4_SB(sb)->s_es;
+	ext4_debug("goal=%lu.\n", goal);
 	/*
 	 * Allocate a block from reservation only when
 	 * filesystem is mounted with reservation(default,-o reservation), and
 	 * it's a regular file, and
 	 * the desired window size is greater than 0 (One could use ioctl
-	 * command EXT3_IOC_SETRSVSZ to set the window size to 0 to turn off
+	 * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off
 	 * reservation on that particular file)
 	 */
-	block_i = EXT3_I(inode)->i_block_alloc_info;
+	block_i = EXT4_I(inode)->i_block_alloc_info;
 	if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
 		my_rsv = &block_i->rsv_window_node;
 
-	if (!ext3_has_free_blocks(sbi)) {
+	if (!ext4_has_free_blocks(sbi)) {
 		*errp = -ENOSPC;
 		goto out;
 	}
@@ -1468,10 +1468,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
 	    goal >= le32_to_cpu(es->s_blocks_count))
 		goal = le32_to_cpu(es->s_first_data_block);
 	group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
-			EXT3_BLOCKS_PER_GROUP(sb);
+			EXT4_BLOCKS_PER_GROUP(sb);
 	goal_group = group_no;
 retry_alloc:
-	gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
+	gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
 	if (!gdp)
 		goto io_error;
 
@@ -1486,11 +1486,11 @@ retry_alloc:
 
 	if (free_blocks > 0) {
 		grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
-				EXT3_BLOCKS_PER_GROUP(sb));
+				EXT4_BLOCKS_PER_GROUP(sb));
 		bitmap_bh = read_block_bitmap(sb, group_no);
 		if (!bitmap_bh)
 			goto io_error;
-		grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
+		grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
 					group_no, bitmap_bh, grp_target_blk,
 					my_rsv,	&num, &fatal);
 		if (fatal)
@@ -1499,7 +1499,7 @@ retry_alloc:
 			goto allocated;
 	}
 
-	ngroups = EXT3_SB(sb)->s_groups_count;
+	ngroups = EXT4_SB(sb)->s_groups_count;
 	smp_rmb();
 
 	/*
@@ -1510,7 +1510,7 @@ retry_alloc:
 		group_no++;
 		if (group_no >= ngroups)
 			group_no = 0;
-		gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
+		gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
 		if (!gdp) {
 			*errp = -EIO;
 			goto out;
@@ -1531,7 +1531,7 @@ retry_alloc:
 		/*
 		 * try to allocate block(s) from this group, without a goal(-1).
 		 */
-		grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
+		grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
 					group_no, bitmap_bh, -1, my_rsv,
 					&num, &fatal);
 		if (fatal)
@@ -1557,23 +1557,23 @@ retry_alloc:
 
 allocated:
 
-	ext3_debug("using block group %d(%d)\n",
+	ext4_debug("using block group %d(%d)\n",
 			group_no, gdp->bg_free_blocks_count);
 
 	BUFFER_TRACE(gdp_bh, "get_write_access");
-	fatal = ext3_journal_get_write_access(handle, gdp_bh);
+	fatal = ext4_journal_get_write_access(handle, gdp_bh);
 	if (fatal)
 		goto out;
 
-	ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no);
+	ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
 
 	if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||
 	    in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||
 	    in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),
-		      EXT3_SB(sb)->s_itb_per_group) ||
+		      EXT4_SB(sb)->s_itb_per_group) ||
 	    in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
-		      EXT3_SB(sb)->s_itb_per_group))
-		ext3_error(sb, "ext3_new_block",
+		      EXT4_SB(sb)->s_itb_per_group))
+		ext4_error(sb, "ext4_new_block",
 			    "Allocating block in system zone - "
 			    "blocks from "E3FSBLK", length %lu",
 			     ret_block, num);
@@ -1598,20 +1598,20 @@ allocated:
 		int i;
 
 		for (i = 0; i < num; i++) {
-			if (ext3_test_bit(grp_alloc_blk+i,
+			if (ext4_test_bit(grp_alloc_blk+i,
 					bh2jh(bitmap_bh)->b_committed_data)) {
 				printk("%s: block was unexpectedly set in "
 					"b_committed_data\n", __FUNCTION__);
 			}
 		}
 	}
-	ext3_debug("found bit %d\n", grp_alloc_blk);
+	ext4_debug("found bit %d\n", grp_alloc_blk);
 	spin_unlock(sb_bgl_lock(sbi, group_no));
 	jbd_unlock_bh_state(bitmap_bh);
 #endif
 
 	if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
-		ext3_error(sb, "ext3_new_block",
+		ext4_error(sb, "ext4_new_block",
 			    "block("E3FSBLK") >= blocks count(%d) - "
 			    "block_group = %d, es == %p ", ret_block,
 			le32_to_cpu(es->s_blocks_count), group_no, es);
@@ -1623,7 +1623,7 @@ allocated:
 	 * list of some description.  We don't know in advance whether
 	 * the caller wants to use it as metadata or data.
 	 */
-	ext3_debug("allocating block %lu. Goal hits %d of %d.\n",
+	ext4_debug("allocating block %lu. Goal hits %d of %d.\n",
 			ret_block, goal_hits, goal_attempts);
 
 	spin_lock(sb_bgl_lock(sbi, group_no));
@@ -1633,7 +1633,7 @@ allocated:
 	percpu_counter_mod(&sbi->s_freeblocks_counter, -num);
 
 	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
-	err = ext3_journal_dirty_metadata(handle, gdp_bh);
+	err = ext4_journal_dirty_metadata(handle, gdp_bh);
 	if (!fatal)
 		fatal = err;
 
@@ -1652,7 +1652,7 @@ io_error:
 out:
 	if (fatal) {
 		*errp = fatal;
-		ext3_std_error(sb, fatal);
+		ext4_std_error(sb, fatal);
 	}
 	/*
 	 * Undo the block allocation
@@ -1663,40 +1663,40 @@ out:
 	return 0;
 }
 
-ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
-			ext3_fsblk_t goal, int *errp)
+ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
+			ext4_fsblk_t goal, int *errp)
 {
 	unsigned long count = 1;
 
-	return ext3_new_blocks(handle, inode, goal, &count, errp);
+	return ext4_new_blocks(handle, inode, goal, &count, errp);
 }
 
 /**
- * ext3_count_free_blocks() -- count filesystem free blocks
+ * ext4_count_free_blocks() -- count filesystem free blocks
  * @sb:		superblock
  *
  * Adds up the number of free blocks from each block group.
  */
-ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
+ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 {
-	ext3_fsblk_t desc_count;
-	struct ext3_group_desc *gdp;
+	ext4_fsblk_t desc_count;
+	struct ext4_group_desc *gdp;
 	int i;
-	unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
-#ifdef EXT3FS_DEBUG
-	struct ext3_super_block *es;
-	ext3_fsblk_t bitmap_count;
+	unsigned long ngroups = EXT4_SB(sb)->s_groups_count;
+#ifdef EXT4FS_DEBUG
+	struct ext4_super_block *es;
+	ext4_fsblk_t bitmap_count;
 	unsigned long x;
 	struct buffer_head *bitmap_bh = NULL;
 
-	es = EXT3_SB(sb)->s_es;
+	es = EXT4_SB(sb)->s_es;
 	desc_count = 0;
 	bitmap_count = 0;
 	gdp = NULL;
 
 	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
-		gdp = ext3_get_group_desc(sb, i, NULL);
+		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
 		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
@@ -1705,13 +1705,13 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
 		if (bitmap_bh == NULL)
 			continue;
 
-		x = ext3_count_free(bitmap_bh, sb->s_blocksize);
+		x = ext4_count_free(bitmap_bh, sb->s_blocksize);
 		printk("group %d: stored = %d, counted = %lu\n",
 			i, le16_to_cpu(gdp->bg_free_blocks_count), x);
 		bitmap_count += x;
 	}
 	brelse(bitmap_bh);
-	printk("ext3_count_free_blocks: stored = "E3FSBLK
+	printk("ext4_count_free_blocks: stored = "E3FSBLK
 		", computed = "E3FSBLK", "E3FSBLK"\n",
 	       le32_to_cpu(es->s_free_blocks_count),
 		desc_count, bitmap_count);
@@ -1720,7 +1720,7 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
 	desc_count = 0;
 	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
-		gdp = ext3_get_group_desc(sb, i, NULL);
+		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
 		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
@@ -1731,11 +1731,11 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
 }
 
 static inline int
-block_in_use(ext3_fsblk_t block, struct super_block *sb, unsigned char *map)
+block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map)
 {
-	return ext3_test_bit ((block -
-		le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) %
-			 EXT3_BLOCKS_PER_GROUP(sb), map);
+	return ext4_test_bit ((block -
+		le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) %
+			 EXT4_BLOCKS_PER_GROUP(sb), map);
 }
 
 static inline int test_root(int a, int b)
@@ -1747,7 +1747,7 @@ static inline int test_root(int a, int b)
 	return num == a;
 }
 
-static int ext3_group_sparse(int group)
+static int ext4_group_sparse(int group)
 {
 	if (group <= 1)
 		return 1;
@@ -1758,44 +1758,44 @@ static int ext3_group_sparse(int group)
 }
 
 /**
- *	ext3_bg_has_super - number of blocks used by the superblock in group
+ *	ext4_bg_has_super - number of blocks used by the superblock in group
  *	@sb: superblock for filesystem
  *	@group: group number to check
  *
  *	Return the number of blocks used by the superblock (primary or backup)
  *	in this group.  Currently this will be only 0 or 1.
  */
-int ext3_bg_has_super(struct super_block *sb, int group)
+int ext4_bg_has_super(struct super_block *sb, int group)
 {
-	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
-				EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
-			!ext3_group_sparse(group))
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+				EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
+			!ext4_group_sparse(group))
 		return 0;
 	return 1;
 }
 
-static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group)
+static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, int group)
 {
-	unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
-	unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb);
-	unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1;
+	unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
+	unsigned long first = metagroup * EXT4_DESC_PER_BLOCK(sb);
+	unsigned long last = first + EXT4_DESC_PER_BLOCK(sb) - 1;
 
 	if (group == first || group == first + 1 || group == last)
 		return 1;
 	return 0;
 }
 
-static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group)
+static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group)
 {
-	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
-				EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
-			!ext3_group_sparse(group))
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+				EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
+			!ext4_group_sparse(group))
 		return 0;
-	return EXT3_SB(sb)->s_gdb_count;
+	return EXT4_SB(sb)->s_gdb_count;
 }
 
 /**
- *	ext3_bg_num_gdb - number of blocks used by the group table in group
+ *	ext4_bg_num_gdb - number of blocks used by the group table in group
  *	@sb: superblock for filesystem
  *	@group: group number to check
  *
@@ -1803,16 +1803,16 @@ static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group)
  *	(primary or backup) in this group.  In the future there may be a
  *	different number of descriptor blocks in each group.
  */
-unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
+unsigned long ext4_bg_num_gdb(struct super_block *sb, int group)
 {
 	unsigned long first_meta_bg =
-			le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg);
-	unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
+			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
+	unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
 
-	if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) ||
+	if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) ||
 			metagroup < first_meta_bg)
-		return ext3_bg_num_gdb_nometa(sb,group);
+		return ext4_bg_num_gdb_nometa(sb,group);
 
-	return ext3_bg_num_gdb_meta(sb,group);
+	return ext4_bg_num_gdb_meta(sb,group);
 
 }
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index b9176eed98d1..f4b35706f39c 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/bitmap.c
+ *  linux/fs/ext4/bitmap.c
  *
  * Copyright (C) 1992, 1993, 1994, 1995
  * Remy Card (card@masi.ibp.fr)
@@ -9,13 +9,13 @@
 
 #include <linux/buffer_head.h>
 #include <linux/jbd.h>
-#include <linux/ext3_fs.h>
+#include <linux/ext4_fs.h>
 
-#ifdef EXT3FS_DEBUG
+#ifdef EXT4FS_DEBUG
 
 static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
 
-unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
+unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars)
 {
 	unsigned int i;
 	unsigned long sum = 0;
@@ -28,5 +28,5 @@ unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
 	return (sum);
 }
 
-#endif  /*  EXT3FS_DEBUG  */
+#endif  /*  EXT4FS_DEBUG  */
 
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d0b54f30b914..ec114d7886cc 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/dir.c
+ *  linux/fs/ext4/dir.c
  *
  * Copyright (C) 1992, 1993, 1994, 1995
  * Remy Card (card@masi.ibp.fr)
@@ -12,7 +12,7 @@
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *
- *  ext3 directory handling functions
+ *  ext4 directory handling functions
  *
  *  Big-endian to little-endian byte-swapping/bitmaps by
  *        David S. Miller (davem@caip.rutgers.edu), 1995
@@ -23,69 +23,69 @@
 
 #include <linux/fs.h>
 #include <linux/jbd.h>
-#include <linux/ext3_fs.h>
+#include <linux/ext4_fs.h>
 #include <linux/buffer_head.h>
 #include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
 
-static unsigned char ext3_filetype_table[] = {
+static unsigned char ext4_filetype_table[] = {
 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
 };
 
-static int ext3_readdir(struct file *, void *, filldir_t);
-static int ext3_dx_readdir(struct file * filp,
+static int ext4_readdir(struct file *, void *, filldir_t);
+static int ext4_dx_readdir(struct file * filp,
 			   void * dirent, filldir_t filldir);
-static int ext3_release_dir (struct inode * inode,
+static int ext4_release_dir (struct inode * inode,
 				struct file * filp);
 
-const struct file_operations ext3_dir_operations = {
+const struct file_operations ext4_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
-	.readdir	= ext3_readdir,		/* we take BKL. needed?*/
-	.ioctl		= ext3_ioctl,		/* BKL held */
+	.readdir	= ext4_readdir,		/* we take BKL. needed?*/
+	.ioctl		= ext4_ioctl,		/* BKL held */
 #ifdef CONFIG_COMPAT
-	.compat_ioctl	= ext3_compat_ioctl,
+	.compat_ioctl	= ext4_compat_ioctl,
 #endif
-	.fsync		= ext3_sync_file,	/* BKL held */
-#ifdef CONFIG_EXT3_INDEX
-	.release	= ext3_release_dir,
+	.fsync		= ext4_sync_file,	/* BKL held */
+#ifdef CONFIG_EXT4_INDEX
+	.release	= ext4_release_dir,
 #endif
 };
 
 
 static unsigned char get_dtype(struct super_block *sb, int filetype)
 {
-	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
-	    (filetype >= EXT3_FT_MAX))
+	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
+	    (filetype >= EXT4_FT_MAX))
 		return DT_UNKNOWN;
 
-	return (ext3_filetype_table[filetype]);
+	return (ext4_filetype_table[filetype]);
 }
 
 
-int ext3_check_dir_entry (const char * function, struct inode * dir,
-			  struct ext3_dir_entry_2 * de,
+int ext4_check_dir_entry (const char * function, struct inode * dir,
+			  struct ext4_dir_entry_2 * de,
 			  struct buffer_head * bh,
 			  unsigned long offset)
 {
 	const char * error_msg = NULL;
 	const int rlen = le16_to_cpu(de->rec_len);
 
-	if (rlen < EXT3_DIR_REC_LEN(1))
+	if (rlen < EXT4_DIR_REC_LEN(1))
 		error_msg = "rec_len is smaller than minimal";
 	else if (rlen % 4 != 0)
 		error_msg = "rec_len % 4 != 0";
-	else if (rlen < EXT3_DIR_REC_LEN(de->name_len))
+	else if (rlen < EXT4_DIR_REC_LEN(de->name_len))
 		error_msg = "rec_len is too small for name_len";
 	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
 		error_msg = "directory entry across blocks";
 	else if (le32_to_cpu(de->inode) >
-			le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
+			le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))
 		error_msg = "inode out of bounds";
 
 	if (error_msg != NULL)
-		ext3_error (dir->i_sb, function,
+		ext4_error (dir->i_sb, function,
 			"bad entry in directory #%lu: %s - "
 			"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
 			dir->i_ino, error_msg, offset,
@@ -94,13 +94,13 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
 	return error_msg == NULL ? 1 : 0;
 }
 
-static int ext3_readdir(struct file * filp,
+static int ext4_readdir(struct file * filp,
 			 void * dirent, filldir_t filldir)
 {
 	int error = 0;
 	unsigned long offset;
 	int i, stored;
-	struct ext3_dir_entry_2 *de;
+	struct ext4_dir_entry_2 *de;
 	struct super_block *sb;
 	int err;
 	struct inode *inode = filp->f_dentry->d_inode;
@@ -108,12 +108,12 @@ static int ext3_readdir(struct file * filp,
 
 	sb = inode->i_sb;
 
-#ifdef CONFIG_EXT3_INDEX
-	if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
-				    EXT3_FEATURE_COMPAT_DIR_INDEX) &&
-	    ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
+#ifdef CONFIG_EXT4_INDEX
+	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
+				    EXT4_FEATURE_COMPAT_DIR_INDEX) &&
+	    ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
 	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
-		err = ext3_dx_readdir(filp, dirent, filldir);
+		err = ext4_dx_readdir(filp, dirent, filldir);
 		if (err != ERR_BAD_DX_DIR) {
 			ret = err;
 			goto out;
@@ -122,19 +122,19 @@ static int ext3_readdir(struct file * filp,
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
+		EXT4_I(filp->f_dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
 	}
 #endif
 	stored = 0;
 	offset = filp->f_pos & (sb->s_blocksize - 1);
 
 	while (!error && !stored && filp->f_pos < inode->i_size) {
-		unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb);
+		unsigned long blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
 		struct buffer_head map_bh;
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext3_get_blocks_handle(NULL, inode, blk, 1,
+		err = ext4_get_blocks_handle(NULL, inode, blk, 1,
 						&map_bh, 0, 0);
 		if (err > 0) {
 			page_cache_readahead(sb->s_bdev->bd_inode->i_mapping,
@@ -143,7 +143,7 @@ static int ext3_readdir(struct file * filp,
 				map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits),
 				1);
-			bh = ext3_bread(NULL, inode, blk, 0, &err);
+			bh = ext4_bread(NULL, inode, blk, 0, &err);
 		}
 
 		/*
@@ -151,7 +151,7 @@ static int ext3_readdir(struct file * filp,
 		 * of recovering data when there's a bad sector
 		 */
 		if (!bh) {
-			ext3_error (sb, "ext3_readdir",
+			ext4_error (sb, "ext4_readdir",
 				"directory #%lu contains a hole at offset %lu",
 				inode->i_ino, (unsigned long)filp->f_pos);
 			filp->f_pos += sb->s_blocksize - offset;
@@ -165,7 +165,7 @@ revalidate:
 		 * to make sure. */
 		if (filp->f_version != inode->i_version) {
 			for (i = 0; i < sb->s_blocksize && i < offset; ) {
-				de = (struct ext3_dir_entry_2 *)
+				de = (struct ext4_dir_entry_2 *)
 					(bh->b_data + i);
 				/* It's too expensive to do a full
 				 * dirent test each time round this
@@ -174,7 +174,7 @@ revalidate:
 				 * failure will be detected in the
 				 * dirent test below. */
 				if (le16_to_cpu(de->rec_len) <
-						EXT3_DIR_REC_LEN(1))
+						EXT4_DIR_REC_LEN(1))
 					break;
 				i += le16_to_cpu(de->rec_len);
 			}
@@ -186,8 +186,8 @@ revalidate:
 
 		while (!error && filp->f_pos < inode->i_size
 		       && offset < sb->s_blocksize) {
-			de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
-			if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
+			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
+			if (!ext4_check_dir_entry ("ext4_readdir", inode, de,
 						   bh, offset)) {
 				/* On error, skip the f_pos to the
                                    next block. */
@@ -228,7 +228,7 @@ out:
 	return ret;
 }
 
-#ifdef CONFIG_EXT3_INDEX
+#ifdef CONFIG_EXT4_INDEX
 /*
  * These functions convert from the major/minor hash to an f_pos
  * value.
@@ -323,7 +323,7 @@ static struct dir_private_info *create_dir_info(loff_t pos)
 	return p;
 }
 
-void ext3_htree_free_dir_info(struct dir_private_info *p)
+void ext4_htree_free_dir_info(struct dir_private_info *p)
 {
 	free_rb_tree_fname(&p->root);
 	kfree(p);
@@ -332,9 +332,9 @@ void ext3_htree_free_dir_info(struct dir_private_info *p)
 /*
  * Given a directory entry, enter it into the fname rb tree.
  */
-int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
+int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
 			     __u32 minor_hash,
-			     struct ext3_dir_entry_2 *dirent)
+			     struct ext4_dir_entry_2 *dirent)
 {
 	struct rb_node **p, *parent = NULL;
 	struct fname * fname, *new_fn;
@@ -390,7 +390,7 @@ int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
 
 
 /*
- * This is a helper function for ext3_dx_readdir.  It calls filldir
+ * This is a helper function for ext4_dx_readdir.  It calls filldir
  * for all entres on the fname linked list.  (Normally there is only
  * one entry on the linked list, unless there are 62 bit hash collisions.)
  */
@@ -425,7 +425,7 @@ static int call_filldir(struct file * filp, void * dirent,
 	return 0;
 }
 
-static int ext3_dx_readdir(struct file * filp,
+static int ext4_dx_readdir(struct file * filp,
 			 void * dirent, filldir_t filldir)
 {
 	struct dir_private_info *info = filp->private_data;
@@ -440,7 +440,7 @@ static int ext3_dx_readdir(struct file * filp,
 		filp->private_data = info;
 	}
 
-	if (filp->f_pos == EXT3_HTREE_EOF)
+	if (filp->f_pos == EXT4_HTREE_EOF)
 		return 0;	/* EOF */
 
 	/* Some one has messed with f_pos; reset the world */
@@ -474,13 +474,13 @@ static int ext3_dx_readdir(struct file * filp,
 			info->curr_node = NULL;
 			free_rb_tree_fname(&info->root);
 			filp->f_version = inode->i_version;
-			ret = ext3_htree_fill_tree(filp, info->curr_hash,
+			ret = ext4_htree_fill_tree(filp, info->curr_hash,
 						   info->curr_minor_hash,
 						   &info->next_hash);
 			if (ret < 0)
 				return ret;
 			if (ret == 0) {
-				filp->f_pos = EXT3_HTREE_EOF;
+				filp->f_pos = EXT4_HTREE_EOF;
 				break;
 			}
 			info->curr_node = rb_first(&info->root);
@@ -495,7 +495,7 @@ static int ext3_dx_readdir(struct file * filp,
 		info->curr_node = rb_next(info->curr_node);
 		if (!info->curr_node) {
 			if (info->next_hash == ~0) {
-				filp->f_pos = EXT3_HTREE_EOF;
+				filp->f_pos = EXT4_HTREE_EOF;
 				break;
 			}
 			info->curr_hash = info->next_hash;
@@ -507,10 +507,10 @@ finished:
 	return 0;
 }
 
-static int ext3_release_dir (struct inode * inode, struct file * filp)
+static int ext4_release_dir (struct inode * inode, struct file * filp)
 {
        if (filp->private_data)
-		ext3_htree_free_dir_info(filp->private_data);
+		ext4_htree_free_dir_info(filp->private_data);
 
 	return 0;
 }
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index e96c388047e0..d938fbe1e08b 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/file.c
+ *  linux/fs/ext4/file.c
  *
  * Copyright (C) 1992, 1993, 1994, 1995
  * Remy Card (card@masi.ibp.fr)
@@ -12,7 +12,7 @@
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *
- *  ext3 fs regular file handling primitives
+ *  ext4 fs regular file handling primitives
  *
  *  64-bit file support on 64-bit platforms by Jakub Jelinek
  *	(jj@sunsite.ms.mff.cuni.cz)
@@ -21,34 +21,34 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
+#include <linux/ext4_fs.h>
+#include <linux/ext4_jbd.h>
 #include "xattr.h"
 #include "acl.h"
 
 /*
  * Called when an inode is released. Note that this is different
- * from ext3_file_open: open gets called at every open, but release
+ * from ext4_file_open: open gets called at every open, but release
  * gets called only when /all/ the files are closed.
  */
-static int ext3_release_file (struct inode * inode, struct file * filp)
+static int ext4_release_file (struct inode * inode, struct file * filp)
 {
 	/* if we are the last writer on the inode, drop the block reservation */
 	if ((filp->f_mode & FMODE_WRITE) &&
 			(atomic_read(&inode->i_writecount) == 1))
 	{
-		mutex_lock(&EXT3_I(inode)->truncate_mutex);
-		ext3_discard_reservation(inode);
-		mutex_unlock(&EXT3_I(inode)->truncate_mutex);
+		mutex_lock(&EXT4_I(inode)->truncate_mutex);
+		ext4_discard_reservation(inode);
+		mutex_unlock(&EXT4_I(inode)->truncate_mutex);
 	}
 	if (is_dx(inode) && filp->private_data)
-		ext3_htree_free_dir_info(filp->private_data);
+		ext4_htree_free_dir_info(filp->private_data);
 
 	return 0;
 }
 
 static ssize_t
-ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
+ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
@@ -79,7 +79,7 @@ ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
 		 * Open question --- do we care about flushing timestamps too
 		 * if the inode is IS_SYNC?
 		 */
-		if (!ext3_should_journal_data(inode))
+		if (!ext4_should_journal_data(inode))
 			return ret;
 
 		goto force_commit;
@@ -100,40 +100,40 @@ ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
 	 */
 
 force_commit:
-	err = ext3_force_commit(inode->i_sb);
+	err = ext4_force_commit(inode->i_sb);
 	if (err)
 		return err;
 	return ret;
 }
 
-const struct file_operations ext3_file_operations = {
+const struct file_operations ext4_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
-	.aio_write	= ext3_file_write,
-	.ioctl		= ext3_ioctl,
+	.aio_write	= ext4_file_write,
+	.ioctl		= ext4_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl	= ext3_compat_ioctl,
+	.compat_ioctl	= ext4_compat_ioctl,
 #endif
 	.mmap		= generic_file_mmap,
 	.open		= generic_file_open,
-	.release	= ext3_release_file,
-	.fsync		= ext3_sync_file,
+	.release	= ext4_release_file,
+	.fsync		= ext4_sync_file,
 	.sendfile	= generic_file_sendfile,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 };
 
-struct inode_operations ext3_file_inode_operations = {
-	.truncate	= ext3_truncate,
-	.setattr	= ext3_setattr,
-#ifdef CONFIG_EXT3_FS_XATTR
+struct inode_operations ext4_file_inode_operations = {
+	.truncate	= ext4_truncate,
+	.setattr	= ext4_setattr,
+#ifdef CONFIG_EXT4DEV_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
-	.listxattr	= ext3_listxattr,
+	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
-	.permission	= ext3_permission,
+	.permission	= ext4_permission,
 };
 
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index dd1fd3c0fc05..272faa27761d 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/fsync.c
+ *  linux/fs/ext4/fsync.c
  *
  *  Copyright (C) 1993  Stephen Tweedie (sct@redhat.com)
  *  from
@@ -9,7 +9,7 @@
  *  from
  *  linux/fs/minix/truncate.c   Copyright (C) 1991, 1992  Linus Torvalds
  *
- *  ext3fs fsync primitive
+ *  ext4fs fsync primitive
  *
  *  Big-endian to little-endian byte-swapping/bitmaps by
  *        David S. Miller (davem@caip.rutgers.edu), 1995
@@ -27,11 +27,11 @@
 #include <linux/sched.h>
 #include <linux/writeback.h>
 #include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
+#include <linux/ext4_fs.h>
+#include <linux/ext4_jbd.h>
 
 /*
- * akpm: A new design for ext3_sync_file().
+ * akpm: A new design for ext4_sync_file().
  *
  * This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
  * There cannot be a transaction open by this task.
@@ -42,12 +42,12 @@
  * inode to disk.
  */
 
-int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
+int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
 {
 	struct inode *inode = dentry->d_inode;
 	int ret = 0;
 
-	J_ASSERT(ext3_journal_current_handle() == 0);
+	J_ASSERT(ext4_journal_current_handle() == 0);
 
 	/*
 	 * data=writeback:
@@ -61,14 +61,14 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
 	 *
 	 * data=journal:
 	 *  filemap_fdatawrite won't do anything (the buffers are clean).
-	 *  ext3_force_commit will write the file data into the journal and
+	 *  ext4_force_commit will write the file data into the journal and
 	 *  will wait on that.
 	 *  filemap_fdatawait() will encounter a ton of newly-dirtied pages
 	 *  (they were dirtied by commit).  But that's OK - the blocks are
 	 *  safe in-journal, which is all fsync() needs to ensure.
 	 */
-	if (ext3_should_journal_data(inode)) {
-		ret = ext3_force_commit(inode->i_sb);
+	if (ext4_should_journal_data(inode)) {
+		ret = ext4_force_commit(inode->i_sb);
 		goto out;
 	}
 
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index deeb27b5ba83..d15bb4274428 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/hash.c
+ *  linux/fs/ext4/hash.c
  *
  * Copyright (C) 2002 by Theodore Ts'o
  *
@@ -12,7 +12,7 @@
 #include <linux/fs.h>
 #include <linux/jbd.h>
 #include <linux/sched.h>
-#include <linux/ext3_fs.h>
+#include <linux/ext4_fs.h>
 #include <linux/cryptohash.h>
 
 #define DELTA 0x9E3779B9
@@ -89,7 +89,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
  * represented, and whether or not the returned hash is 32 bits or 64
  * bits.  32 bit hashes will return 0 for the minor hash.
  */
-int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
+int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
 {
 	__u32	hash;
 	__u32	minor_hash = 0;
@@ -144,8 +144,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
 		return -1;
 	}
 	hash = hash & ~1;
-	if (hash == (EXT3_HTREE_EOF << 1))
-		hash = (EXT3_HTREE_EOF-1) << 1;
+	if (hash == (EXT4_HTREE_EOF << 1))
+		hash = (EXT4_HTREE_EOF-1) << 1;
 	hinfo->hash = hash;
 	hinfo->minor_hash = minor_hash;
 	return 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index e45dbd651736..4b92066ca08f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/ialloc.c
+ *  linux/fs/ext4/ialloc.c
  *
  * Copyright (C) 1992, 1993, 1994, 1995
  * Remy Card (card@masi.ibp.fr)
@@ -15,8 +15,8 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
+#include <linux/ext4_fs.h>
+#include <linux/ext4_jbd.h>
 #include <linux/stat.h>
 #include <linux/string.h>
 #include <linux/quotaops.h>
@@ -53,16 +53,16 @@
 static struct buffer_head *
 read_inode_bitmap(struct super_block * sb, unsigned long block_group)
 {
-	struct ext3_group_desc *desc;
+	struct ext4_group_desc *desc;
 	struct buffer_head *bh = NULL;
 
-	desc = ext3_get_group_desc(sb, block_group, NULL);
+	desc = ext4_get_group_desc(sb, block_group, NULL);
 	if (!desc)
 		goto error_out;
 
 	bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
 	if (!bh)
-		ext3_error(sb, "read_inode_bitmap",
+		ext4_error(sb, "read_inode_bitmap",
 			    "Cannot read inode bitmap - "
 			    "block_group = %lu, inode_bitmap = %u",
 			    block_group, le32_to_cpu(desc->bg_inode_bitmap));
@@ -86,7 +86,7 @@ error_out:
  * though), and then we'd have two inodes sharing the
  * same inode number and space on the harddisk.
  */
-void ext3_free_inode (handle_t *handle, struct inode * inode)
+void ext4_free_inode (handle_t *handle, struct inode * inode)
 {
 	struct super_block * sb = inode->i_sb;
 	int is_directory;
@@ -95,36 +95,36 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	struct buffer_head *bh2;
 	unsigned long block_group;
 	unsigned long bit;
-	struct ext3_group_desc * gdp;
-	struct ext3_super_block * es;
-	struct ext3_sb_info *sbi;
+	struct ext4_group_desc * gdp;
+	struct ext4_super_block * es;
+	struct ext4_sb_info *sbi;
 	int fatal = 0, err;
 
 	if (atomic_read(&inode->i_count) > 1) {
-		printk ("ext3_free_inode: inode has count=%d\n",
+		printk ("ext4_free_inode: inode has count=%d\n",
 					atomic_read(&inode->i_count));
 		return;
 	}
 	if (inode->i_nlink) {
-		printk ("ext3_free_inode: inode has nlink=%d\n",
+		printk ("ext4_free_inode: inode has nlink=%d\n",
 			inode->i_nlink);
 		return;
 	}
 	if (!sb) {
-		printk("ext3_free_inode: inode on nonexistent device\n");
+		printk("ext4_free_inode: inode on nonexistent device\n");
 		return;
 	}
-	sbi = EXT3_SB(sb);
+	sbi = EXT4_SB(sb);
 
 	ino = inode->i_ino;
-	ext3_debug ("freeing inode %lu\n", ino);
+	ext4_debug ("freeing inode %lu\n", ino);
 
 	/*
 	 * Note: we must free any quota before locking the superblock,
 	 * as writing the quota to disk may need the lock as well.
 	 */
 	DQUOT_INIT(inode);
-	ext3_xattr_delete_inode(handle, inode);
+	ext4_xattr_delete_inode(handle, inode);
 	DQUOT_FREE_INODE(inode);
 	DQUOT_DROP(inode);
 
@@ -133,33 +133,33 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	/* Do this BEFORE marking the inode not in use or returning an error */
 	clear_inode (inode);
 
-	es = EXT3_SB(sb)->s_es;
-	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
-		ext3_error (sb, "ext3_free_inode",
+	es = EXT4_SB(sb)->s_es;
+	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
+		ext4_error (sb, "ext4_free_inode",
 			    "reserved or nonexistent inode %lu", ino);
 		goto error_return;
 	}
-	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
-	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
+	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
+	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
 	bitmap_bh = read_inode_bitmap(sb, block_group);
 	if (!bitmap_bh)
 		goto error_return;
 
 	BUFFER_TRACE(bitmap_bh, "get_write_access");
-	fatal = ext3_journal_get_write_access(handle, bitmap_bh);
+	fatal = ext4_journal_get_write_access(handle, bitmap_bh);
 	if (fatal)
 		goto error_return;
 
 	/* Ok, now we can actually update the inode bitmaps.. */
-	if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+	if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
 					bit, bitmap_bh->b_data))
-		ext3_error (sb, "ext3_free_inode",
+		ext4_error (sb, "ext4_free_inode",
 			      "bit already cleared for inode %lu", ino);
 	else {
-		gdp = ext3_get_group_desc (sb, block_group, &bh2);
+		gdp = ext4_get_group_desc (sb, block_group, &bh2);
 
 		BUFFER_TRACE(bh2, "get_write_access");
-		fatal = ext3_journal_get_write_access(handle, bh2);
+		fatal = ext4_journal_get_write_access(handle, bh2);
 		if (fatal) goto error_return;
 
 		if (gdp) {
@@ -175,18 +175,18 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 				percpu_counter_dec(&sbi->s_dirs_counter);
 
 		}
-		BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
-		err = ext3_journal_dirty_metadata(handle, bh2);
+		BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
+		err = ext4_journal_dirty_metadata(handle, bh2);
 		if (!fatal) fatal = err;
 	}
-	BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
-	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
+	BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata");
+	err = ext4_journal_dirty_metadata(handle, bitmap_bh);
 	if (!fatal)
 		fatal = err;
 	sb->s_dirt = 1;
 error_return:
 	brelse(bitmap_bh);
-	ext3_std_error(sb, fatal);
+	ext4_std_error(sb, fatal);
 }
 
 /*
@@ -201,17 +201,17 @@ error_return:
  */
 static int find_group_dir(struct super_block *sb, struct inode *parent)
 {
-	int ngroups = EXT3_SB(sb)->s_groups_count;
+	int ngroups = EXT4_SB(sb)->s_groups_count;
 	unsigned int freei, avefreei;
-	struct ext3_group_desc *desc, *best_desc = NULL;
+	struct ext4_group_desc *desc, *best_desc = NULL;
 	struct buffer_head *bh;
 	int group, best_group = -1;
 
-	freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
+	freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
 	avefreei = freei / ngroups;
 
 	for (group = 0; group < ngroups; group++) {
-		desc = ext3_get_group_desc (sb, group, &bh);
+		desc = ext4_get_group_desc (sb, group, &bh);
 		if (!desc || !desc->bg_free_inodes_count)
 			continue;
 		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
@@ -256,19 +256,19 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
 
 static int find_group_orlov(struct super_block *sb, struct inode *parent)
 {
-	int parent_group = EXT3_I(parent)->i_block_group;
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
-	struct ext3_super_block *es = sbi->s_es;
+	int parent_group = EXT4_I(parent)->i_block_group;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_super_block *es = sbi->s_es;
 	int ngroups = sbi->s_groups_count;
-	int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
+	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
 	unsigned int freei, avefreei;
-	ext3_fsblk_t freeb, avefreeb;
-	ext3_fsblk_t blocks_per_dir;
+	ext4_fsblk_t freeb, avefreeb;
+	ext4_fsblk_t blocks_per_dir;
 	unsigned int ndirs;
 	int max_debt, max_dirs, min_inodes;
-	ext3_grpblk_t min_blocks;
+	ext4_grpblk_t min_blocks;
 	int group = -1, i;
-	struct ext3_group_desc *desc;
+	struct ext4_group_desc *desc;
 	struct buffer_head *bh;
 
 	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
@@ -278,7 +278,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
 
 	if ((parent == sb->s_root->d_inode) ||
-	    (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) {
+	    (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) {
 		int best_ndir = inodes_per_group;
 		int best_group = -1;
 
@@ -286,7 +286,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 		parent_group = (unsigned)group % ngroups;
 		for (i = 0; i < ngroups; i++) {
 			group = (parent_group + i) % ngroups;
-			desc = ext3_get_group_desc (sb, group, &bh);
+			desc = ext4_get_group_desc (sb, group, &bh);
 			if (!desc || !desc->bg_free_inodes_count)
 				continue;
 			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
@@ -307,9 +307,9 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 
 	max_dirs = ndirs / ngroups + inodes_per_group / 16;
 	min_inodes = avefreei - inodes_per_group / 4;
-	min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
+	min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4;
 
-	max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
+	max_debt = EXT4_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext4_fsblk_t)BLOCK_COST);
 	if (max_debt * INODE_COST > inodes_per_group)
 		max_debt = inodes_per_group / INODE_COST;
 	if (max_debt > 255)
@@ -319,7 +319,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 
 	for (i = 0; i < ngroups; i++) {
 		group = (parent_group + i) % ngroups;
-		desc = ext3_get_group_desc (sb, group, &bh);
+		desc = ext4_get_group_desc (sb, group, &bh);
 		if (!desc || !desc->bg_free_inodes_count)
 			continue;
 		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
@@ -334,7 +334,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 fallback:
 	for (i = 0; i < ngroups; i++) {
 		group = (parent_group + i) % ngroups;
-		desc = ext3_get_group_desc (sb, group, &bh);
+		desc = ext4_get_group_desc (sb, group, &bh);
 		if (!desc || !desc->bg_free_inodes_count)
 			continue;
 		if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
@@ -355,9 +355,9 @@ fallback:
 
 static int find_group_other(struct super_block *sb, struct inode *parent)
 {
-	int parent_group = EXT3_I(parent)->i_block_group;
-	int ngroups = EXT3_SB(sb)->s_groups_count;
-	struct ext3_group_desc *desc;
+	int parent_group = EXT4_I(parent)->i_block_group;
+	int ngroups = EXT4_SB(sb)->s_groups_count;
+	struct ext4_group_desc *desc;
 	struct buffer_head *bh;
 	int group, i;
 
@@ -365,7 +365,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
 	 * Try to place the inode in its parent directory
 	 */
 	group = parent_group;
-	desc = ext3_get_group_desc (sb, group, &bh);
+	desc = ext4_get_group_desc (sb, group, &bh);
 	if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
 			le16_to_cpu(desc->bg_free_blocks_count))
 		return group;
@@ -389,7 +389,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
 		group += i;
 		if (group >= ngroups)
 			group -= ngroups;
-		desc = ext3_get_group_desc (sb, group, &bh);
+		desc = ext4_get_group_desc (sb, group, &bh);
 		if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
 				le16_to_cpu(desc->bg_free_blocks_count))
 			return group;
@@ -403,7 +403,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
 	for (i = 0; i < ngroups; i++) {
 		if (++group >= ngroups)
 			group = 0;
-		desc = ext3_get_group_desc (sb, group, &bh);
+		desc = ext4_get_group_desc (sb, group, &bh);
 		if (desc && le16_to_cpu(desc->bg_free_inodes_count))
 			return group;
 	}
@@ -421,7 +421,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
  * For other inodes, search forward from the parent directory's block
  * group to find a free inode.
  */
-struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
+struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 {
 	struct super_block *sb;
 	struct buffer_head *bitmap_bh = NULL;
@@ -429,10 +429,10 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
 	int group;
 	unsigned long ino = 0;
 	struct inode * inode;
-	struct ext3_group_desc * gdp = NULL;
-	struct ext3_super_block * es;
-	struct ext3_inode_info *ei;
-	struct ext3_sb_info *sbi;
+	struct ext4_group_desc * gdp = NULL;
+	struct ext4_super_block * es;
+	struct ext4_inode_info *ei;
+	struct ext4_sb_info *sbi;
 	int err = 0;
 	struct inode *ret;
 	int i;
@@ -445,9 +445,9 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
-	ei = EXT3_I(inode);
+	ei = EXT4_I(inode);
 
-	sbi = EXT3_SB(sb);
+	sbi = EXT4_SB(sb);
 	es = sbi->s_es;
 	if (S_ISDIR(mode)) {
 		if (test_opt (sb, OLDALLOC))
@@ -464,7 +464,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
 	for (i = 0; i < sbi->s_groups_count; i++) {
 		err = -EIO;
 
-		gdp = ext3_get_group_desc(sb, group, &bh2);
+		gdp = ext4_get_group_desc(sb, group, &bh2);
 		if (!gdp)
 			goto fail;
 
@@ -476,21 +476,21 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
 		ino = 0;
 
 repeat_in_this_group:
-		ino = ext3_find_next_zero_bit((unsigned long *)
-				bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino);
-		if (ino < EXT3_INODES_PER_GROUP(sb)) {
+		ino = ext4_find_next_zero_bit((unsigned long *)
+				bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino);
+		if (ino < EXT4_INODES_PER_GROUP(sb)) {
 
 			BUFFER_TRACE(bitmap_bh, "get_write_access");
-			err = ext3_journal_get_write_access(handle, bitmap_bh);
+			err = ext4_journal_get_write_access(handle, bitmap_bh);
 			if (err)
 				goto fail;
 
-			if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
+			if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group),
 						ino, bitmap_bh->b_data)) {
 				/* we won it */
 				BUFFER_TRACE(bitmap_bh,
-					"call ext3_journal_dirty_metadata");
-				err = ext3_journal_dirty_metadata(handle,
+					"call ext4_journal_dirty_metadata");
+				err = ext4_journal_dirty_metadata(handle,
 								bitmap_bh);
 				if (err)
 					goto fail;
@@ -499,7 +499,7 @@ repeat_in_this_group:
 			/* we lost it */
 			journal_release_buffer(handle, bitmap_bh);
 
-			if (++ino < EXT3_INODES_PER_GROUP(sb))
+			if (++ino < EXT4_INODES_PER_GROUP(sb))
 				goto repeat_in_this_group;
 		}
 
@@ -517,9 +517,9 @@ repeat_in_this_group:
 	goto out;
 
 got:
-	ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
-	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
-		ext3_error (sb, "ext3_new_inode",
+	ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
+	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
+		ext4_error (sb, "ext4_new_inode",
 			    "reserved inode or inode > inodes count - "
 			    "block_group = %d, inode=%lu", group, ino);
 		err = -EIO;
@@ -527,7 +527,7 @@ got:
 	}
 
 	BUFFER_TRACE(bh2, "get_write_access");
-	err = ext3_journal_get_write_access(handle, bh2);
+	err = ext4_journal_get_write_access(handle, bh2);
 	if (err) goto fail;
 	spin_lock(sb_bgl_lock(sbi, group));
 	gdp->bg_free_inodes_count =
@@ -537,8 +537,8 @@ got:
 			cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
 	}
 	spin_unlock(sb_bgl_lock(sbi, group));
-	BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
-	err = ext3_journal_dirty_metadata(handle, bh2);
+	BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
+	err = ext4_journal_dirty_metadata(handle, bh2);
 	if (err) goto fail;
 
 	percpu_counter_dec(&sbi->s_freeinodes_counter);
@@ -566,13 +566,13 @@ got:
 	ei->i_dir_start_lookup = 0;
 	ei->i_disksize = 0;
 
-	ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL;
+	ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL;
 	if (S_ISLNK(mode))
-		ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
+		ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
 	/* dirsync only applies to directories */
 	if (!S_ISDIR(mode))
-		ei->i_flags &= ~EXT3_DIRSYNC_FL;
-#ifdef EXT3_FRAGMENTS
+		ei->i_flags &= ~EXT4_DIRSYNC_FL;
+#ifdef EXT4_FRAGMENTS
 	ei->i_faddr = 0;
 	ei->i_frag_no = 0;
 	ei->i_frag_size = 0;
@@ -583,7 +583,7 @@ got:
 	ei->i_block_alloc_info = NULL;
 	ei->i_block_group = group;
 
-	ext3_set_inode_flags(inode);
+	ext4_set_inode_flags(inode);
 	if (IS_DIRSYNC(inode))
 		handle->h_sync = 1;
 	insert_inode_hash(inode);
@@ -591,10 +591,10 @@ got:
 	inode->i_generation = sbi->s_next_generation++;
 	spin_unlock(&sbi->s_next_gen_lock);
 
-	ei->i_state = EXT3_STATE_NEW;
+	ei->i_state = EXT4_STATE_NEW;
 	ei->i_extra_isize =
-		(EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
-		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
+		(EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ?
+		sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0;
 
 	ret = inode;
 	if(DQUOT_ALLOC_INODE(inode)) {
@@ -602,24 +602,24 @@ got:
 		goto fail_drop;
 	}
 
-	err = ext3_init_acl(handle, inode, dir);
+	err = ext4_init_acl(handle, inode, dir);
 	if (err)
 		goto fail_free_drop;
 
-	err = ext3_init_security(handle,inode, dir);
+	err = ext4_init_security(handle,inode, dir);
 	if (err)
 		goto fail_free_drop;
 
-	err = ext3_mark_inode_dirty(handle, inode);
+	err = ext4_mark_inode_dirty(handle, inode);
 	if (err) {
-		ext3_std_error(sb, err);
+		ext4_std_error(sb, err);
 		goto fail_free_drop;
 	}
 
-	ext3_debug("allocating inode %lu\n", inode->i_ino);
+	ext4_debug("allocating inode %lu\n", inode->i_ino);
 	goto really_out;
 fail:
-	ext3_std_error(sb, err);
+	ext4_std_error(sb, err);
 out:
 	iput(inode);
 	ret = ERR_PTR(err);
@@ -640,9 +640,9 @@ fail_drop:
 }
 
 /* Verify that we are loading a valid orphan from disk */
-struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
+struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
 {
-	unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
+	unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
 	unsigned long block_group;
 	int bit;
 	struct buffer_head *bitmap_bh = NULL;
@@ -650,16 +650,16 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 
 	/* Error cases - e2fsck has already cleaned up for us */
 	if (ino > max_ino) {
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "bad orphan ino %lu!  e2fsck was run?", ino);
 		goto out;
 	}
 
-	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
-	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
+	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
+	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
 	bitmap_bh = read_inode_bitmap(sb, block_group);
 	if (!bitmap_bh) {
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "inode bitmap error for orphan %lu", ino);
 		goto out;
 	}
@@ -668,14 +668,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
 	 * inodes that were being truncated, so we can't check i_nlink==0.
 	 */
-	if (!ext3_test_bit(bit, bitmap_bh->b_data) ||
+	if (!ext4_test_bit(bit, bitmap_bh->b_data) ||
 			!(inode = iget(sb, ino)) || is_bad_inode(inode) ||
 			NEXT_ORPHAN(inode) > max_ino) {
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "bad orphan inode %lu!  e2fsck was run?", ino);
-		printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
+		printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
 		       bit, (unsigned long long)bitmap_bh->b_blocknr,
-		       ext3_test_bit(bit, bitmap_bh->b_data));
+		       ext4_test_bit(bit, bitmap_bh->b_data));
 		printk(KERN_NOTICE "inode=%p\n", inode);
 		if (inode) {
 			printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
@@ -695,22 +695,22 @@ out:
 	return inode;
 }
 
-unsigned long ext3_count_free_inodes (struct super_block * sb)
+unsigned long ext4_count_free_inodes (struct super_block * sb)
 {
 	unsigned long desc_count;
-	struct ext3_group_desc *gdp;
+	struct ext4_group_desc *gdp;
 	int i;
-#ifdef EXT3FS_DEBUG
-	struct ext3_super_block *es;
+#ifdef EXT4FS_DEBUG
+	struct ext4_super_block *es;
 	unsigned long bitmap_count, x;
 	struct buffer_head *bitmap_bh = NULL;
 
-	es = EXT3_SB(sb)->s_es;
+	es = EXT4_SB(sb)->s_es;
 	desc_count = 0;
 	bitmap_count = 0;
 	gdp = NULL;
-	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
-		gdp = ext3_get_group_desc (sb, i, NULL);
+	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+		gdp = ext4_get_group_desc (sb, i, NULL);
 		if (!gdp)
 			continue;
 		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
@@ -719,19 +719,19 @@ unsigned long ext3_count_free_inodes (struct super_block * sb)
 		if (!bitmap_bh)
 			continue;
 
-		x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8);
+		x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
 		printk("group %d: stored = %d, counted = %lu\n",
 			i, le16_to_cpu(gdp->bg_free_inodes_count), x);
 		bitmap_count += x;
 	}
 	brelse(bitmap_bh);
-	printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n",
+	printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n",
 		le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
 	return desc_count;
 #else
 	desc_count = 0;
-	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
-		gdp = ext3_get_group_desc (sb, i, NULL);
+	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+		gdp = ext4_get_group_desc (sb, i, NULL);
 		if (!gdp)
 			continue;
 		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
@@ -742,13 +742,13 @@ unsigned long ext3_count_free_inodes (struct super_block * sb)
 }
 
 /* Called at mount-time, super-block is locked */
-unsigned long ext3_count_dirs (struct super_block * sb)
+unsigned long ext4_count_dirs (struct super_block * sb)
 {
 	unsigned long count = 0;
 	int i;
 
-	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
-		struct ext3_group_desc *gdp = ext3_get_group_desc (sb, i, NULL);
+	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+		struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL);
 		if (!gdp)
 			continue;
 		count += le16_to_cpu(gdp->bg_used_dirs_count);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 03ba5bcab186..7275d60dcc59 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/inode.c
+ *  linux/fs/ext4/inode.c
  *
  * Copyright (C) 1992, 1993, 1994, 1995
  * Remy Card (card@masi.ibp.fr)
@@ -19,13 +19,13 @@
  *  64-bit file support on 64-bit platforms by Jakub Jelinek
  *	(jj@sunsite.ms.mff.cuni.cz)
  *
- *  Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
+ *  Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000
  */
 
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/time.h>
-#include <linux/ext3_jbd.h>
+#include <linux/ext4_jbd.h>
 #include <linux/jbd.h>
 #include <linux/smp_lock.h>
 #include <linux/highuid.h>
@@ -40,21 +40,21 @@
 #include "xattr.h"
 #include "acl.h"
 
-static int ext3_writepage_trans_blocks(struct inode *inode);
+static int ext4_writepage_trans_blocks(struct inode *inode);
 
 /*
  * Test whether an inode is a fast symlink.
  */
-static int ext3_inode_is_fast_symlink(struct inode *inode)
+static int ext4_inode_is_fast_symlink(struct inode *inode)
 {
-	int ea_blocks = EXT3_I(inode)->i_file_acl ?
+	int ea_blocks = EXT4_I(inode)->i_file_acl ?
 		(inode->i_sb->s_blocksize >> 9) : 0;
 
 	return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
 }
 
 /*
- * The ext3 forget function must perform a revoke if we are freeing data
+ * The ext4 forget function must perform a revoke if we are freeing data
  * which has been journaled.  Metadata (eg. indirect blocks) must be
  * revoked in all cases.
  *
@@ -62,8 +62,8 @@ static int ext3_inode_is_fast_symlink(struct inode *inode)
  * but there may still be a record of it in the journal, and that record
  * still needs to be revoked.
  */
-int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
-			struct buffer_head *bh, ext3_fsblk_t blocknr)
+int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
+			struct buffer_head *bh, ext4_fsblk_t blocknr)
 {
 	int err;
 
@@ -81,11 +81,11 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
 	 * support it.  Otherwise, only skip the revoke on un-journaled
 	 * data blocks. */
 
-	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ||
-	    (!is_metadata && !ext3_should_journal_data(inode))) {
+	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
+	    (!is_metadata && !ext4_should_journal_data(inode))) {
 		if (bh) {
 			BUFFER_TRACE(bh, "call journal_forget");
-			return ext3_journal_forget(handle, bh);
+			return ext4_journal_forget(handle, bh);
 		}
 		return 0;
 	}
@@ -93,10 +93,10 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
 	/*
 	 * data!=journal && (is_metadata || should_journal_data(inode))
 	 */
-	BUFFER_TRACE(bh, "call ext3_journal_revoke");
-	err = ext3_journal_revoke(handle, blocknr, bh);
+	BUFFER_TRACE(bh, "call ext4_journal_revoke");
+	err = ext4_journal_revoke(handle, blocknr, bh);
 	if (err)
-		ext3_abort(inode->i_sb, __FUNCTION__,
+		ext4_abort(inode->i_sb, __FUNCTION__,
 			   "error %d when attempting revoke", err);
 	BUFFER_TRACE(bh, "exit");
 	return err;
@@ -115,7 +115,7 @@ static unsigned long blocks_for_truncate(struct inode *inode)
 	/* Give ourselves just enough room to cope with inodes in which
 	 * i_blocks is corrupt: we've seen disk corruptions in the past
 	 * which resulted in random data in an inode which looked enough
-	 * like a regular file for ext3 to try to delete it.  Things
+	 * like a regular file for ext4 to try to delete it.  Things
 	 * will go a bit crazy if that happens, but at least we should
 	 * try not to panic the whole kernel. */
 	if (needed < 2)
@@ -123,10 +123,10 @@ static unsigned long blocks_for_truncate(struct inode *inode)
 
 	/* But we need to bound the transaction so we don't overflow the
 	 * journal. */
-	if (needed > EXT3_MAX_TRANS_DATA)
-		needed = EXT3_MAX_TRANS_DATA;
+	if (needed > EXT4_MAX_TRANS_DATA)
+		needed = EXT4_MAX_TRANS_DATA;
 
-	return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
+	return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
 }
 
 /*
@@ -143,11 +143,11 @@ static handle_t *start_transaction(struct inode *inode)
 {
 	handle_t *result;
 
-	result = ext3_journal_start(inode, blocks_for_truncate(inode));
+	result = ext4_journal_start(inode, blocks_for_truncate(inode));
 	if (!IS_ERR(result))
 		return result;
 
-	ext3_std_error(inode->i_sb, PTR_ERR(result));
+	ext4_std_error(inode->i_sb, PTR_ERR(result));
 	return result;
 }
 
@@ -159,9 +159,9 @@ static handle_t *start_transaction(struct inode *inode)
  */
 static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
 {
-	if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS)
+	if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
 		return 0;
-	if (!ext3_journal_extend(handle, blocks_for_truncate(inode)))
+	if (!ext4_journal_extend(handle, blocks_for_truncate(inode)))
 		return 0;
 	return 1;
 }
@@ -171,16 +171,16 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
  * so before we call here everything must be consistently dirtied against
  * this transaction.
  */
-static int ext3_journal_test_restart(handle_t *handle, struct inode *inode)
+static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
 {
 	jbd_debug(2, "restarting handle %p\n", handle);
-	return ext3_journal_restart(handle, blocks_for_truncate(inode));
+	return ext4_journal_restart(handle, blocks_for_truncate(inode));
 }
 
 /*
  * Called at the last iput() if i_nlink is zero.
  */
-void ext3_delete_inode (struct inode * inode)
+void ext4_delete_inode (struct inode * inode)
 {
 	handle_t *handle;
 
@@ -196,7 +196,7 @@ void ext3_delete_inode (struct inode * inode)
 		 * make sure that the in-core orphan linked list is properly
 		 * cleaned up.
 		 */
-		ext3_orphan_del(NULL, inode);
+		ext4_orphan_del(NULL, inode);
 		goto no_delete;
 	}
 
@@ -204,17 +204,17 @@ void ext3_delete_inode (struct inode * inode)
 		handle->h_sync = 1;
 	inode->i_size = 0;
 	if (inode->i_blocks)
-		ext3_truncate(inode);
+		ext4_truncate(inode);
 	/*
-	 * Kill off the orphan record which ext3_truncate created.
+	 * Kill off the orphan record which ext4_truncate created.
 	 * AKPM: I think this can be inside the above `if'.
-	 * Note that ext3_orphan_del() has to be able to cope with the
+	 * Note that ext4_orphan_del() has to be able to cope with the
 	 * deletion of a non-existent orphan - this is because we don't
-	 * know if ext3_truncate() actually created an orphan record.
+	 * know if ext4_truncate() actually created an orphan record.
 	 * (Well, we could do this if we need to, but heck - it works)
 	 */
-	ext3_orphan_del(handle, inode);
-	EXT3_I(inode)->i_dtime	= get_seconds();
+	ext4_orphan_del(handle, inode);
+	EXT4_I(inode)->i_dtime	= get_seconds();
 
 	/*
 	 * One subtle ordering requirement: if anything has gone wrong
@@ -223,12 +223,12 @@ void ext3_delete_inode (struct inode * inode)
 	 * having errors), but we can't free the inode if the mark_dirty
 	 * fails.
 	 */
-	if (ext3_mark_inode_dirty(handle, inode))
+	if (ext4_mark_inode_dirty(handle, inode))
 		/* If that failed, just do the required in-core inode clear. */
 		clear_inode(inode);
 	else
-		ext3_free_inode(handle, inode);
-	ext3_journal_stop(handle);
+		ext4_free_inode(handle, inode);
+	ext4_journal_stop(handle);
 	return;
 no_delete:
 	clear_inode(inode);	/* We must guarantee clearing of inode... */
@@ -254,14 +254,14 @@ static int verify_chain(Indirect *from, Indirect *to)
 }
 
 /**
- *	ext3_block_to_path - parse the block number into array of offsets
+ *	ext4_block_to_path - parse the block number into array of offsets
  *	@inode: inode in question (we are only interested in its superblock)
  *	@i_block: block number to be parsed
  *	@offsets: array to store the offsets in
  *      @boundary: set this non-zero if the referred-to block is likely to be
  *             followed (on disk) by an indirect block.
  *
- *	To store the locations of file's data ext3 uses a data structure common
+ *	To store the locations of file's data ext4 uses a data structure common
  *	for UNIX filesystems - tree of pointers anchored in the inode, with
  *	data blocks at leaves and indirect blocks in intermediate nodes.
  *	This function translates the block number into path in that tree -
@@ -284,39 +284,39 @@ static int verify_chain(Indirect *from, Indirect *to)
  * get there at all.
  */
 
-static int ext3_block_to_path(struct inode *inode,
+static int ext4_block_to_path(struct inode *inode,
 			long i_block, int offsets[4], int *boundary)
 {
-	int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
-	int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
-	const long direct_blocks = EXT3_NDIR_BLOCKS,
+	int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
+	const long direct_blocks = EXT4_NDIR_BLOCKS,
 		indirect_blocks = ptrs,
 		double_blocks = (1 << (ptrs_bits * 2));
 	int n = 0;
 	int final = 0;
 
 	if (i_block < 0) {
-		ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0");
+		ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0");
 	} else if (i_block < direct_blocks) {
 		offsets[n++] = i_block;
 		final = direct_blocks;
 	} else if ( (i_block -= direct_blocks) < indirect_blocks) {
-		offsets[n++] = EXT3_IND_BLOCK;
+		offsets[n++] = EXT4_IND_BLOCK;
 		offsets[n++] = i_block;
 		final = ptrs;
 	} else if ((i_block -= indirect_blocks) < double_blocks) {
-		offsets[n++] = EXT3_DIND_BLOCK;
+		offsets[n++] = EXT4_DIND_BLOCK;
 		offsets[n++] = i_block >> ptrs_bits;
 		offsets[n++] = i_block & (ptrs - 1);
 		final = ptrs;
 	} else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
-		offsets[n++] = EXT3_TIND_BLOCK;
+		offsets[n++] = EXT4_TIND_BLOCK;
 		offsets[n++] = i_block >> (ptrs_bits * 2);
 		offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
 		offsets[n++] = i_block & (ptrs - 1);
 		final = ptrs;
 	} else {
-		ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big");
+		ext4_warning(inode->i_sb, "ext4_block_to_path", "block > big");
 	}
 	if (boundary)
 		*boundary = final - 1 - (i_block & (ptrs - 1));
@@ -324,7 +324,7 @@ static int ext3_block_to_path(struct inode *inode,
 }
 
 /**
- *	ext3_get_branch - read the chain of indirect blocks leading to data
+ *	ext4_get_branch - read the chain of indirect blocks leading to data
  *	@inode: inode in question
  *	@depth: depth of the chain (1 - direct pointer, etc.)
  *	@offsets: offsets of pointers in inode/indirect blocks
@@ -352,7 +352,7 @@ static int ext3_block_to_path(struct inode *inode,
  *	or when it reads all @depth-1 indirect blocks successfully and finds
  *	the whole chain, all way to the data (returns %NULL, *err == 0).
  */
-static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets,
+static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets,
 				 Indirect chain[4], int *err)
 {
 	struct super_block *sb = inode->i_sb;
@@ -361,7 +361,7 @@ static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets,
 
 	*err = 0;
 	/* i_data is not going away, no lock needed */
-	add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets);
+	add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets);
 	if (!p->key)
 		goto no_block;
 	while (--depth) {
@@ -389,7 +389,7 @@ no_block:
 }
 
 /**
- *	ext3_find_near - find a place for allocation with sufficient locality
+ *	ext4_find_near - find a place for allocation with sufficient locality
  *	@inode: owner
  *	@ind: descriptor of indirect block.
  *
@@ -408,13 +408,13 @@ no_block:
  *
  *	Caller must make sure that @ind is valid and will stay that way.
  */
-static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
+static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext4_inode_info *ei = EXT4_I(inode);
 	__le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
 	__le32 *p;
-	ext3_fsblk_t bg_start;
-	ext3_grpblk_t colour;
+	ext4_fsblk_t bg_start;
+	ext4_grpblk_t colour;
 
 	/* Try to find previous block */
 	for (p = ind->p - 1; p >= start; p--) {
@@ -430,14 +430,14 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
 	 * It is going to be referred to from the inode itself? OK, just put it
 	 * into the same cylinder group then.
 	 */
-	bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group);
+	bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
 	colour = (current->pid % 16) *
-			(EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
 	return bg_start + colour;
 }
 
 /**
- *	ext3_find_goal - find a prefered place for allocation.
+ *	ext4_find_goal - find a preferred place for allocation.
  *	@inode: owner
  *	@block:  block we want
  *	@chain:  chain of indirect blocks
@@ -448,12 +448,12 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
  *	stores it in *@goal and returns zero.
  */
 
-static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
+static ext4_fsblk_t ext4_find_goal(struct inode *inode, long block,
 		Indirect chain[4], Indirect *partial)
 {
-	struct ext3_block_alloc_info *block_i;
+	struct ext4_block_alloc_info *block_i;
 
-	block_i =  EXT3_I(inode)->i_block_alloc_info;
+	block_i =  EXT4_I(inode)->i_block_alloc_info;
 
 	/*
 	 * try the heuristic for sequential allocation,
@@ -464,11 +464,11 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
 		return block_i->last_alloc_physical_block + 1;
 	}
 
-	return ext3_find_near(inode, partial);
+	return ext4_find_near(inode, partial);
 }
 
 /**
- *	ext3_blks_to_allocate: Look up the block map and count the number
+ *	ext4_blks_to_allocate: Look up the block map and count the number
  *	of direct blocks need to be allocated for the given branch.
  *
  *	@branch: chain of indirect blocks
@@ -479,7 +479,7 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
  *	return the total number of blocks to be allocate, including the
  *	direct and indirect blocks.
  */
-static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
+static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
 		int blocks_to_boundary)
 {
 	unsigned long count = 0;
@@ -506,7 +506,7 @@ static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
 }
 
 /**
- *	ext3_alloc_blocks: multiple allocate blocks needed for a branch
+ *	ext4_alloc_blocks: allocate multiple blocks needed for a branch
  *	@indirect_blks: the number of blocks need to allocate for indirect
  *			blocks
  *
@@ -515,14 +515,14 @@ static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
  *	@blks:	on return it will store the total number of allocated
  *		direct blocks
  */
-static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
-			ext3_fsblk_t goal, int indirect_blks, int blks,
-			ext3_fsblk_t new_blocks[4], int *err)
+static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
+			ext4_fsblk_t goal, int indirect_blks, int blks,
+			ext4_fsblk_t new_blocks[4], int *err)
 {
 	int target, i;
 	unsigned long count = 0;
 	int index = 0;
-	ext3_fsblk_t current_block = 0;
+	ext4_fsblk_t current_block = 0;
 	int ret = 0;
 
 	/*
@@ -538,7 +538,7 @@ static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
 	while (1) {
 		count = target;
 		/* allocating blocks for indirect blocks and direct blocks */
-		current_block = ext3_new_blocks(handle,inode,goal,&count,err);
+		current_block = ext4_new_blocks(handle,inode,goal,&count,err);
 		if (*err)
 			goto failed_out;
 
@@ -562,12 +562,12 @@ static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
 	return ret;
 failed_out:
 	for (i = 0; i <index; i++)
-		ext3_free_blocks(handle, inode, new_blocks[i], 1);
+		ext4_free_blocks(handle, inode, new_blocks[i], 1);
 	return ret;
 }
 
 /**
- *	ext3_alloc_branch - allocate and set up a chain of blocks.
+ *	ext4_alloc_branch - allocate and set up a chain of blocks.
  *	@inode: owner
  *	@indirect_blks: number of allocated indirect blocks
  *	@blks: number of allocated direct blocks
@@ -578,21 +578,21 @@ failed_out:
  *	links them into chain and (if we are synchronous) writes them to disk.
  *	In other words, it prepares a branch that can be spliced onto the
  *	inode. It stores the information about that chain in the branch[], in
- *	the same format as ext3_get_branch() would do. We are calling it after
+ *	the same format as ext4_get_branch() would do. We are calling it after
  *	we had read the existing part of chain and partial points to the last
  *	triple of that (one with zero ->key). Upon the exit we have the same
- *	picture as after the successful ext3_get_block(), except that in one
+ *	picture as after the successful ext4_get_block(), except that in one
  *	place chain is disconnected - *branch->p is still zero (we did not
  *	set the last link), but branch->key contains the number that should
  *	be placed into *branch->p to fill that gap.
  *
  *	If allocation fails we free all blocks we've allocated (and forget
  *	their buffer_heads) and return the error value the from failed
- *	ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain
+ *	ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
  *	as described above and return 0.
  */
-static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
-			int indirect_blks, int *blks, ext3_fsblk_t goal,
+static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
+			int indirect_blks, int *blks, ext4_fsblk_t goal,
 			int *offsets, Indirect *branch)
 {
 	int blocksize = inode->i_sb->s_blocksize;
@@ -600,10 +600,10 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
 	int err = 0;
 	struct buffer_head *bh;
 	int num;
-	ext3_fsblk_t new_blocks[4];
-	ext3_fsblk_t current_block;
+	ext4_fsblk_t new_blocks[4];
+	ext4_fsblk_t current_block;
 
-	num = ext3_alloc_blocks(handle, inode, goal, indirect_blks,
+	num = ext4_alloc_blocks(handle, inode, goal, indirect_blks,
 				*blks, new_blocks, &err);
 	if (err)
 		return err;
@@ -622,7 +622,7 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
 		branch[n].bh = bh;
 		lock_buffer(bh);
 		BUFFER_TRACE(bh, "call get_create_access");
-		err = ext3_journal_get_create_access(handle, bh);
+		err = ext4_journal_get_create_access(handle, bh);
 		if (err) {
 			unlock_buffer(bh);
 			brelse(bh);
@@ -647,8 +647,8 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
 
-		BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-		err = ext3_journal_dirty_metadata(handle, bh);
+		BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
+		err = ext4_journal_dirty_metadata(handle, bh);
 		if (err)
 			goto failed;
 	}
@@ -658,22 +658,22 @@ failed:
 	/* Allocation failed, free what we already allocated */
 	for (i = 1; i <= n ; i++) {
 		BUFFER_TRACE(branch[i].bh, "call journal_forget");
-		ext3_journal_forget(handle, branch[i].bh);
+		ext4_journal_forget(handle, branch[i].bh);
 	}
 	for (i = 0; i <indirect_blks; i++)
-		ext3_free_blocks(handle, inode, new_blocks[i], 1);
+		ext4_free_blocks(handle, inode, new_blocks[i], 1);
 
-	ext3_free_blocks(handle, inode, new_blocks[i], num);
+	ext4_free_blocks(handle, inode, new_blocks[i], num);
 
 	return err;
 }
 
 /**
- * ext3_splice_branch - splice the allocated branch onto inode.
+ * ext4_splice_branch - splice the allocated branch onto inode.
  * @inode: owner
  * @block: (logical) number of block we are adding
  * @chain: chain of indirect blocks (with a missing link - see
- *	ext3_alloc_branch)
+ *	ext4_alloc_branch)
  * @where: location of missing link
  * @num:   number of indirect blocks we are adding
  * @blks:  number of direct blocks we are adding
@@ -682,15 +682,15 @@ failed:
  * inode (->i_blocks, etc.). In case of success we end up with the full
  * chain to new block and return 0.
  */
-static int ext3_splice_branch(handle_t *handle, struct inode *inode,
+static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 			long block, Indirect *where, int num, int blks)
 {
 	int i;
 	int err = 0;
-	struct ext3_block_alloc_info *block_i;
-	ext3_fsblk_t current_block;
+	struct ext4_block_alloc_info *block_i;
+	ext4_fsblk_t current_block;
 
-	block_i = EXT3_I(inode)->i_block_alloc_info;
+	block_i = EXT4_I(inode)->i_block_alloc_info;
 	/*
 	 * If we're splicing into a [td]indirect block (as opposed to the
 	 * inode) then we need to get write access to the [td]indirect block
@@ -698,7 +698,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 	 */
 	if (where->bh) {
 		BUFFER_TRACE(where->bh, "get_write_access");
-		err = ext3_journal_get_write_access(handle, where->bh);
+		err = ext4_journal_get_write_access(handle, where->bh);
 		if (err)
 			goto err_out;
 	}
@@ -730,7 +730,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 	/* We are done with atomic stuff, now do the rest of housekeeping */
 
 	inode->i_ctime = CURRENT_TIME_SEC;
-	ext3_mark_inode_dirty(handle, inode);
+	ext4_mark_inode_dirty(handle, inode);
 
 	/* had we spliced it onto indirect block? */
 	if (where->bh) {
@@ -740,11 +740,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 		 * onto an indirect block at the very end of the file (the
 		 * file is growing) then we *will* alter the inode to reflect
 		 * the new i_size.  But that is not done here - it is done in
-		 * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode.
+		 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
 		 */
 		jbd_debug(5, "splicing indirect only\n");
-		BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata");
-		err = ext3_journal_dirty_metadata(handle, where->bh);
+		BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata");
+		err = ext4_journal_dirty_metadata(handle, where->bh);
 		if (err)
 			goto err_out;
 	} else {
@@ -759,10 +759,10 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 err_out:
 	for (i = 1; i <= num; i++) {
 		BUFFER_TRACE(where[i].bh, "call journal_forget");
-		ext3_journal_forget(handle, where[i].bh);
-		ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
+		ext4_journal_forget(handle, where[i].bh);
+		ext4_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
 	}
-	ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
+	ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
 
 	return err;
 }
@@ -786,7 +786,7 @@ err_out:
  * return = 0, if plain lookup failed.
  * return < 0, error case.
  */
-int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
+int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 		sector_t iblock, unsigned long maxblocks,
 		struct buffer_head *bh_result,
 		int create, int extend_disksize)
@@ -795,22 +795,22 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 	int offsets[4];
 	Indirect chain[4];
 	Indirect *partial;
-	ext3_fsblk_t goal;
+	ext4_fsblk_t goal;
 	int indirect_blks;
 	int blocks_to_boundary = 0;
 	int depth;
-	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext4_inode_info *ei = EXT4_I(inode);
 	int count = 0;
-	ext3_fsblk_t first_block = 0;
+	ext4_fsblk_t first_block = 0;
 
 
 	J_ASSERT(handle != NULL || create == 0);
-	depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
+	depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
 
 	if (depth == 0)
 		goto out;
 
-	partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+	partial = ext4_get_branch(inode, depth, offsets, chain, &err);
 
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
@@ -819,7 +819,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 		count++;
 		/*map more blocks*/
 		while (count < maxblocks && count <= blocks_to_boundary) {
-			ext3_fsblk_t blk;
+			ext4_fsblk_t blk;
 
 			if (!verify_chain(chain, partial)) {
 				/*
@@ -852,7 +852,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 
 	/*
 	 * If the indirect block is missing while we are reading
-	 * the chain(ext3_get_branch() returns -EAGAIN err), or
+	 * the chain (ext4_get_branch() returns -EAGAIN err), or
 	 * if the chain has been changed after we grab the semaphore,
 	 * (either because another process truncated this branch, or
 	 * another get_block allocated this branch) re-grab the chain to see if
@@ -867,7 +867,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 			brelse(partial->bh);
 			partial--;
 		}
-		partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+		partial = ext4_get_branch(inode, depth, offsets, chain, &err);
 		if (!partial) {
 			count++;
 			mutex_unlock(&ei->truncate_mutex);
@@ -883,9 +883,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 	 * allocation info here if necessary
 	*/
 	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
-		ext3_init_block_alloc_info(inode);
+		ext4_init_block_alloc_info(inode);
 
-	goal = ext3_find_goal(inode, iblock, chain, partial);
+	goal = ext4_find_goal(inode, iblock, chain, partial);
 
 	/* the number of blocks need to allocate for [d,t]indirect blocks */
 	indirect_blks = (chain + depth) - partial - 1;
@@ -894,28 +894,28 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 	 * Next look up the indirect map to count the totoal number of
 	 * direct blocks to allocate for this branch.
 	 */
-	count = ext3_blks_to_allocate(partial, indirect_blks,
+	count = ext4_blks_to_allocate(partial, indirect_blks,
 					maxblocks, blocks_to_boundary);
 	/*
-	 * Block out ext3_truncate while we alter the tree
+	 * Block out ext4_truncate while we alter the tree
 	 */
-	err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
+	err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal,
 				offsets + (partial - chain), partial);
 
 	/*
-	 * The ext3_splice_branch call will free and forget any buffers
+	 * The ext4_splice_branch call will free and forget any buffers
 	 * on the new chain if there is a failure, but that risks using
 	 * up transaction credits, especially for bitmaps where the
 	 * credits cannot be returned.  Can we handle this somehow?  We
 	 * may need to return -EAGAIN upwards in the worst case.  --sct
 	 */
 	if (!err)
-		err = ext3_splice_branch(handle, inode, iblock,
+		err = ext4_splice_branch(handle, inode, iblock,
 					partial, indirect_blks, count);
 	/*
 	 * i_disksize growing is protected by truncate_mutex.  Don't forget to
 	 * protect it if you're about to implement concurrent
-	 * ext3_get_block() -bzzz
+	 * ext4_get_block() -bzzz
 	*/
 	if (!err && extend_disksize && inode->i_size > ei->i_disksize)
 		ei->i_disksize = inode->i_size;
@@ -942,9 +942,9 @@ out:
 	return err;
 }
 
-#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
+#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
 
-static int ext3_get_block(struct inode *inode, sector_t iblock,
+static int ext4_get_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
 	handle_t *handle = journal_current_handle();
@@ -962,29 +962,29 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
 		 * Huge direct-io writes can hold off commits for long
 		 * periods of time.  Let this commit run.
 		 */
-		ext3_journal_stop(handle);
-		handle = ext3_journal_start(inode, DIO_CREDITS);
+		ext4_journal_stop(handle);
+		handle = ext4_journal_start(inode, DIO_CREDITS);
 		if (IS_ERR(handle))
 			ret = PTR_ERR(handle);
 		goto get_block;
 	}
 
-	if (handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) {
+	if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) {
 		/*
 		 * Getting low on buffer credits...
 		 */
-		ret = ext3_journal_extend(handle, DIO_CREDITS);
+		ret = ext4_journal_extend(handle, DIO_CREDITS);
 		if (ret > 0) {
 			/*
 			 * Couldn't extend the transaction.  Start a new one.
 			 */
-			ret = ext3_journal_restart(handle, DIO_CREDITS);
+			ret = ext4_journal_restart(handle, DIO_CREDITS);
 		}
 	}
 
 get_block:
 	if (ret == 0) {
-		ret = ext3_get_blocks_handle(handle, inode, iblock,
+		ret = ext4_get_blocks_handle(handle, inode, iblock,
 					max_blocks, bh_result, create, 0);
 		if (ret > 0) {
 			bh_result->b_size = (ret << inode->i_blkbits);
@@ -997,7 +997,7 @@ get_block:
 /*
  * `handle' can be NULL if create is zero
  */
-struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
+struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 				long block, int create, int *errp)
 {
 	struct buffer_head dummy;
@@ -1008,10 +1008,10 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
 	dummy.b_state = 0;
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
-	err = ext3_get_blocks_handle(handle, inode, block, 1,
+	err = ext4_get_blocks_handle(handle, inode, block, 1,
 					&dummy, create, 1);
 	/*
-	 * ext3_get_blocks_handle() returns number of blocks
+	 * ext4_get_blocks_handle() returns number of blocks
 	 * mapped. 0 in case of a HOLE.
 	 */
 	if (err > 0) {
@@ -1035,19 +1035,19 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
 			 * Now that we do not always journal data, we should
 			 * keep in mind whether this should always journal the
 			 * new buffer as metadata.  For now, regular file
-			 * writes use ext3_get_block instead, so it's not a
+			 * writes use ext4_get_block instead, so it's not a
 			 * problem.
 			 */
 			lock_buffer(bh);
 			BUFFER_TRACE(bh, "call get_create_access");
-			fatal = ext3_journal_get_create_access(handle, bh);
+			fatal = ext4_journal_get_create_access(handle, bh);
 			if (!fatal && !buffer_uptodate(bh)) {
 				memset(bh->b_data,0,inode->i_sb->s_blocksize);
 				set_buffer_uptodate(bh);
 			}
 			unlock_buffer(bh);
-			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-			err = ext3_journal_dirty_metadata(handle, bh);
+			BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
+			err = ext4_journal_dirty_metadata(handle, bh);
 			if (!fatal)
 				fatal = err;
 		} else {
@@ -1064,12 +1064,12 @@ err:
 	return NULL;
 }
 
-struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
+struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
 			       int block, int create, int *err)
 {
 	struct buffer_head * bh;
 
-	bh = ext3_getblk(handle, inode, block, create, err);
+	bh = ext4_getblk(handle, inode, block, create, err);
 	if (!bh)
 		return bh;
 	if (buffer_uptodate(bh))
@@ -1118,17 +1118,17 @@ static int walk_page_buffers(	handle_t *handle,
 /*
  * To preserve ordering, it is essential that the hole instantiation and
  * the data write be encapsulated in a single transaction.  We cannot
- * close off a transaction and start a new one between the ext3_get_block()
+ * close off a transaction and start a new one between the ext4_get_block()
  * and the commit_write().  So doing the journal_start at the start of
  * prepare_write() is the right place.
  *
- * Also, this function can nest inside ext3_writepage() ->
- * block_write_full_page(). In that case, we *know* that ext3_writepage()
+ * Also, this function can nest inside ext4_writepage() ->
+ * block_write_full_page(). In that case, we *know* that ext4_writepage()
  * has generated enough buffer credits to do the whole page.  So we won't
  * block on the journal in that case, which is good, because the caller may
  * be PF_MEMALLOC.
  *
- * By accident, ext3 can be reentered when a transaction is open via
+ * By accident, ext4 can be reentered when a transaction is open via
  * quota file writes.  If we were to commit the transaction while thus
  * reentered, there can be a deadlock - we would be holding a quota
  * lock, and the commit would never complete if another thread had a
@@ -1145,48 +1145,48 @@ static int do_journal_get_write_access(handle_t *handle,
 {
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
-	return ext3_journal_get_write_access(handle, bh);
+	return ext4_journal_get_write_access(handle, bh);
 }
 
-static int ext3_prepare_write(struct file *file, struct page *page,
+static int ext4_prepare_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
+	int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
 
 retry:
-	handle = ext3_journal_start(inode, needed_blocks);
+	handle = ext4_journal_start(inode, needed_blocks);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		goto out;
 	}
-	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
-		ret = nobh_prepare_write(page, from, to, ext3_get_block);
+	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
+		ret = nobh_prepare_write(page, from, to, ext4_get_block);
 	else
-		ret = block_prepare_write(page, from, to, ext3_get_block);
+		ret = block_prepare_write(page, from, to, ext4_get_block);
 	if (ret)
 		goto prepare_write_failed;
 
-	if (ext3_should_journal_data(inode)) {
+	if (ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
 	}
 prepare_write_failed:
 	if (ret)
-		ext3_journal_stop(handle);
-	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+		ext4_journal_stop(handle);
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 out:
 	return ret;
 }
 
-int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
+int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 {
 	int err = journal_dirty_data(handle, bh);
 	if (err)
-		ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__,
+		ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__,
 						bh, handle,err);
 	return err;
 }
@@ -1197,25 +1197,25 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
 	set_buffer_uptodate(bh);
-	return ext3_journal_dirty_metadata(handle, bh);
+	return ext4_journal_dirty_metadata(handle, bh);
 }
 
 /*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
  *
- * ext3 never places buffers on inode->i_mapping->private_list.  metadata
+ * ext4 never places buffers on inode->i_mapping->private_list.  metadata
  * buffers are managed internally.
  */
-static int ext3_ordered_commit_write(struct file *file, struct page *page,
+static int ext4_ordered_commit_write(struct file *file, struct page *page,
 			     unsigned from, unsigned to)
 {
-	handle_t *handle = ext3_journal_current_handle();
+	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = page->mapping->host;
 	int ret = 0, ret2;
 
 	ret = walk_page_buffers(handle, page_buffers(page),
-		from, to, NULL, ext3_journal_dirty_data);
+		from, to, NULL, ext4_journal_dirty_data);
 
 	if (ret == 0) {
 		/*
@@ -1226,43 +1226,43 @@ static int ext3_ordered_commit_write(struct file *file, struct page *page,
 		loff_t new_i_size;
 
 		new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-		if (new_i_size > EXT3_I(inode)->i_disksize)
-			EXT3_I(inode)->i_disksize = new_i_size;
+		if (new_i_size > EXT4_I(inode)->i_disksize)
+			EXT4_I(inode)->i_disksize = new_i_size;
 		ret = generic_commit_write(file, page, from, to);
 	}
-	ret2 = ext3_journal_stop(handle);
+	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
 	return ret;
 }
 
-static int ext3_writeback_commit_write(struct file *file, struct page *page,
+static int ext4_writeback_commit_write(struct file *file, struct page *page,
 			     unsigned from, unsigned to)
 {
-	handle_t *handle = ext3_journal_current_handle();
+	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = page->mapping->host;
 	int ret = 0, ret2;
 	loff_t new_i_size;
 
 	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-	if (new_i_size > EXT3_I(inode)->i_disksize)
-		EXT3_I(inode)->i_disksize = new_i_size;
+	if (new_i_size > EXT4_I(inode)->i_disksize)
+		EXT4_I(inode)->i_disksize = new_i_size;
 
-	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
+	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
 		ret = nobh_commit_write(file, page, from, to);
 	else
 		ret = generic_commit_write(file, page, from, to);
 
-	ret2 = ext3_journal_stop(handle);
+	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
 	return ret;
 }
 
-static int ext3_journalled_commit_write(struct file *file,
+static int ext4_journalled_commit_write(struct file *file,
 			struct page *page, unsigned from, unsigned to)
 {
-	handle_t *handle = ext3_journal_current_handle();
+	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = page->mapping->host;
 	int ret = 0, ret2;
 	int partial = 0;
@@ -1279,14 +1279,14 @@ static int ext3_journalled_commit_write(struct file *file,
 		SetPageUptodate(page);
 	if (pos > inode->i_size)
 		i_size_write(inode, pos);
-	EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
-	if (inode->i_size > EXT3_I(inode)->i_disksize) {
-		EXT3_I(inode)->i_disksize = inode->i_size;
-		ret2 = ext3_mark_inode_dirty(handle, inode);
+	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
+	if (inode->i_size > EXT4_I(inode)->i_disksize) {
+		EXT4_I(inode)->i_disksize = inode->i_size;
+		ret2 = ext4_mark_inode_dirty(handle, inode);
 		if (!ret)
 			ret = ret2;
 	}
-	ret2 = ext3_journal_stop(handle);
+	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
 	return ret;
@@ -1297,7 +1297,7 @@ static int ext3_journalled_commit_write(struct file *file,
  * the swapper to find the on-disk block of a specific piece of data.
  *
  * Naturally, this is dangerous if the block concerned is still in the
- * journal.  If somebody makes a swapfile on an ext3 data-journaling
+ * journal.  If somebody makes a swapfile on an ext4 data-journaling
  * filesystem and enables swap, then they may get a nasty shock when the
  * data getting swapped to that swapfile suddenly gets overwritten by
  * the original zero's written out previously to the journal and
@@ -1306,13 +1306,13 @@ static int ext3_journalled_commit_write(struct file *file,
  * So, if we see any bmap calls here on a modified, data-journaled file,
  * take extra steps to flush any blocks which might be in the cache.
  */
-static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
+static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 {
 	struct inode *inode = mapping->host;
 	journal_t *journal;
 	int err;
 
-	if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
+	if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
 		/*
 		 * This is a REALLY heavyweight approach, but the use of
 		 * bmap on dirty files is expected to be extremely rare:
@@ -1324,15 +1324,15 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
 		 * in trouble if mortal users could trigger this path at
 		 * will.)
 		 *
-		 * NB. EXT3_STATE_JDATA is not set on files other than
+		 * NB. EXT4_STATE_JDATA is not set on files other than
 		 * regular files.  If somebody wants to bmap a directory
 		 * or symlink and gets confused because the buffer
 		 * hasn't yet been flushed to disk, they deserve
 		 * everything they get.
 		 */
 
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA;
-		journal = EXT3_JOURNAL(inode);
+		EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA;
+		journal = EXT4_JOURNAL(inode);
 		journal_lock_updates(journal);
 		err = journal_flush(journal);
 		journal_unlock_updates(journal);
@@ -1341,7 +1341,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
 			return 0;
 	}
 
-	return generic_block_bmap(mapping,block,ext3_get_block);
+	return generic_block_bmap(mapping,block,ext4_get_block);
 }
 
 static int bget_one(handle_t *handle, struct buffer_head *bh)
@@ -1359,14 +1359,14 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
 static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
 {
 	if (buffer_mapped(bh))
-		return ext3_journal_dirty_data(handle, bh);
+		return ext4_journal_dirty_data(handle, bh);
 	return 0;
 }
 
 /*
  * Note that we always start a transaction even if we're not journalling
  * data.  This is to preserve ordering: any hole instantiation within
- * __block_write_full_page -> ext3_get_block() should be journalled
+ * __block_write_full_page -> ext4_get_block() should be journalled
  * along with the data so we don't crash and then get metadata which
  * refers to old data.
  *
@@ -1374,14 +1374,14 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
  *
  * Problem:
  *
- *	ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
- *		ext3_writepage()
+ *	ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
+ *		ext4_writepage()
  *
  * Similar for:
  *
- *	ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
+ *	ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
  *
- * Same applies to ext3_get_block().  We will deadlock on various things like
+ * Same applies to ext4_get_block().  We will deadlock on various things like
  * lock_journal and i_truncate_mutex.
  *
  * Setting PF_MEMALLOC here doesn't work - too many internal memory
@@ -1415,7 +1415,7 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
  * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
  * we don't need to open a transaction here.
  */
-static int ext3_ordered_writepage(struct page *page,
+static int ext4_ordered_writepage(struct page *page,
 				struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
@@ -1430,10 +1430,10 @@ static int ext3_ordered_writepage(struct page *page,
 	 * We give up here if we're reentered, because it might be for a
 	 * different filesystem.
 	 */
-	if (ext3_journal_current_handle())
+	if (ext4_journal_current_handle())
 		goto out_fail;
 
-	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
+	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
 
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -1448,7 +1448,7 @@ static int ext3_ordered_writepage(struct page *page,
 	walk_page_buffers(handle, page_bufs, 0,
 			PAGE_CACHE_SIZE, NULL, bget_one);
 
-	ret = block_write_full_page(page, ext3_get_block, wbc);
+	ret = block_write_full_page(page, ext4_get_block, wbc);
 
 	/*
 	 * The page can become unlocked at any point now, and
@@ -1470,7 +1470,7 @@ static int ext3_ordered_writepage(struct page *page,
 	}
 	walk_page_buffers(handle, page_bufs, 0,
 			PAGE_CACHE_SIZE, NULL, bput_one);
-	err = ext3_journal_stop(handle);
+	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
 	return ret;
@@ -1481,7 +1481,7 @@ out_fail:
 	return ret;
 }
 
-static int ext3_writeback_writepage(struct page *page,
+static int ext4_writeback_writepage(struct page *page,
 				struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
@@ -1489,21 +1489,21 @@ static int ext3_writeback_writepage(struct page *page,
 	int ret = 0;
 	int err;
 
-	if (ext3_journal_current_handle())
+	if (ext4_journal_current_handle())
 		goto out_fail;
 
-	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
+	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		goto out_fail;
 	}
 
-	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
-		ret = nobh_writepage(page, ext3_get_block, wbc);
+	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
+		ret = nobh_writepage(page, ext4_get_block, wbc);
 	else
-		ret = block_write_full_page(page, ext3_get_block, wbc);
+		ret = block_write_full_page(page, ext4_get_block, wbc);
 
-	err = ext3_journal_stop(handle);
+	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
 	return ret;
@@ -1514,7 +1514,7 @@ out_fail:
 	return ret;
 }
 
-static int ext3_journalled_writepage(struct page *page,
+static int ext4_journalled_writepage(struct page *page,
 				struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
@@ -1522,10 +1522,10 @@ static int ext3_journalled_writepage(struct page *page,
 	int ret = 0;
 	int err;
 
-	if (ext3_journal_current_handle())
+	if (ext4_journal_current_handle())
 		goto no_write;
 
-	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
+	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		goto no_write;
@@ -1538,9 +1538,9 @@ static int ext3_journalled_writepage(struct page *page,
 		 */
 		ClearPageChecked(page);
 		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-					ext3_get_block);
+					ext4_get_block);
 		if (ret != 0) {
-			ext3_journal_stop(handle);
+			ext4_journal_stop(handle);
 			goto out_unlock;
 		}
 		ret = walk_page_buffers(handle, page_buffers(page), 0,
@@ -1550,7 +1550,7 @@ static int ext3_journalled_writepage(struct page *page,
 				PAGE_CACHE_SIZE, NULL, commit_write_fn);
 		if (ret == 0)
 			ret = err;
-		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
+		EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
 		unlock_page(page);
 	} else {
 		/*
@@ -1558,9 +1558,9 @@ static int ext3_journalled_writepage(struct page *page,
 		 * really know unless we go poke around in the buffer_heads.
 		 * But block_write_full_page will do the right thing.
 		 */
-		ret = block_write_full_page(page, ext3_get_block, wbc);
+		ret = block_write_full_page(page, ext4_get_block, wbc);
 	}
-	err = ext3_journal_stop(handle);
+	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
 out:
@@ -1573,21 +1573,21 @@ out_unlock:
 	goto out;
 }
 
-static int ext3_readpage(struct file *file, struct page *page)
+static int ext4_readpage(struct file *file, struct page *page)
 {
-	return mpage_readpage(page, ext3_get_block);
+	return mpage_readpage(page, ext4_get_block);
 }
 
 static int
-ext3_readpages(struct file *file, struct address_space *mapping,
+ext4_readpages(struct file *file, struct address_space *mapping,
 		struct list_head *pages, unsigned nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);
+	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
 }
 
-static void ext3_invalidatepage(struct page *page, unsigned long offset)
+static void ext4_invalidatepage(struct page *page, unsigned long offset)
 {
-	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
 	/*
 	 * If it's a full truncate we just forget about the pending dirtying
@@ -1598,9 +1598,9 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
 	journal_invalidatepage(journal, page, offset);
 }
 
-static int ext3_releasepage(struct page *page, gfp_t wait)
+static int ext4_releasepage(struct page *page, gfp_t wait)
 {
-	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
 	WARN_ON(PageChecked(page));
 	if (!page_has_buffers(page))
@@ -1616,13 +1616,13 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
  * If the O_DIRECT write is intantiating holes inside i_size and the machine
  * crashes then stale disk data _may_ be exposed inside the file.
  */
-static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
+static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
 			const struct iovec *iov, loff_t offset,
 			unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext4_inode_info *ei = EXT4_I(inode);
 	handle_t *handle = NULL;
 	ssize_t ret;
 	int orphan = 0;
@@ -1631,13 +1631,13 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 	if (rw == WRITE) {
 		loff_t final_size = offset + count;
 
-		handle = ext3_journal_start(inode, DIO_CREDITS);
+		handle = ext4_journal_start(inode, DIO_CREDITS);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			goto out;
 		}
 		if (final_size > inode->i_size) {
-			ret = ext3_orphan_add(handle, inode);
+			ret = ext4_orphan_add(handle, inode);
 			if (ret)
 				goto out_stop;
 			orphan = 1;
@@ -1647,10 +1647,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 
 	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
-				 ext3_get_block, NULL);
+				 ext4_get_block, NULL);
 
 	/*
-	 * Reacquire the handle: ext3_get_block() can restart the transaction
+	 * Reacquire the handle: ext4_get_block() can restart the transaction
 	 */
 	handle = journal_current_handle();
 
@@ -1659,7 +1659,7 @@ out_stop:
 		int err;
 
 		if (orphan && inode->i_nlink)
-			ext3_orphan_del(handle, inode);
+			ext4_orphan_del(handle, inode);
 		if (orphan && ret > 0) {
 			loff_t end = offset + ret;
 			if (end > inode->i_size) {
@@ -1669,13 +1669,13 @@ out_stop:
 				 * We're going to return a positive `ret'
 				 * here due to non-zero-length I/O, so there's
 				 * no way of reporting error returns from
-				 * ext3_mark_inode_dirty() to userspace.  So
+				 * ext4_mark_inode_dirty() to userspace.  So
 				 * ignore it.
 				 */
-				ext3_mark_inode_dirty(handle, inode);
+				ext4_mark_inode_dirty(handle, inode);
 			}
 		}
-		err = ext3_journal_stop(handle);
+		err = ext4_journal_stop(handle);
 		if (ret == 0)
 			ret = err;
 	}
@@ -1684,7 +1684,7 @@ out:
 }
 
 /*
- * Pages can be marked dirty completely asynchronously from ext3's journalling
+ * Pages can be marked dirty completely asynchronously from ext4's journalling
  * activity.  By filemap_sync_pte(), try_to_unmap_one(), etc.  We cannot do
  * much here because ->set_page_dirty is called under VFS locks.  The page is
  * not necessarily locked.
@@ -1696,73 +1696,73 @@ out:
  * So what we do is to mark the page "pending dirty" and next time writepage
  * is called, propagate that into the buffers appropriately.
  */
-static int ext3_journalled_set_page_dirty(struct page *page)
+static int ext4_journalled_set_page_dirty(struct page *page)
 {
 	SetPageChecked(page);
 	return __set_page_dirty_nobuffers(page);
 }
 
-static const struct address_space_operations ext3_ordered_aops = {
-	.readpage	= ext3_readpage,
-	.readpages	= ext3_readpages,
-	.writepage	= ext3_ordered_writepage,
+static const struct address_space_operations ext4_ordered_aops = {
+	.readpage	= ext4_readpage,
+	.readpages	= ext4_readpages,
+	.writepage	= ext4_ordered_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext3_prepare_write,
-	.commit_write	= ext3_ordered_commit_write,
-	.bmap		= ext3_bmap,
-	.invalidatepage	= ext3_invalidatepage,
-	.releasepage	= ext3_releasepage,
-	.direct_IO	= ext3_direct_IO,
+	.prepare_write	= ext4_prepare_write,
+	.commit_write	= ext4_ordered_commit_write,
+	.bmap		= ext4_bmap,
+	.invalidatepage	= ext4_invalidatepage,
+	.releasepage	= ext4_releasepage,
+	.direct_IO	= ext4_direct_IO,
 	.migratepage	= buffer_migrate_page,
 };
 
-static const struct address_space_operations ext3_writeback_aops = {
-	.readpage	= ext3_readpage,
-	.readpages	= ext3_readpages,
-	.writepage	= ext3_writeback_writepage,
+static const struct address_space_operations ext4_writeback_aops = {
+	.readpage	= ext4_readpage,
+	.readpages	= ext4_readpages,
+	.writepage	= ext4_writeback_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext3_prepare_write,
-	.commit_write	= ext3_writeback_commit_write,
-	.bmap		= ext3_bmap,
-	.invalidatepage	= ext3_invalidatepage,
-	.releasepage	= ext3_releasepage,
-	.direct_IO	= ext3_direct_IO,
+	.prepare_write	= ext4_prepare_write,
+	.commit_write	= ext4_writeback_commit_write,
+	.bmap		= ext4_bmap,
+	.invalidatepage	= ext4_invalidatepage,
+	.releasepage	= ext4_releasepage,
+	.direct_IO	= ext4_direct_IO,
 	.migratepage	= buffer_migrate_page,
 };
 
-static const struct address_space_operations ext3_journalled_aops = {
-	.readpage	= ext3_readpage,
-	.readpages	= ext3_readpages,
-	.writepage	= ext3_journalled_writepage,
+static const struct address_space_operations ext4_journalled_aops = {
+	.readpage	= ext4_readpage,
+	.readpages	= ext4_readpages,
+	.writepage	= ext4_journalled_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext3_prepare_write,
-	.commit_write	= ext3_journalled_commit_write,
-	.set_page_dirty	= ext3_journalled_set_page_dirty,
-	.bmap		= ext3_bmap,
-	.invalidatepage	= ext3_invalidatepage,
-	.releasepage	= ext3_releasepage,
+	.prepare_write	= ext4_prepare_write,
+	.commit_write	= ext4_journalled_commit_write,
+	.set_page_dirty	= ext4_journalled_set_page_dirty,
+	.bmap		= ext4_bmap,
+	.invalidatepage	= ext4_invalidatepage,
+	.releasepage	= ext4_releasepage,
 };
 
-void ext3_set_aops(struct inode *inode)
+void ext4_set_aops(struct inode *inode)
 {
-	if (ext3_should_order_data(inode))
-		inode->i_mapping->a_ops = &ext3_ordered_aops;
-	else if (ext3_should_writeback_data(inode))
-		inode->i_mapping->a_ops = &ext3_writeback_aops;
+	if (ext4_should_order_data(inode))
+		inode->i_mapping->a_ops = &ext4_ordered_aops;
+	else if (ext4_should_writeback_data(inode))
+		inode->i_mapping->a_ops = &ext4_writeback_aops;
 	else
-		inode->i_mapping->a_ops = &ext3_journalled_aops;
+		inode->i_mapping->a_ops = &ext4_journalled_aops;
 }
 
 /*
- * ext3_block_truncate_page() zeroes out a mapping from file offset `from'
+ * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
  * up to the end of the block which corresponds to `from'.
  * This required during truncate. We need to physically zero the tail end
  * of that block so it doesn't yield old data if the file is later grown.
  */
-static int ext3_block_truncate_page(handle_t *handle, struct page *page,
+static int ext4_block_truncate_page(handle_t *handle, struct page *page,
 		struct address_space *mapping, loff_t from)
 {
-	ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
+	ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
 	unsigned blocksize, iblock, length, pos;
 	struct inode *inode = mapping->host;
@@ -1779,7 +1779,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
 	 * read-in the page - otherwise we create buffers to do the IO.
 	 */
 	if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
-	     ext3_should_writeback_data(inode) && PageUptodate(page)) {
+	     ext4_should_writeback_data(inode) && PageUptodate(page)) {
 		kaddr = kmap_atomic(page, KM_USER0);
 		memset(kaddr + offset, 0, length);
 		flush_dcache_page(page);
@@ -1808,7 +1808,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
 
 	if (!buffer_mapped(bh)) {
 		BUFFER_TRACE(bh, "unmapped");
-		ext3_get_block(inode, iblock, bh, 0);
+		ext4_get_block(inode, iblock, bh, 0);
 		/* unmapped? It's a hole - nothing to do */
 		if (!buffer_mapped(bh)) {
 			BUFFER_TRACE(bh, "still unmapped");
@@ -1829,9 +1829,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
 			goto unlock;
 	}
 
-	if (ext3_should_journal_data(inode)) {
+	if (ext4_should_journal_data(inode)) {
 		BUFFER_TRACE(bh, "get write access");
-		err = ext3_journal_get_write_access(handle, bh);
+		err = ext4_journal_get_write_access(handle, bh);
 		if (err)
 			goto unlock;
 	}
@@ -1844,11 +1844,11 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
 	BUFFER_TRACE(bh, "zeroed end of block");
 
 	err = 0;
-	if (ext3_should_journal_data(inode)) {
-		err = ext3_journal_dirty_metadata(handle, bh);
+	if (ext4_should_journal_data(inode)) {
+		err = ext4_journal_dirty_metadata(handle, bh);
 	} else {
-		if (ext3_should_order_data(inode))
-			err = ext3_journal_dirty_data(handle, bh);
+		if (ext4_should_order_data(inode))
+			err = ext4_journal_dirty_data(handle, bh);
 		mark_buffer_dirty(bh);
 	}
 
@@ -1872,14 +1872,14 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
 }
 
 /**
- *	ext3_find_shared - find the indirect blocks for partial truncation.
+ *	ext4_find_shared - find the indirect blocks for partial truncation.
  *	@inode:	  inode in question
  *	@depth:	  depth of the affected branch
- *	@offsets: offsets of pointers in that branch (see ext3_block_to_path)
+ *	@offsets: offsets of pointers in that branch (see ext4_block_to_path)
  *	@chain:	  place to store the pointers to partial indirect blocks
  *	@top:	  place to the (detached) top of branch
  *
- *	This is a helper function used by ext3_truncate().
+ *	This is a helper function used by ext4_truncate().
  *
  *	When we do truncate() we may have to clean the ends of several
  *	indirect blocks but leave the blocks themselves alive. Block is
@@ -1887,7 +1887,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
  *	from it (and it is on the path to the first completely truncated
  *	data block, indeed).  We have to free the top of that path along
  *	with everything to the right of the path. Since no allocation
- *	past the truncation point is possible until ext3_truncate()
+ *	past the truncation point is possible until ext4_truncate()
  *	finishes, we may safely do the latter, but top of branch may
  *	require special attention - pageout below the truncation point
  *	might try to populate it.
@@ -1906,7 +1906,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
  *		c) free the subtrees growing from the inode past the @chain[0].
  *			(no partially truncated stuff there).  */
 
-static Indirect *ext3_find_shared(struct inode *inode, int depth,
+static Indirect *ext4_find_shared(struct inode *inode, int depth,
 			int offsets[4], Indirect chain[4], __le32 *top)
 {
 	Indirect *partial, *p;
@@ -1916,7 +1916,7 @@ static Indirect *ext3_find_shared(struct inode *inode, int depth,
 	/* Make k index the deepest non-null offest + 1 */
 	for (k = depth; k > 1 && !offsets[k-1]; k--)
 		;
-	partial = ext3_get_branch(inode, k, offsets, chain, &err);
+	partial = ext4_get_branch(inode, k, offsets, chain, &err);
 	/* Writer: pointers */
 	if (!partial)
 		partial = chain + k-1;
@@ -1939,7 +1939,7 @@ static Indirect *ext3_find_shared(struct inode *inode, int depth,
 		p->p--;
 	} else {
 		*top = *p->p;
-		/* Nope, don't do this in ext3.  Must leave the tree intact */
+		/* Nope, don't do this in ext4.  Must leave the tree intact */
 #if 0
 		*p->p = 0;
 #endif
@@ -1962,21 +1962,21 @@ no_top:
  * We release `count' blocks on disk, but (last - first) may be greater
  * than `count' because there can be holes in there.
  */
-static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
-		struct buffer_head *bh, ext3_fsblk_t block_to_free,
+static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
+		struct buffer_head *bh, ext4_fsblk_t block_to_free,
 		unsigned long count, __le32 *first, __le32 *last)
 {
 	__le32 *p;
 	if (try_to_extend_transaction(handle, inode)) {
 		if (bh) {
-			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-			ext3_journal_dirty_metadata(handle, bh);
+			BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
+			ext4_journal_dirty_metadata(handle, bh);
 		}
-		ext3_mark_inode_dirty(handle, inode);
-		ext3_journal_test_restart(handle, inode);
+		ext4_mark_inode_dirty(handle, inode);
+		ext4_journal_test_restart(handle, inode);
 		if (bh) {
 			BUFFER_TRACE(bh, "retaking write access");
-			ext3_journal_get_write_access(handle, bh);
+			ext4_journal_get_write_access(handle, bh);
 		}
 	}
 
@@ -1995,15 +1995,15 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
 
 			*p = 0;
 			bh = sb_find_get_block(inode->i_sb, nr);
-			ext3_forget(handle, 0, inode, bh, nr);
+			ext4_forget(handle, 0, inode, bh, nr);
 		}
 	}
 
-	ext3_free_blocks(handle, inode, block_to_free, count);
+	ext4_free_blocks(handle, inode, block_to_free, count);
 }
 
 /**
- * ext3_free_data - free a list of data blocks
+ * ext4_free_data - free a list of data blocks
  * @handle:	handle for this transaction
  * @inode:	inode we are dealing with
  * @this_bh:	indirect buffer_head which contains *@first and *@last
@@ -2021,23 +2021,23 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
  * @this_bh will be %NULL if @first and @last point into the inode's direct
  * block pointers.
  */
-static void ext3_free_data(handle_t *handle, struct inode *inode,
+static void ext4_free_data(handle_t *handle, struct inode *inode,
 			   struct buffer_head *this_bh,
 			   __le32 *first, __le32 *last)
 {
-	ext3_fsblk_t block_to_free = 0;    /* Starting block # of a run */
+	ext4_fsblk_t block_to_free = 0;    /* Starting block # of a run */
 	unsigned long count = 0;	    /* Number of blocks in the run */
 	__le32 *block_to_free_p = NULL;	    /* Pointer into inode/ind
 					       corresponding to
 					       block_to_free */
-	ext3_fsblk_t nr;		    /* Current block # */
+	ext4_fsblk_t nr;		    /* Current block # */
 	__le32 *p;			    /* Pointer into inode/ind
 					       for current block */
 	int err;
 
 	if (this_bh) {				/* For indirect block */
 		BUFFER_TRACE(this_bh, "get_write_access");
-		err = ext3_journal_get_write_access(handle, this_bh);
+		err = ext4_journal_get_write_access(handle, this_bh);
 		/* Important: if we can't update the indirect pointers
 		 * to the blocks, we can't free them. */
 		if (err)
@@ -2055,7 +2055,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
 			} else if (nr == block_to_free + count) {
 				count++;
 			} else {
-				ext3_clear_blocks(handle, inode, this_bh,
+				ext4_clear_blocks(handle, inode, this_bh,
 						  block_to_free,
 						  count, block_to_free_p, p);
 				block_to_free = nr;
@@ -2066,17 +2066,17 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
 	}
 
 	if (count > 0)
-		ext3_clear_blocks(handle, inode, this_bh, block_to_free,
+		ext4_clear_blocks(handle, inode, this_bh, block_to_free,
 				  count, block_to_free_p, p);
 
 	if (this_bh) {
-		BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
-		ext3_journal_dirty_metadata(handle, this_bh);
+		BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata");
+		ext4_journal_dirty_metadata(handle, this_bh);
 	}
 }
 
 /**
- *	ext3_free_branches - free an array of branches
+ *	ext4_free_branches - free an array of branches
  *	@handle: JBD handle for this transaction
  *	@inode:	inode we are dealing with
  *	@parent_bh: the buffer_head which contains *@first and *@last
@@ -2088,11 +2088,11 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
  *	stored as little-endian 32-bit) and updating @inode->i_blocks
  *	appropriately.
  */
-static void ext3_free_branches(handle_t *handle, struct inode *inode,
+static void ext4_free_branches(handle_t *handle, struct inode *inode,
 			       struct buffer_head *parent_bh,
 			       __le32 *first, __le32 *last, int depth)
 {
-	ext3_fsblk_t nr;
+	ext4_fsblk_t nr;
 	__le32 *p;
 
 	if (is_handle_aborted(handle))
@@ -2100,7 +2100,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 
 	if (depth--) {
 		struct buffer_head *bh;
-		int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+		int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
 		p = last;
 		while (--p >= first) {
 			nr = le32_to_cpu(*p);
@@ -2115,7 +2115,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 			 * (should be rare).
 			 */
 			if (!bh) {
-				ext3_error(inode->i_sb, "ext3_free_branches",
+				ext4_error(inode->i_sb, "ext4_free_branches",
 					   "Read failure, inode=%lu, block="E3FSBLK,
 					   inode->i_ino, nr);
 				continue;
@@ -2123,7 +2123,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 
 			/* This zaps the entire block.  Bottom up. */
 			BUFFER_TRACE(bh, "free child branches");
-			ext3_free_branches(handle, inode, bh,
+			ext4_free_branches(handle, inode, bh,
 					   (__le32*)bh->b_data,
 					   (__le32*)bh->b_data + addr_per_block,
 					   depth);
@@ -2138,7 +2138,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 			 * transaction.  But if it's part of the committing
 			 * transaction then journal_forget() will simply
 			 * brelse() it.  That means that if the underlying
-			 * block is reallocated in ext3_get_block(),
+			 * block is reallocated in ext4_get_block(),
 			 * unmap_underlying_metadata() will find this block
 			 * and will try to get rid of it.  damn, damn.
 			 *
@@ -2147,7 +2147,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 			 * revoke records must be emitted *before* clearing
 			 * this block's bit in the bitmaps.
 			 */
-			ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
+			ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
 
 			/*
 			 * Everything below this this pointer has been
@@ -2168,11 +2168,11 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 			if (is_handle_aborted(handle))
 				return;
 			if (try_to_extend_transaction(handle, inode)) {
-				ext3_mark_inode_dirty(handle, inode);
-				ext3_journal_test_restart(handle, inode);
+				ext4_mark_inode_dirty(handle, inode);
+				ext4_journal_test_restart(handle, inode);
 			}
 
-			ext3_free_blocks(handle, inode, nr, 1);
+			ext4_free_blocks(handle, inode, nr, 1);
 
 			if (parent_bh) {
 				/*
@@ -2180,12 +2180,12 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 				 * pointed to by an indirect block: journal it
 				 */
 				BUFFER_TRACE(parent_bh, "get_write_access");
-				if (!ext3_journal_get_write_access(handle,
+				if (!ext4_journal_get_write_access(handle,
 								   parent_bh)){
 					*p = 0;
 					BUFFER_TRACE(parent_bh,
-					"call ext3_journal_dirty_metadata");
-					ext3_journal_dirty_metadata(handle,
+					"call ext4_journal_dirty_metadata");
+					ext4_journal_dirty_metadata(handle,
 								    parent_bh);
 				}
 			}
@@ -2193,15 +2193,15 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 	} else {
 		/* We have reached the bottom of the tree. */
 		BUFFER_TRACE(parent_bh, "free data blocks");
-		ext3_free_data(handle, inode, parent_bh, first, last);
+		ext4_free_data(handle, inode, parent_bh, first, last);
 	}
 }
 
 /*
- * ext3_truncate()
+ * ext4_truncate()
  *
- * We block out ext3_get_block() block instantiations across the entire
- * transaction, and VFS/VM ensures that ext3_truncate() cannot run
+ * We block out ext4_get_block() block instantiations across the entire
+ * transaction, and VFS/VM ensures that ext4_truncate() cannot run
  * simultaneously on behalf of the same inode.
  *
  * As we work through the truncate and commmit bits of it to the journal there
@@ -2218,19 +2218,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
  * truncate against the orphan inode list.
  *
  * The committed inode has the new, desired i_size (which is the same as
- * i_disksize in this case).  After a crash, ext3_orphan_cleanup() will see
+ * i_disksize in this case).  After a crash, ext4_orphan_cleanup() will see
  * that this inode's truncate did not complete and it will again call
- * ext3_truncate() to have another go.  So there will be instantiated blocks
- * to the right of the truncation point in a crashed ext3 filesystem.  But
+ * ext4_truncate() to have another go.  So there will be instantiated blocks
+ * to the right of the truncation point in a crashed ext4 filesystem.  But
  * that's fine - as long as they are linked from the inode, the post-crash
- * ext3_truncate() run will find them and release them.
+ * ext4_truncate() run will find them and release them.
  */
-void ext3_truncate(struct inode *inode)
+void ext4_truncate(struct inode *inode)
 {
 	handle_t *handle;
-	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext4_inode_info *ei = EXT4_I(inode);
 	__le32 *i_data = ei->i_data;
-	int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
 	struct address_space *mapping = inode->i_mapping;
 	int offsets[4];
 	Indirect chain[4];
@@ -2244,7 +2244,7 @@ void ext3_truncate(struct inode *inode)
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 	    S_ISLNK(inode->i_mode)))
 		return;
-	if (ext3_inode_is_fast_symlink(inode))
+	if (ext4_inode_is_fast_symlink(inode))
 		return;
 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 		return;
@@ -2275,12 +2275,12 @@ void ext3_truncate(struct inode *inode)
 	}
 
 	last_block = (inode->i_size + blocksize-1)
-					>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
+					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 
 	if (page)
-		ext3_block_truncate_page(handle, page, mapping, inode->i_size);
+		ext4_block_truncate_page(handle, page, mapping, inode->i_size);
 
-	n = ext3_block_to_path(inode, last_block, offsets, NULL);
+	n = ext4_block_to_path(inode, last_block, offsets, NULL);
 	if (n == 0)
 		goto out_stop;	/* error */
 
@@ -2293,7 +2293,7 @@ void ext3_truncate(struct inode *inode)
 	 * Implication: the file must always be in a sane, consistent
 	 * truncatable state while each transaction commits.
 	 */
-	if (ext3_orphan_add(handle, inode))
+	if (ext4_orphan_add(handle, inode))
 		goto out_stop;
 
 	/*
@@ -2301,28 +2301,28 @@ void ext3_truncate(struct inode *inode)
 	 * occurs before the truncate completes, so it is now safe to propagate
 	 * the new, shorter inode size (held for now in i_size) into the
 	 * on-disk inode. We do this via i_disksize, which is the value which
-	 * ext3 *really* writes onto the disk inode.
+	 * ext4 *really* writes onto the disk inode.
 	 */
 	ei->i_disksize = inode->i_size;
 
 	/*
-	 * From here we block out all ext3_get_block() callers who want to
+	 * From here we block out all ext4_get_block() callers who want to
 	 * modify the block allocation tree.
 	 */
 	mutex_lock(&ei->truncate_mutex);
 
 	if (n == 1) {		/* direct blocks */
-		ext3_free_data(handle, inode, NULL, i_data+offsets[0],
-			       i_data + EXT3_NDIR_BLOCKS);
+		ext4_free_data(handle, inode, NULL, i_data+offsets[0],
+			       i_data + EXT4_NDIR_BLOCKS);
 		goto do_indirects;
 	}
 
-	partial = ext3_find_shared(inode, n, offsets, chain, &nr);
+	partial = ext4_find_shared(inode, n, offsets, chain, &nr);
 	/* Kill the top of shared branch (not detached) */
 	if (nr) {
 		if (partial == chain) {
 			/* Shared branch grows from the inode */
-			ext3_free_branches(handle, inode, NULL,
+			ext4_free_branches(handle, inode, NULL,
 					   &nr, &nr+1, (chain+n-1) - partial);
 			*partial->p = 0;
 			/*
@@ -2332,14 +2332,14 @@ void ext3_truncate(struct inode *inode)
 		} else {
 			/* Shared branch grows from an indirect block */
 			BUFFER_TRACE(partial->bh, "get_write_access");
-			ext3_free_branches(handle, inode, partial->bh,
+			ext4_free_branches(handle, inode, partial->bh,
 					partial->p,
 					partial->p+1, (chain+n-1) - partial);
 		}
 	}
 	/* Clear the ends of indirect blocks on the shared branch */
 	while (partial > chain) {
-		ext3_free_branches(handle, inode, partial->bh, partial->p + 1,
+		ext4_free_branches(handle, inode, partial->bh, partial->p + 1,
 				   (__le32*)partial->bh->b_data+addr_per_block,
 				   (chain+n-1) - partial);
 		BUFFER_TRACE(partial->bh, "call brelse");
@@ -2350,32 +2350,32 @@ do_indirects:
 	/* Kill the remaining (whole) subtrees */
 	switch (offsets[0]) {
 	default:
-		nr = i_data[EXT3_IND_BLOCK];
+		nr = i_data[EXT4_IND_BLOCK];
 		if (nr) {
-			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
-			i_data[EXT3_IND_BLOCK] = 0;
+			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
+			i_data[EXT4_IND_BLOCK] = 0;
 		}
-	case EXT3_IND_BLOCK:
-		nr = i_data[EXT3_DIND_BLOCK];
+	case EXT4_IND_BLOCK:
+		nr = i_data[EXT4_DIND_BLOCK];
 		if (nr) {
-			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
-			i_data[EXT3_DIND_BLOCK] = 0;
+			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
+			i_data[EXT4_DIND_BLOCK] = 0;
 		}
-	case EXT3_DIND_BLOCK:
-		nr = i_data[EXT3_TIND_BLOCK];
+	case EXT4_DIND_BLOCK:
+		nr = i_data[EXT4_TIND_BLOCK];
 		if (nr) {
-			ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
-			i_data[EXT3_TIND_BLOCK] = 0;
+			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
+			i_data[EXT4_TIND_BLOCK] = 0;
 		}
-	case EXT3_TIND_BLOCK:
+	case EXT4_TIND_BLOCK:
 		;
 	}
 
-	ext3_discard_reservation(inode);
+	ext4_discard_reservation(inode);
 
 	mutex_unlock(&ei->truncate_mutex);
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-	ext3_mark_inode_dirty(handle, inode);
+	ext4_mark_inode_dirty(handle, inode);
 
 	/*
 	 * In a multi-transaction truncate, we only make the final transaction
@@ -2388,25 +2388,25 @@ out_stop:
 	 * If this was a simple ftruncate(), and the file will remain alive
 	 * then we need to clear up the orphan record which we created above.
 	 * However, if this was a real unlink then we were called by
-	 * ext3_delete_inode(), and we allow that function to clean up the
+	 * ext4_delete_inode(), and we allow that function to clean up the
 	 * orphan info for us.
 	 */
 	if (inode->i_nlink)
-		ext3_orphan_del(handle, inode);
+		ext4_orphan_del(handle, inode);
 
-	ext3_journal_stop(handle);
+	ext4_journal_stop(handle);
 }
 
-static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
-		unsigned long ino, struct ext3_iloc *iloc)
+static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
+		unsigned long ino, struct ext4_iloc *iloc)
 {
 	unsigned long desc, group_desc, block_group;
 	unsigned long offset;
-	ext3_fsblk_t block;
+	ext4_fsblk_t block;
 	struct buffer_head *bh;
-	struct ext3_group_desc * gdp;
+	struct ext4_group_desc * gdp;
 
-	if (!ext3_valid_inum(sb, ino)) {
+	if (!ext4_valid_inum(sb, ino)) {
 		/*
 		 * This error is already checked for in namei.c unless we are
 		 * looking at an NFS filehandle, in which case no error
@@ -2415,54 +2415,54 @@ static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
 		return 0;
 	}
 
-	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
-	if (block_group >= EXT3_SB(sb)->s_groups_count) {
-		ext3_error(sb,"ext3_get_inode_block","group >= groups count");
+	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
+	if (block_group >= EXT4_SB(sb)->s_groups_count) {
+		ext4_error(sb,"ext4_get_inode_block","group >= groups count");
 		return 0;
 	}
 	smp_rmb();
-	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
-	desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
-	bh = EXT3_SB(sb)->s_group_desc[group_desc];
+	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
+	desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
+	bh = EXT4_SB(sb)->s_group_desc[group_desc];
 	if (!bh) {
-		ext3_error (sb, "ext3_get_inode_block",
+		ext4_error (sb, "ext4_get_inode_block",
 			    "Descriptor not loaded");
 		return 0;
 	}
 
-	gdp = (struct ext3_group_desc *)bh->b_data;
+	gdp = (struct ext4_group_desc *)bh->b_data;
 	/*
 	 * Figure out the offset within the block group inode table
 	 */
-	offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) *
-		EXT3_INODE_SIZE(sb);
+	offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
+		EXT4_INODE_SIZE(sb);
 	block = le32_to_cpu(gdp[desc].bg_inode_table) +
-		(offset >> EXT3_BLOCK_SIZE_BITS(sb));
+		(offset >> EXT4_BLOCK_SIZE_BITS(sb));
 
 	iloc->block_group = block_group;
-	iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1);
+	iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
 	return block;
 }
 
 /*
- * ext3_get_inode_loc returns with an extra refcount against the inode's
+ * ext4_get_inode_loc returns with an extra refcount against the inode's
  * underlying buffer_head on success. If 'in_mem' is true, we have all
  * data in memory that is needed to recreate the on-disk version of this
  * inode.
  */
-static int __ext3_get_inode_loc(struct inode *inode,
-				struct ext3_iloc *iloc, int in_mem)
+static int __ext4_get_inode_loc(struct inode *inode,
+				struct ext4_iloc *iloc, int in_mem)
 {
-	ext3_fsblk_t block;
+	ext4_fsblk_t block;
 	struct buffer_head *bh;
 
-	block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc);
+	block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc);
 	if (!block)
 		return -EIO;
 
 	bh = sb_getblk(inode->i_sb, block);
 	if (!bh) {
-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
+		ext4_error (inode->i_sb, "ext4_get_inode_loc",
 				"unable to read inode block - "
 				"inode=%lu, block="E3FSBLK,
 				 inode->i_ino, block);
@@ -2483,22 +2483,22 @@ static int __ext3_get_inode_loc(struct inode *inode,
 		 */
 		if (in_mem) {
 			struct buffer_head *bitmap_bh;
-			struct ext3_group_desc *desc;
+			struct ext4_group_desc *desc;
 			int inodes_per_buffer;
 			int inode_offset, i;
 			int block_group;
 			int start;
 
 			block_group = (inode->i_ino - 1) /
-					EXT3_INODES_PER_GROUP(inode->i_sb);
+					EXT4_INODES_PER_GROUP(inode->i_sb);
 			inodes_per_buffer = bh->b_size /
-				EXT3_INODE_SIZE(inode->i_sb);
+				EXT4_INODE_SIZE(inode->i_sb);
 			inode_offset = ((inode->i_ino - 1) %
-					EXT3_INODES_PER_GROUP(inode->i_sb));
+					EXT4_INODES_PER_GROUP(inode->i_sb));
 			start = inode_offset & ~(inodes_per_buffer - 1);
 
 			/* Is the inode bitmap in cache? */
-			desc = ext3_get_group_desc(inode->i_sb,
+			desc = ext4_get_group_desc(inode->i_sb,
 						block_group, NULL);
 			if (!desc)
 				goto make_io;
@@ -2520,7 +2520,7 @@ static int __ext3_get_inode_loc(struct inode *inode,
 			for (i = start; i < start + inodes_per_buffer; i++) {
 				if (i == inode_offset)
 					continue;
-				if (ext3_test_bit(i, bitmap_bh->b_data))
+				if (ext4_test_bit(i, bitmap_bh->b_data))
 					break;
 			}
 			brelse(bitmap_bh);
@@ -2544,7 +2544,7 @@ make_io:
 		submit_bh(READ_META, bh);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
-			ext3_error(inode->i_sb, "ext3_get_inode_loc",
+			ext4_error(inode->i_sb, "ext4_get_inode_loc",
 					"unable to read inode block - "
 					"inode=%lu, block="E3FSBLK,
 					inode->i_ino, block);
@@ -2557,48 +2557,48 @@ has_buffer:
 	return 0;
 }
 
-int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
+int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
 {
 	/* We have all inode data except xattrs in memory here. */
-	return __ext3_get_inode_loc(inode, iloc,
-		!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR));
+	return __ext4_get_inode_loc(inode, iloc,
+		!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR));
 }
 
-void ext3_set_inode_flags(struct inode *inode)
+void ext4_set_inode_flags(struct inode *inode)
 {
-	unsigned int flags = EXT3_I(inode)->i_flags;
+	unsigned int flags = EXT4_I(inode)->i_flags;
 
 	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
-	if (flags & EXT3_SYNC_FL)
+	if (flags & EXT4_SYNC_FL)
 		inode->i_flags |= S_SYNC;
-	if (flags & EXT3_APPEND_FL)
+	if (flags & EXT4_APPEND_FL)
 		inode->i_flags |= S_APPEND;
-	if (flags & EXT3_IMMUTABLE_FL)
+	if (flags & EXT4_IMMUTABLE_FL)
 		inode->i_flags |= S_IMMUTABLE;
-	if (flags & EXT3_NOATIME_FL)
+	if (flags & EXT4_NOATIME_FL)
 		inode->i_flags |= S_NOATIME;
-	if (flags & EXT3_DIRSYNC_FL)
+	if (flags & EXT4_DIRSYNC_FL)
 		inode->i_flags |= S_DIRSYNC;
 }
 
-void ext3_read_inode(struct inode * inode)
+void ext4_read_inode(struct inode * inode)
 {
-	struct ext3_iloc iloc;
-	struct ext3_inode *raw_inode;
-	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext4_iloc iloc;
+	struct ext4_inode *raw_inode;
+	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct buffer_head *bh;
 	int block;
 
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	ei->i_acl = EXT3_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+	ei->i_acl = EXT4_ACL_NOT_CACHED;
+	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
 #endif
 	ei->i_block_alloc_info = NULL;
 
-	if (__ext3_get_inode_loc(inode, &iloc, 0))
+	if (__ext4_get_inode_loc(inode, &iloc, 0))
 		goto bad_inode;
 	bh = iloc.bh;
-	raw_inode = ext3_raw_inode(&iloc);
+	raw_inode = ext4_raw_inode(&iloc);
 	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
 	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
 	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
@@ -2623,7 +2623,7 @@ void ext3_read_inode(struct inode * inode)
 	 */
 	if (inode->i_nlink == 0) {
 		if (inode->i_mode == 0 ||
-		    !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
+		    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
 			/* this inode is deleted */
 			brelse (bh);
 			goto bad_inode;
@@ -2635,7 +2635,7 @@ void ext3_read_inode(struct inode * inode)
 	}
 	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
 	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
-#ifdef EXT3_FRAGMENTS
+#ifdef EXT4_FRAGMENTS
 	ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
 	ei->i_frag_no = raw_inode->i_frag;
 	ei->i_frag_size = raw_inode->i_fsize;
@@ -2654,51 +2654,51 @@ void ext3_read_inode(struct inode * inode)
 	 * NOTE! The in-memory inode i_data array is in little-endian order
 	 * even on big-endian machines: we do NOT byteswap the block numbers!
 	 */
-	for (block = 0; block < EXT3_N_BLOCKS; block++)
+	for (block = 0; block < EXT4_N_BLOCKS; block++)
 		ei->i_data[block] = raw_inode->i_block[block];
 	INIT_LIST_HEAD(&ei->i_orphan);
 
-	if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 &&
-	    EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
+	if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 &&
+	    EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
 		/*
 		 * When mke2fs creates big inodes it does not zero out
-		 * the unused bytes above EXT3_GOOD_OLD_INODE_SIZE,
+		 * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE,
 		 * so ignore those first few inodes.
 		 */
 		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
-		if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-		    EXT3_INODE_SIZE(inode->i_sb))
+		if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+		    EXT4_INODE_SIZE(inode->i_sb))
 			goto bad_inode;
 		if (ei->i_extra_isize == 0) {
 			/* The extra space is currently unused. Use it. */
-			ei->i_extra_isize = sizeof(struct ext3_inode) -
-					    EXT3_GOOD_OLD_INODE_SIZE;
+			ei->i_extra_isize = sizeof(struct ext4_inode) -
+					    EXT4_GOOD_OLD_INODE_SIZE;
 		} else {
 			__le32 *magic = (void *)raw_inode +
-					EXT3_GOOD_OLD_INODE_SIZE +
+					EXT4_GOOD_OLD_INODE_SIZE +
 					ei->i_extra_isize;
-			if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC))
-				 ei->i_state |= EXT3_STATE_XATTR;
+			if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
+				 ei->i_state |= EXT4_STATE_XATTR;
 		}
 	} else
 		ei->i_extra_isize = 0;
 
 	if (S_ISREG(inode->i_mode)) {
-		inode->i_op = &ext3_file_inode_operations;
-		inode->i_fop = &ext3_file_operations;
-		ext3_set_aops(inode);
+		inode->i_op = &ext4_file_inode_operations;
+		inode->i_fop = &ext4_file_operations;
+		ext4_set_aops(inode);
 	} else if (S_ISDIR(inode->i_mode)) {
-		inode->i_op = &ext3_dir_inode_operations;
-		inode->i_fop = &ext3_dir_operations;
+		inode->i_op = &ext4_dir_inode_operations;
+		inode->i_fop = &ext4_dir_operations;
 	} else if (S_ISLNK(inode->i_mode)) {
-		if (ext3_inode_is_fast_symlink(inode))
-			inode->i_op = &ext3_fast_symlink_inode_operations;
+		if (ext4_inode_is_fast_symlink(inode))
+			inode->i_op = &ext4_fast_symlink_inode_operations;
 		else {
-			inode->i_op = &ext3_symlink_inode_operations;
-			ext3_set_aops(inode);
+			inode->i_op = &ext4_symlink_inode_operations;
+			ext4_set_aops(inode);
 		}
 	} else {
-		inode->i_op = &ext3_special_inode_operations;
+		inode->i_op = &ext4_special_inode_operations;
 		if (raw_inode->i_block[0])
 			init_special_inode(inode, inode->i_mode,
 			   old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
@@ -2707,7 +2707,7 @@ void ext3_read_inode(struct inode * inode)
 			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
 	}
 	brelse (iloc.bh);
-	ext3_set_inode_flags(inode);
+	ext4_set_inode_flags(inode);
 	return;
 
 bad_inode:
@@ -2722,19 +2722,19 @@ bad_inode:
  *
  * The caller must have write access to iloc->bh.
  */
-static int ext3_do_update_inode(handle_t *handle,
+static int ext4_do_update_inode(handle_t *handle,
 				struct inode *inode,
-				struct ext3_iloc *iloc)
+				struct ext4_iloc *iloc)
 {
-	struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
-	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
+	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct buffer_head *bh = iloc->bh;
 	int err = 0, rc, block;
 
 	/* For fields not not tracking in the in-memory inode,
 	 * initialise them to zero for new inodes. */
-	if (ei->i_state & EXT3_STATE_NEW)
-		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
+	if (ei->i_state & EXT4_STATE_NEW)
+		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
 
 	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
 	if(!(test_opt(inode->i_sb, NO_UID32))) {
@@ -2769,7 +2769,7 @@ static int ext3_do_update_inode(handle_t *handle,
 	raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
 	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
 	raw_inode->i_flags = cpu_to_le32(ei->i_flags);
-#ifdef EXT3_FRAGMENTS
+#ifdef EXT4_FRAGMENTS
 	raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
 	raw_inode->i_frag = ei->i_frag_no;
 	raw_inode->i_fsize = ei->i_frag_size;
@@ -2782,24 +2782,24 @@ static int ext3_do_update_inode(handle_t *handle,
 			cpu_to_le32(ei->i_disksize >> 32);
 		if (ei->i_disksize > 0x7fffffffULL) {
 			struct super_block *sb = inode->i_sb;
-			if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
-					EXT3_FEATURE_RO_COMPAT_LARGE_FILE) ||
-			    EXT3_SB(sb)->s_es->s_rev_level ==
-					cpu_to_le32(EXT3_GOOD_OLD_REV)) {
+			if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+					EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
+			    EXT4_SB(sb)->s_es->s_rev_level ==
+					cpu_to_le32(EXT4_GOOD_OLD_REV)) {
 			       /* If this is the first large file
 				* created, add a flag to the superblock.
 				*/
-				err = ext3_journal_get_write_access(handle,
-						EXT3_SB(sb)->s_sbh);
+				err = ext4_journal_get_write_access(handle,
+						EXT4_SB(sb)->s_sbh);
 				if (err)
 					goto out_brelse;
-				ext3_update_dynamic_rev(sb);
-				EXT3_SET_RO_COMPAT_FEATURE(sb,
-					EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
+				ext4_update_dynamic_rev(sb);
+				EXT4_SET_RO_COMPAT_FEATURE(sb,
+					EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
 				sb->s_dirt = 1;
 				handle->h_sync = 1;
-				err = ext3_journal_dirty_metadata(handle,
-						EXT3_SB(sb)->s_sbh);
+				err = ext4_journal_dirty_metadata(handle,
+						EXT4_SB(sb)->s_sbh);
 			}
 		}
 	}
@@ -2815,26 +2815,26 @@ static int ext3_do_update_inode(handle_t *handle,
 				cpu_to_le32(new_encode_dev(inode->i_rdev));
 			raw_inode->i_block[2] = 0;
 		}
-	} else for (block = 0; block < EXT3_N_BLOCKS; block++)
+	} else for (block = 0; block < EXT4_N_BLOCKS; block++)
 		raw_inode->i_block[block] = ei->i_data[block];
 
 	if (ei->i_extra_isize)
 		raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
 
-	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-	rc = ext3_journal_dirty_metadata(handle, bh);
+	BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
+	rc = ext4_journal_dirty_metadata(handle, bh);
 	if (!err)
 		err = rc;
-	ei->i_state &= ~EXT3_STATE_NEW;
+	ei->i_state &= ~EXT4_STATE_NEW;
 
 out_brelse:
 	brelse (bh);
-	ext3_std_error(inode->i_sb, err);
+	ext4_std_error(inode->i_sb, err);
 	return err;
 }
 
 /*
- * ext3_write_inode()
+ * ext4_write_inode()
  *
  * We are called from a few places:
  *
@@ -2851,7 +2851,7 @@ out_brelse:
  *
  * In all cases it is actually safe for us to return without doing anything,
  * because the inode has been copied into a raw inode buffer in
- * ext3_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
+ * ext4_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
  * knfsd.
  *
  * Note that we are absolutely dependent upon all inode dirtiers doing the
@@ -2868,12 +2868,12 @@ out_brelse:
  * `stuff()' is running, and the new i_size will be lost.  Plus the inode
  * will no longer be on the superblock's dirty inode list.
  */
-int ext3_write_inode(struct inode *inode, int wait)
+int ext4_write_inode(struct inode *inode, int wait)
 {
 	if (current->flags & PF_MEMALLOC)
 		return 0;
 
-	if (ext3_journal_current_handle()) {
+	if (ext4_journal_current_handle()) {
 		jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
 		dump_stack();
 		return -EIO;
@@ -2882,11 +2882,11 @@ int ext3_write_inode(struct inode *inode, int wait)
 	if (!wait)
 		return 0;
 
-	return ext3_force_commit(inode->i_sb);
+	return ext4_force_commit(inode->i_sb);
 }
 
 /*
- * ext3_setattr()
+ * ext4_setattr()
  *
  * Called from notify_change.
  *
@@ -2902,7 +2902,7 @@ int ext3_write_inode(struct inode *inode, int wait)
  *
  * Called with inode->sem down.
  */
-int ext3_setattr(struct dentry *dentry, struct iattr *attr)
+int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
 	int error, rc = 0;
@@ -2918,15 +2918,15 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 
 		/* (user+group)*(old+new) structure, inode write (sb,
 		 * inode block, ? - but truncate inode update has it) */
-		handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
-					EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+		handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
+					EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
 		error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
 		if (error) {
-			ext3_journal_stop(handle);
+			ext4_journal_stop(handle);
 			return error;
 		}
 		/* Update corresponding info in inode so that everything is in
@@ -2935,41 +2935,41 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 			inode->i_uid = attr->ia_uid;
 		if (attr->ia_valid & ATTR_GID)
 			inode->i_gid = attr->ia_gid;
-		error = ext3_mark_inode_dirty(handle, inode);
-		ext3_journal_stop(handle);
+		error = ext4_mark_inode_dirty(handle, inode);
+		ext4_journal_stop(handle);
 	}
 
 	if (S_ISREG(inode->i_mode) &&
 	    attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
 		handle_t *handle;
 
-		handle = ext3_journal_start(inode, 3);
+		handle = ext4_journal_start(inode, 3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
 
-		error = ext3_orphan_add(handle, inode);
-		EXT3_I(inode)->i_disksize = attr->ia_size;
-		rc = ext3_mark_inode_dirty(handle, inode);
+		error = ext4_orphan_add(handle, inode);
+		EXT4_I(inode)->i_disksize = attr->ia_size;
+		rc = ext4_mark_inode_dirty(handle, inode);
 		if (!error)
 			error = rc;
-		ext3_journal_stop(handle);
+		ext4_journal_stop(handle);
 	}
 
 	rc = inode_setattr(inode, attr);
 
-	/* If inode_setattr's call to ext3_truncate failed to get a
+	/* If inode_setattr's call to ext4_truncate failed to get a
 	 * transaction handle at all, we need to clean up the in-core
 	 * orphan list manually. */
 	if (inode->i_nlink)
-		ext3_orphan_del(NULL, inode);
+		ext4_orphan_del(NULL, inode);
 
 	if (!rc && (ia_valid & ATTR_MODE))
-		rc = ext3_acl_chmod(inode);
+		rc = ext4_acl_chmod(inode);
 
 err_out:
-	ext3_std_error(inode->i_sb, error);
+	ext4_std_error(inode->i_sb, error);
 	if (!error)
 		error = rc;
 	return error;
@@ -2988,9 +2988,9 @@ err_out:
  * N+5 group descriptor summary blocks
  * 1 inode block
  * 1 superblock.
- * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files
+ * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quota files
  *
- * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
+ * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS
  *
  * With ordered or writeback data it's the same, less the N data blocks.
  *
@@ -3003,13 +3003,13 @@ err_out:
  * block and work out the exact number of indirects which are touched.  Pah.
  */
 
-static int ext3_writepage_trans_blocks(struct inode *inode)
+static int ext4_writepage_trans_blocks(struct inode *inode)
 {
-	int bpp = ext3_journal_blocks_per_page(inode);
-	int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
+	int bpp = ext4_journal_blocks_per_page(inode);
+	int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
 	int ret;
 
-	if (ext3_should_journal_data(inode))
+	if (ext4_should_journal_data(inode))
 		ret = 3 * (bpp + indirects) + 2;
 	else
 		ret = 2 * (bpp + indirects) + 2;
@@ -3017,26 +3017,26 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
 #ifdef CONFIG_QUOTA
 	/* We know that structure was already allocated during DQUOT_INIT so
 	 * we will be updating only the data blocks + inodes */
-	ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
+	ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
 #endif
 
 	return ret;
 }
 
 /*
- * The caller must have previously called ext3_reserve_inode_write().
+ * The caller must have previously called ext4_reserve_inode_write().
  * Give this, we know that the caller already has write access to iloc->bh.
  */
-int ext3_mark_iloc_dirty(handle_t *handle,
-		struct inode *inode, struct ext3_iloc *iloc)
+int ext4_mark_iloc_dirty(handle_t *handle,
+		struct inode *inode, struct ext4_iloc *iloc)
 {
 	int err = 0;
 
 	/* the do_update_inode consumes one bh->b_count */
 	get_bh(iloc->bh);
 
-	/* ext3_do_update_inode() does journal_dirty_metadata */
-	err = ext3_do_update_inode(handle, inode, iloc);
+	/* ext4_do_update_inode() does journal_dirty_metadata */
+	err = ext4_do_update_inode(handle, inode, iloc);
 	put_bh(iloc->bh);
 	return err;
 }
@@ -3047,22 +3047,22 @@ int ext3_mark_iloc_dirty(handle_t *handle,
  */
 
 int
-ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
-			 struct ext3_iloc *iloc)
+ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
+			 struct ext4_iloc *iloc)
 {
 	int err = 0;
 	if (handle) {
-		err = ext3_get_inode_loc(inode, iloc);
+		err = ext4_get_inode_loc(inode, iloc);
 		if (!err) {
 			BUFFER_TRACE(iloc->bh, "get_write_access");
-			err = ext3_journal_get_write_access(handle, iloc->bh);
+			err = ext4_journal_get_write_access(handle, iloc->bh);
 			if (err) {
 				brelse(iloc->bh);
 				iloc->bh = NULL;
 			}
 		}
 	}
-	ext3_std_error(inode->i_sb, err);
+	ext4_std_error(inode->i_sb, err);
 	return err;
 }
 
@@ -3087,20 +3087,20 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
  * to do a write_super() to free up some memory.  It has the desired
  * effect.
  */
-int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
+int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 {
-	struct ext3_iloc iloc;
+	struct ext4_iloc iloc;
 	int err;
 
 	might_sleep();
-	err = ext3_reserve_inode_write(handle, inode, &iloc);
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (!err)
-		err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
 	return err;
 }
 
 /*
- * ext3_dirty_inode() is called from __mark_inode_dirty()
+ * ext4_dirty_inode() is called from __mark_inode_dirty()
  *
  * We're really interested in the case where a file is being extended.
  * i_size has been changed by generic_commit_write() and we thus need
@@ -3113,12 +3113,12 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
  * so would cause a commit on atime updates, which we don't bother doing.
  * We handle synchronous inodes at the highest possible level.
  */
-void ext3_dirty_inode(struct inode *inode)
+void ext4_dirty_inode(struct inode *inode)
 {
-	handle_t *current_handle = ext3_journal_current_handle();
+	handle_t *current_handle = ext4_journal_current_handle();
 	handle_t *handle;
 
-	handle = ext3_journal_start(inode, 2);
+	handle = ext4_journal_start(inode, 2);
 	if (IS_ERR(handle))
 		goto out;
 	if (current_handle &&
@@ -3129,9 +3129,9 @@ void ext3_dirty_inode(struct inode *inode)
 	} else {
 		jbd_debug(5, "marking dirty.  outer handle=%p\n",
 				current_handle);
-		ext3_mark_inode_dirty(handle, inode);
+		ext4_mark_inode_dirty(handle, inode);
 	}
-	ext3_journal_stop(handle);
+	ext4_journal_stop(handle);
 out:
 	return;
 }
@@ -3140,32 +3140,32 @@ out:
 /*
  * Bind an inode's backing buffer_head into this transaction, to prevent
  * it from being flushed to disk early.  Unlike
- * ext3_reserve_inode_write, this leaves behind no bh reference and
+ * ext4_reserve_inode_write, this leaves behind no bh reference and
  * returns no iloc structure, so the caller needs to repeat the iloc
  * lookup to mark the inode dirty later.
  */
-static int ext3_pin_inode(handle_t *handle, struct inode *inode)
+static int ext4_pin_inode(handle_t *handle, struct inode *inode)
 {
-	struct ext3_iloc iloc;
+	struct ext4_iloc iloc;
 
 	int err = 0;
 	if (handle) {
-		err = ext3_get_inode_loc(inode, &iloc);
+		err = ext4_get_inode_loc(inode, &iloc);
 		if (!err) {
 			BUFFER_TRACE(iloc.bh, "get_write_access");
 			err = journal_get_write_access(handle, iloc.bh);
 			if (!err)
-				err = ext3_journal_dirty_metadata(handle,
+				err = ext4_journal_dirty_metadata(handle,
 								  iloc.bh);
 			brelse(iloc.bh);
 		}
 	}
-	ext3_std_error(inode->i_sb, err);
+	ext4_std_error(inode->i_sb, err);
 	return err;
 }
 #endif
 
-int ext3_change_inode_journal_flag(struct inode *inode, int val)
+int ext4_change_inode_journal_flag(struct inode *inode, int val)
 {
 	journal_t *journal;
 	handle_t *handle;
@@ -3181,7 +3181,7 @@ int ext3_change_inode_journal_flag(struct inode *inode, int val)
 	 * nobody is changing anything.
 	 */
 
-	journal = EXT3_JOURNAL(inode);
+	journal = EXT4_JOURNAL(inode);
 	if (is_journal_aborted(journal) || IS_RDONLY(inode))
 		return -EROFS;
 
@@ -3197,23 +3197,23 @@ int ext3_change_inode_journal_flag(struct inode *inode, int val)
 	 */
 
 	if (val)
-		EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
+		EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL;
 	else
-		EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL;
-	ext3_set_aops(inode);
+		EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL;
+	ext4_set_aops(inode);
 
 	journal_unlock_updates(journal);
 
 	/* Finally we can mark the inode as dirty. */
 
-	handle = ext3_journal_start(inode, 1);
+	handle = ext4_journal_start(inode, 1);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	err = ext3_mark_inode_dirty(handle, inode);
+	err = ext4_mark_inode_dirty(handle, inode);
 	handle->h_sync = 1;
-	ext3_journal_stop(handle);
-	ext3_std_error(inode->i_sb, err);
+	ext4_journal_stop(handle);
+	ext4_std_error(inode->i_sb, err);
 
 	return err;
 }
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 12daa6869572..a567af161b06 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1,5 +1,5 @@
 /*
- * linux/fs/ext3/ioctl.c
+ * linux/fs/ext4/ioctl.c
  *
  * Copyright (C) 1993, 1994, 1995
  * Remy Card (card@masi.ibp.fr)
@@ -10,30 +10,30 @@
 #include <linux/fs.h>
 #include <linux/jbd.h>
 #include <linux/capability.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
+#include <linux/ext4_fs.h>
+#include <linux/ext4_jbd.h>
 #include <linux/time.h>
 #include <linux/compat.h>
 #include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
-int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
+int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		unsigned long arg)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned int flags;
 	unsigned short rsv_window_size;
 
-	ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg);
+	ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg);
 
 	switch (cmd) {
-	case EXT3_IOC_GETFLAGS:
-		flags = ei->i_flags & EXT3_FL_USER_VISIBLE;
+	case EXT4_IOC_GETFLAGS:
+		flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
 		return put_user(flags, (int __user *) arg);
-	case EXT3_IOC_SETFLAGS: {
+	case EXT4_IOC_SETFLAGS: {
 		handle_t *handle = NULL;
 		int err;
-		struct ext3_iloc iloc;
+		struct ext4_iloc iloc;
 		unsigned int oldflags;
 		unsigned int jflag;
 
@@ -47,13 +47,13 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 			return -EFAULT;
 
 		if (!S_ISDIR(inode->i_mode))
-			flags &= ~EXT3_DIRSYNC_FL;
+			flags &= ~EXT4_DIRSYNC_FL;
 
 		mutex_lock(&inode->i_mutex);
 		oldflags = ei->i_flags;
 
 		/* The JOURNAL_DATA flag is modifiable only by root */
-		jflag = flags & EXT3_JOURNAL_DATA_FL;
+		jflag = flags & EXT4_JOURNAL_DATA_FL;
 
 		/*
 		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
@@ -61,7 +61,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		 *
 		 * This test looks nicer. Thanks to Pauline Middelink
 		 */
-		if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
+		if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
 			if (!capable(CAP_LINUX_IMMUTABLE)) {
 				mutex_unlock(&inode->i_mutex);
 				return -EPERM;
@@ -72,7 +72,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		 * The JOURNAL_DATA flag can only be changed by
 		 * the relevant capability.
 		 */
-		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
+		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
 			if (!capable(CAP_SYS_RESOURCE)) {
 				mutex_unlock(&inode->i_mutex);
 				return -EPERM;
@@ -80,44 +80,44 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		}
 
 
-		handle = ext3_journal_start(inode, 1);
+		handle = ext4_journal_start(inode, 1);
 		if (IS_ERR(handle)) {
 			mutex_unlock(&inode->i_mutex);
 			return PTR_ERR(handle);
 		}
 		if (IS_SYNC(inode))
 			handle->h_sync = 1;
-		err = ext3_reserve_inode_write(handle, inode, &iloc);
+		err = ext4_reserve_inode_write(handle, inode, &iloc);
 		if (err)
 			goto flags_err;
 
-		flags = flags & EXT3_FL_USER_MODIFIABLE;
-		flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE;
+		flags = flags & EXT4_FL_USER_MODIFIABLE;
+		flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE;
 		ei->i_flags = flags;
 
-		ext3_set_inode_flags(inode);
+		ext4_set_inode_flags(inode);
 		inode->i_ctime = CURRENT_TIME_SEC;
 
-		err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
 flags_err:
-		ext3_journal_stop(handle);
+		ext4_journal_stop(handle);
 		if (err) {
 			mutex_unlock(&inode->i_mutex);
 			return err;
 		}
 
-		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
-			err = ext3_change_inode_journal_flag(inode, jflag);
+		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+			err = ext4_change_inode_journal_flag(inode, jflag);
 		mutex_unlock(&inode->i_mutex);
 		return err;
 	}
-	case EXT3_IOC_GETVERSION:
-	case EXT3_IOC_GETVERSION_OLD:
+	case EXT4_IOC_GETVERSION:
+	case EXT4_IOC_GETVERSION_OLD:
 		return put_user(inode->i_generation, (int __user *) arg);
-	case EXT3_IOC_SETVERSION:
-	case EXT3_IOC_SETVERSION_OLD: {
+	case EXT4_IOC_SETVERSION:
+	case EXT4_IOC_SETVERSION_OLD: {
 		handle_t *handle;
-		struct ext3_iloc iloc;
+		struct ext4_iloc iloc;
 		__u32 generation;
 		int err;
 
@@ -128,20 +128,20 @@ flags_err:
 		if (get_user(generation, (int __user *) arg))
 			return -EFAULT;
 
-		handle = ext3_journal_start(inode, 1);
+		handle = ext4_journal_start(inode, 1);
 		if (IS_ERR(handle))
 			return PTR_ERR(handle);
-		err = ext3_reserve_inode_write(handle, inode, &iloc);
+		err = ext4_reserve_inode_write(handle, inode, &iloc);
 		if (err == 0) {
 			inode->i_ctime = CURRENT_TIME_SEC;
 			inode->i_generation = generation;
-			err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+			err = ext4_mark_iloc_dirty(handle, inode, &iloc);
 		}
-		ext3_journal_stop(handle);
+		ext4_journal_stop(handle);
 		return err;
 	}
 #ifdef CONFIG_JBD_DEBUG
-	case EXT3_IOC_WAIT_FOR_READONLY:
+	case EXT4_IOC_WAIT_FOR_READONLY:
 		/*
 		 * This is racy - by the time we're woken up and running,
 		 * the superblock could be released.  And the module could
@@ -155,16 +155,16 @@ flags_err:
 			int ret = 0;
 
 			set_current_state(TASK_INTERRUPTIBLE);
-			add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
-			if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
+			add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
+			if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) {
 				schedule();
 				ret = 1;
 			}
-			remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
+			remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
 			return ret;
 		}
 #endif
-	case EXT3_IOC_GETRSVSZ:
+	case EXT4_IOC_GETRSVSZ:
 		if (test_opt(inode->i_sb, RESERVATION)
 			&& S_ISREG(inode->i_mode)
 			&& ei->i_block_alloc_info) {
@@ -172,7 +172,7 @@ flags_err:
 			return put_user(rsv_window_size, (int __user *)arg);
 		}
 		return -ENOTTY;
-	case EXT3_IOC_SETRSVSZ: {
+	case EXT4_IOC_SETRSVSZ: {
 
 		if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
 			return -ENOTTY;
@@ -186,8 +186,8 @@ flags_err:
 		if (get_user(rsv_window_size, (int __user *)arg))
 			return -EFAULT;
 
-		if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS)
-			rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
+		if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
+			rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
 
 		/*
 		 * need to allocate reservation structure for this inode
@@ -195,17 +195,17 @@ flags_err:
 		 */
 		mutex_lock(&ei->truncate_mutex);
 		if (!ei->i_block_alloc_info)
-			ext3_init_block_alloc_info(inode);
+			ext4_init_block_alloc_info(inode);
 
 		if (ei->i_block_alloc_info){
-			struct ext3_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
+			struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
 			rsv->rsv_goal_size = rsv_window_size;
 		}
 		mutex_unlock(&ei->truncate_mutex);
 		return 0;
 	}
-	case EXT3_IOC_GROUP_EXTEND: {
-		ext3_fsblk_t n_blocks_count;
+	case EXT4_IOC_GROUP_EXTEND: {
+		ext4_fsblk_t n_blocks_count;
 		struct super_block *sb = inode->i_sb;
 		int err;
 
@@ -218,15 +218,15 @@ flags_err:
 		if (get_user(n_blocks_count, (__u32 __user *)arg))
 			return -EFAULT;
 
-		err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
-		journal_lock_updates(EXT3_SB(sb)->s_journal);
-		journal_flush(EXT3_SB(sb)->s_journal);
-		journal_unlock_updates(EXT3_SB(sb)->s_journal);
+		err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
+		journal_lock_updates(EXT4_SB(sb)->s_journal);
+		journal_flush(EXT4_SB(sb)->s_journal);
+		journal_unlock_updates(EXT4_SB(sb)->s_journal);
 
 		return err;
 	}
-	case EXT3_IOC_GROUP_ADD: {
-		struct ext3_new_group_data input;
+	case EXT4_IOC_GROUP_ADD: {
+		struct ext4_new_group_data input;
 		struct super_block *sb = inode->i_sb;
 		int err;
 
@@ -236,14 +236,14 @@ flags_err:
 		if (IS_RDONLY(inode))
 			return -EROFS;
 
-		if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg,
+		if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
 				sizeof(input)))
 			return -EFAULT;
 
-		err = ext3_group_add(sb, &input);
-		journal_lock_updates(EXT3_SB(sb)->s_journal);
-		journal_flush(EXT3_SB(sb)->s_journal);
-		journal_unlock_updates(EXT3_SB(sb)->s_journal);
+		err = ext4_group_add(sb, &input);
+		journal_lock_updates(EXT4_SB(sb)->s_journal);
+		journal_flush(EXT4_SB(sb)->s_journal);
+		journal_unlock_updates(EXT4_SB(sb)->s_journal);
 
 		return err;
 	}
@@ -255,52 +255,52 @@ flags_err:
 }
 
 #ifdef CONFIG_COMPAT
-long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	int ret;
 
 	/* These are just misnamed, they actually get/put from/to user an int */
 	switch (cmd) {
-	case EXT3_IOC32_GETFLAGS:
-		cmd = EXT3_IOC_GETFLAGS;
+	case EXT4_IOC32_GETFLAGS:
+		cmd = EXT4_IOC_GETFLAGS;
 		break;
-	case EXT3_IOC32_SETFLAGS:
-		cmd = EXT3_IOC_SETFLAGS;
+	case EXT4_IOC32_SETFLAGS:
+		cmd = EXT4_IOC_SETFLAGS;
 		break;
-	case EXT3_IOC32_GETVERSION:
-		cmd = EXT3_IOC_GETVERSION;
+	case EXT4_IOC32_GETVERSION:
+		cmd = EXT4_IOC_GETVERSION;
 		break;
-	case EXT3_IOC32_SETVERSION:
-		cmd = EXT3_IOC_SETVERSION;
+	case EXT4_IOC32_SETVERSION:
+		cmd = EXT4_IOC_SETVERSION;
 		break;
-	case EXT3_IOC32_GROUP_EXTEND:
-		cmd = EXT3_IOC_GROUP_EXTEND;
+	case EXT4_IOC32_GROUP_EXTEND:
+		cmd = EXT4_IOC_GROUP_EXTEND;
 		break;
-	case EXT3_IOC32_GETVERSION_OLD:
-		cmd = EXT3_IOC_GETVERSION_OLD;
+	case EXT4_IOC32_GETVERSION_OLD:
+		cmd = EXT4_IOC_GETVERSION_OLD;
 		break;
-	case EXT3_IOC32_SETVERSION_OLD:
-		cmd = EXT3_IOC_SETVERSION_OLD;
+	case EXT4_IOC32_SETVERSION_OLD:
+		cmd = EXT4_IOC_SETVERSION_OLD;
 		break;
 #ifdef CONFIG_JBD_DEBUG
-	case EXT3_IOC32_WAIT_FOR_READONLY:
-		cmd = EXT3_IOC_WAIT_FOR_READONLY;
+	case EXT4_IOC32_WAIT_FOR_READONLY:
+		cmd = EXT4_IOC_WAIT_FOR_READONLY;
 		break;
 #endif
-	case EXT3_IOC32_GETRSVSZ:
-		cmd = EXT3_IOC_GETRSVSZ;
+	case EXT4_IOC32_GETRSVSZ:
+		cmd = EXT4_IOC_GETRSVSZ;
 		break;
-	case EXT3_IOC32_SETRSVSZ:
-		cmd = EXT3_IOC_SETRSVSZ;
+	case EXT4_IOC32_SETRSVSZ:
+		cmd = EXT4_IOC_SETRSVSZ;
 		break;
-	case EXT3_IOC_GROUP_ADD:
+	case EXT4_IOC_GROUP_ADD:
 		break;
 	default:
 		return -ENOIOCTLCMD;
 	}
 	lock_kernel();
-	ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+	ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
 	unlock_kernel();
 	return ret;
 }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 906731a20f1a..956b38113f62 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/namei.c
+ *  linux/fs/ext4/namei.c
  *
  * Copyright (C) 1992, 1993, 1994, 1995
  * Remy Card (card@masi.ibp.fr)
@@ -28,8 +28,8 @@
 #include <linux/pagemap.h>
 #include <linux/jbd.h>
 #include <linux/time.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
+#include <linux/ext4_fs.h>
+#include <linux/ext4_jbd.h>
 #include <linux/fcntl.h>
 #include <linux/stat.h>
 #include <linux/string.h>
@@ -50,7 +50,7 @@
 #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
 #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
 
-static struct buffer_head *ext3_append(handle_t *handle,
+static struct buffer_head *ext4_append(handle_t *handle,
 					struct inode *inode,
 					u32 *block, int *err)
 {
@@ -58,10 +58,10 @@ static struct buffer_head *ext3_append(handle_t *handle,
 
 	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
 
-	if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
+	if ((bh = ext4_bread(handle, inode, *block, 1, err))) {
 		inode->i_size += inode->i_sb->s_blocksize;
-		EXT3_I(inode)->i_disksize = inode->i_size;
-		ext3_journal_get_write_access(handle,bh);
+		EXT4_I(inode)->i_disksize = inode->i_size;
+		ext4_journal_get_write_access(handle,bh);
 	}
 	return bh;
 }
@@ -144,7 +144,7 @@ struct dx_map_entry
 	u32 offs;
 };
 
-#ifdef CONFIG_EXT3_INDEX
+#ifdef CONFIG_EXT4_INDEX
 static inline unsigned dx_get_block (struct dx_entry *entry);
 static void dx_set_block (struct dx_entry *entry, unsigned value);
 static inline unsigned dx_get_hash (struct dx_entry *entry);
@@ -161,20 +161,20 @@ static struct dx_frame *dx_probe(struct dentry *dentry,
 				 struct dx_frame *frame,
 				 int *err);
 static void dx_release (struct dx_frame *frames);
-static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
+static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
 			struct dx_hash_info *hinfo, struct dx_map_entry map[]);
 static void dx_sort_map(struct dx_map_entry *map, unsigned count);
-static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to,
 		struct dx_map_entry *offsets, int count);
-static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size);
 static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
-static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 				 struct dx_frame *frame,
 				 struct dx_frame *frames,
 				 __u32 *start_hash);
-static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-		       struct ext3_dir_entry_2 **res_dir, int *err);
-static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
+		       struct ext4_dir_entry_2 **res_dir, int *err);
+static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			     struct inode *inode);
 
 /*
@@ -224,14 +224,14 @@ static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
 
 static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
 {
-	unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
-		EXT3_DIR_REC_LEN(2) - infosize;
+	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
+		EXT4_DIR_REC_LEN(2) - infosize;
 	return 0? 20: entry_space / sizeof(struct dx_entry);
 }
 
 static inline unsigned dx_node_limit (struct inode *dir)
 {
-	unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
+	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
 	return 0? 22: entry_space / sizeof(struct dx_entry);
 }
 
@@ -257,7 +257,7 @@ struct stats
 	unsigned bcount;
 };
 
-static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
 				 int size, int show_names)
 {
 	unsigned names = 0, space = 0;
@@ -274,14 +274,14 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
 				int len = de->name_len;
 				char *name = de->name;
 				while (len--) printk("%c", *name++);
-				ext3fs_dirhash(de->name, de->name_len, &h);
+				ext4fs_dirhash(de->name, de->name_len, &h);
 				printk(":%x.%u ", h.hash,
 				       ((char *) de - base));
 			}
-			space += EXT3_DIR_REC_LEN(de->name_len);
+			space += EXT4_DIR_REC_LEN(de->name_len);
 			names++;
 		}
-		de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+		de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
 	}
 	printk("(%i)\n", names);
 	return (struct stats) { names, space, 1 };
@@ -302,10 +302,10 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
 		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
 		struct stats stats;
 		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
-		if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
+		if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
 		stats = levels?
 		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
-		   dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
+		   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
 		names += stats.names;
 		space += stats.space;
 		bcount += stats.bcount;
@@ -341,13 +341,13 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 	frame->bh = NULL;
 	if (dentry)
 		dir = dentry->d_parent->d_inode;
-	if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
+	if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
 		goto fail;
 	root = (struct dx_root *) bh->b_data;
 	if (root->info.hash_version != DX_HASH_TEA &&
 	    root->info.hash_version != DX_HASH_HALF_MD4 &&
 	    root->info.hash_version != DX_HASH_LEGACY) {
-		ext3_warning(dir->i_sb, __FUNCTION__,
+		ext4_warning(dir->i_sb, __FUNCTION__,
 			     "Unrecognised inode hash code %d",
 			     root->info.hash_version);
 		brelse(bh);
@@ -355,13 +355,13 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 		goto fail;
 	}
 	hinfo->hash_version = root->info.hash_version;
-	hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
 	if (dentry)
-		ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
+		ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
 	hash = hinfo->hash;
 
 	if (root->info.unused_flags & 1) {
-		ext3_warning(dir->i_sb, __FUNCTION__,
+		ext4_warning(dir->i_sb, __FUNCTION__,
 			     "Unimplemented inode hash flags: %#06x",
 			     root->info.unused_flags);
 		brelse(bh);
@@ -370,7 +370,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 	}
 
 	if ((indirect = root->info.indirect_levels) > 1) {
-		ext3_warning(dir->i_sb, __FUNCTION__,
+		ext4_warning(dir->i_sb, __FUNCTION__,
 			     "Unimplemented inode hash depth: %#06x",
 			     root->info.indirect_levels);
 		brelse(bh);
@@ -421,7 +421,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
 		frame->entries = entries;
 		frame->at = at;
 		if (!indirect--) return frame;
-		if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
+		if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
 			goto fail2;
 		at = entries = ((struct dx_node *) bh->b_data)->entries;
 		assert (dx_get_limit(entries) == dx_node_limit (dir));
@@ -463,7 +463,7 @@ static void dx_release (struct dx_frame *frames)
  * If start_hash is non-null, it will be filled in with the starting
  * hash of the next page.
  */
-static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 				 struct dx_frame *frame,
 				 struct dx_frame *frames,
 				 __u32 *start_hash)
@@ -509,7 +509,7 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
 	 * block so no check is necessary
 	 */
 	while (num_frames--) {
-		if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
+		if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
 				      0, &err)))
 			return err; /* Failure */
 		p++;
@@ -524,9 +524,9 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
 /*
  * p is at least 6 bytes before the end of page
  */
-static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
+static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p)
 {
-	return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
+	return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
 }
 
 /*
@@ -540,26 +540,26 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 				  __u32 start_hash, __u32 start_minor_hash)
 {
 	struct buffer_head *bh;
-	struct ext3_dir_entry_2 *de, *top;
+	struct ext4_dir_entry_2 *de, *top;
 	int err, count = 0;
 
 	dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
-	if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
+	if (!(bh = ext4_bread (NULL, dir, block, 0, &err)))
 		return err;
 
-	de = (struct ext3_dir_entry_2 *) bh->b_data;
-	top = (struct ext3_dir_entry_2 *) ((char *) de +
+	de = (struct ext4_dir_entry_2 *) bh->b_data;
+	top = (struct ext4_dir_entry_2 *) ((char *) de +
 					   dir->i_sb->s_blocksize -
-					   EXT3_DIR_REC_LEN(0));
-	for (; de < top; de = ext3_next_entry(de)) {
-		ext3fs_dirhash(de->name, de->name_len, hinfo);
+					   EXT4_DIR_REC_LEN(0));
+	for (; de < top; de = ext4_next_entry(de)) {
+		ext4fs_dirhash(de->name, de->name_len, hinfo);
 		if ((hinfo->hash < start_hash) ||
 		    ((hinfo->hash == start_hash) &&
 		     (hinfo->minor_hash < start_minor_hash)))
 			continue;
 		if (de->inode == 0)
 			continue;
-		if ((err = ext3_htree_store_dirent(dir_file,
+		if ((err = ext4_htree_store_dirent(dir_file,
 				   hinfo->hash, hinfo->minor_hash, de)) != 0) {
 			brelse(bh);
 			return err;
@@ -579,11 +579,11 @@ static int htree_dirblock_to_tree(struct file *dir_file,
  * This function returns the number of entries inserted into the tree,
  * or a negative error code.
  */
-int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
+int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 			 __u32 start_minor_hash, __u32 *next_hash)
 {
 	struct dx_hash_info hinfo;
-	struct ext3_dir_entry_2 *de;
+	struct ext4_dir_entry_2 *de;
 	struct dx_frame frames[2], *frame;
 	struct inode *dir;
 	int block, err;
@@ -594,9 +594,9 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 	dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
 		       start_minor_hash));
 	dir = dir_file->f_dentry->d_inode;
-	if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
-		hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
-		hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+	if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
+		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
+		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
 		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
 					       start_hash, start_minor_hash);
 		*next_hash = ~0;
@@ -610,15 +610,15 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 
 	/* Add '.' and '..' from the htree header */
 	if (!start_hash && !start_minor_hash) {
-		de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
-		if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
+		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
+		if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
 			goto errout;
 		count++;
 	}
 	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
-		de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
-		de = ext3_next_entry(de);
-		if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
+		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
+		de = ext4_next_entry(de);
+		if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
 			goto errout;
 		count++;
 	}
@@ -633,7 +633,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 		}
 		count += ret;
 		hashval = ~0;
-		ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
+		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
 					    frame, frames, &hashval);
 		*next_hash = hashval;
 		if (ret < 0) {
@@ -663,7 +663,7 @@ errout:
  * Directory block splitting, compacting
  */
 
-static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
+static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
 			struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
 {
 	int count = 0;
@@ -673,7 +673,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 	while ((char *) de < base + size)
 	{
 		if (de->name_len && de->inode) {
-			ext3fs_dirhash(de->name, de->name_len, &h);
+			ext4fs_dirhash(de->name, de->name_len, &h);
 			map_tail--;
 			map_tail->hash = h.hash;
 			map_tail->offs = (u32) ((char *) de - base);
@@ -681,7 +681,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 			cond_resched();
 		}
 		/* XXX: do we need to check rec_len == 0 case? -Chris */
-		de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+		de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
 	}
 	return count;
 }
@@ -730,21 +730,21 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
 #endif
 
 
-static void ext3_update_dx_flag(struct inode *inode)
+static void ext4_update_dx_flag(struct inode *inode)
 {
-	if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
-				     EXT3_FEATURE_COMPAT_DIR_INDEX))
-		EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
+	if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
+				     EXT4_FEATURE_COMPAT_DIR_INDEX))
+		EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL;
 }
 
 /*
- * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
+ * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
  *
- * `len <= EXT3_NAME_LEN' is guaranteed by caller.
+ * `len <= EXT4_NAME_LEN' is guaranteed by caller.
  * `de != NULL' is guaranteed by caller.
  */
-static inline int ext3_match (int len, const char * const name,
-			      struct ext3_dir_entry_2 * de)
+static inline int ext4_match (int len, const char * const name,
+			      struct ext4_dir_entry_2 * de)
 {
 	if (len != de->name_len)
 		return 0;
@@ -760,24 +760,24 @@ static inline int search_dirblock(struct buffer_head * bh,
 				  struct inode *dir,
 				  struct dentry *dentry,
 				  unsigned long offset,
-				  struct ext3_dir_entry_2 ** res_dir)
+				  struct ext4_dir_entry_2 ** res_dir)
 {
-	struct ext3_dir_entry_2 * de;
+	struct ext4_dir_entry_2 * de;
 	char * dlimit;
 	int de_len;
 	const char *name = dentry->d_name.name;
 	int namelen = dentry->d_name.len;
 
-	de = (struct ext3_dir_entry_2 *) bh->b_data;
+	de = (struct ext4_dir_entry_2 *) bh->b_data;
 	dlimit = bh->b_data + dir->i_sb->s_blocksize;
 	while ((char *) de < dlimit) {
 		/* this code is executed quadratically often */
 		/* do minimal checking `by hand' */
 
 		if ((char *) de + namelen <= dlimit &&
-		    ext3_match (namelen, name, de)) {
+		    ext4_match (namelen, name, de)) {
 			/* found a match - just to be sure, do a full check */
-			if (!ext3_check_dir_entry("ext3_find_entry",
+			if (!ext4_check_dir_entry("ext4_find_entry",
 						  dir, de, bh, offset))
 				return -1;
 			*res_dir = de;
@@ -788,14 +788,14 @@ static inline int search_dirblock(struct buffer_head * bh,
 		if (de_len <= 0)
 			return -1;
 		offset += de_len;
-		de = (struct ext3_dir_entry_2 *) ((char *) de + de_len);
+		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
 	}
 	return 0;
 }
 
 
 /*
- *	ext3_find_entry()
+ *	ext4_find_entry()
  *
  * finds an entry in the specified directory with the wanted name. It
  * returns the cache buffer in which the entry was found, and the entry
@@ -805,8 +805,8 @@ static inline int search_dirblock(struct buffer_head * bh,
  * The returned buffer_head has ->b_count elevated.  The caller is expected
  * to brelse() it when appropriate.
  */
-static struct buffer_head * ext3_find_entry (struct dentry *dentry,
-					struct ext3_dir_entry_2 ** res_dir)
+static struct buffer_head * ext4_find_entry (struct dentry *dentry,
+					struct ext4_dir_entry_2 ** res_dir)
 {
 	struct super_block * sb;
 	struct buffer_head * bh_use[NAMEI_RA_SIZE];
@@ -828,11 +828,11 @@ static struct buffer_head * ext3_find_entry (struct dentry *dentry,
 	blocksize = sb->s_blocksize;
 	namelen = dentry->d_name.len;
 	name = dentry->d_name.name;
-	if (namelen > EXT3_NAME_LEN)
+	if (namelen > EXT4_NAME_LEN)
 		return NULL;
-#ifdef CONFIG_EXT3_INDEX
+#ifdef CONFIG_EXT4_INDEX
 	if (is_dx(dir)) {
-		bh = ext3_dx_find_entry(dentry, res_dir, &err);
+		bh = ext4_dx_find_entry(dentry, res_dir, &err);
 		/*
 		 * On success, or if the error was file not found,
 		 * return.  Otherwise, fall back to doing a search the
@@ -840,11 +840,11 @@ static struct buffer_head * ext3_find_entry (struct dentry *dentry,
 		 */
 		if (bh || (err != ERR_BAD_DX_DIR))
 			return bh;
-		dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
+		dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
 	}
 #endif
-	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
-	start = EXT3_I(dir)->i_dir_start_lookup;
+	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
+	start = EXT4_I(dir)->i_dir_start_lookup;
 	if (start >= nblocks)
 		start = 0;
 	block = start;
@@ -868,7 +868,7 @@ restart:
 					break;
 				}
 				num++;
-				bh = ext3_getblk(NULL, dir, b++, 0, &err);
+				bh = ext4_getblk(NULL, dir, b++, 0, &err);
 				bh_use[ra_max] = bh;
 				if (bh)
 					ll_rw_block(READ_META, 1, &bh);
@@ -879,15 +879,15 @@ restart:
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
-			ext3_error(sb, __FUNCTION__, "reading directory #%lu "
+			ext4_error(sb, __FUNCTION__, "reading directory #%lu "
 				   "offset %lu", dir->i_ino, block);
 			brelse(bh);
 			goto next;
 		}
 		i = search_dirblock(bh, dir, dentry,
-			    block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
+			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
 		if (i == 1) {
-			EXT3_I(dir)->i_dir_start_lookup = block;
+			EXT4_I(dir)->i_dir_start_lookup = block;
 			ret = bh;
 			goto cleanup_and_exit;
 		} else {
@@ -905,7 +905,7 @@ restart:
 	 * search the last part of the directory before giving up.
 	 */
 	block = nblocks;
-	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
 	if (block < nblocks) {
 		start = 0;
 		goto restart;
@@ -918,15 +918,15 @@ cleanup_and_exit:
 	return ret;
 }
 
-#ifdef CONFIG_EXT3_INDEX
-static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-		       struct ext3_dir_entry_2 **res_dir, int *err)
+#ifdef CONFIG_EXT4_INDEX
+static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
+		       struct ext4_dir_entry_2 **res_dir, int *err)
 {
 	struct super_block * sb;
 	struct dx_hash_info	hinfo;
 	u32 hash;
 	struct dx_frame frames[2], *frame;
-	struct ext3_dir_entry_2 *de, *top;
+	struct ext4_dir_entry_2 *de, *top;
 	struct buffer_head *bh;
 	unsigned long block;
 	int retval;
@@ -948,16 +948,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
 	hash = hinfo.hash;
 	do {
 		block = dx_get_block(frame->at);
-		if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+		if (!(bh = ext4_bread (NULL,dir, block, 0, err)))
 			goto errout;
-		de = (struct ext3_dir_entry_2 *) bh->b_data;
-		top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
-				       EXT3_DIR_REC_LEN(0));
-		for (; de < top; de = ext3_next_entry(de))
-		if (ext3_match (namelen, name, de)) {
-			if (!ext3_check_dir_entry("ext3_find_entry",
+		de = (struct ext4_dir_entry_2 *) bh->b_data;
+		top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
+				       EXT4_DIR_REC_LEN(0));
+		for (; de < top; de = ext4_next_entry(de))
+		if (ext4_match (namelen, name, de)) {
+			if (!ext4_check_dir_entry("ext4_find_entry",
 						  dir, de, bh,
-				  (block<<EXT3_BLOCK_SIZE_BITS(sb))
+				  (block<<EXT4_BLOCK_SIZE_BITS(sb))
 					  +((char *)de - bh->b_data))) {
 				brelse (bh);
 				goto errout;
@@ -968,10 +968,10 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
 		}
 		brelse (bh);
 		/* Check to see if we should continue to search */
-		retval = ext3_htree_next_block(dir, hash, frame,
+		retval = ext4_htree_next_block(dir, hash, frame,
 					       frames, NULL);
 		if (retval < 0) {
-			ext3_warning(sb, __FUNCTION__,
+			ext4_warning(sb, __FUNCTION__,
 			     "error reading index page in directory #%lu",
 			     dir->i_ino);
 			*err = retval;
@@ -987,22 +987,22 @@ errout:
 }
 #endif
 
-static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode * inode;
-	struct ext3_dir_entry_2 * de;
+	struct ext4_dir_entry_2 * de;
 	struct buffer_head * bh;
 
-	if (dentry->d_name.len > EXT3_NAME_LEN)
+	if (dentry->d_name.len > EXT4_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	bh = ext3_find_entry(dentry, &de);
+	bh = ext4_find_entry(dentry, &de);
 	inode = NULL;
 	if (bh) {
 		unsigned long ino = le32_to_cpu(de->inode);
 		brelse (bh);
-		if (!ext3_valid_inum(dir->i_sb, ino)) {
-			ext3_error(dir->i_sb, "ext3_lookup",
+		if (!ext4_valid_inum(dir->i_sb, ino)) {
+			ext4_error(dir->i_sb, "ext4_lookup",
 				   "bad inode number: %lu", ino);
 			inode = NULL;
 		} else
@@ -1015,28 +1015,28 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
 }
 
 
-struct dentry *ext3_get_parent(struct dentry *child)
+struct dentry *ext4_get_parent(struct dentry *child)
 {
 	unsigned long ino;
 	struct dentry *parent;
 	struct inode *inode;
 	struct dentry dotdot;
-	struct ext3_dir_entry_2 * de;
+	struct ext4_dir_entry_2 * de;
 	struct buffer_head *bh;
 
 	dotdot.d_name.name = "..";
 	dotdot.d_name.len = 2;
 	dotdot.d_parent = child; /* confusing, isn't it! */
 
-	bh = ext3_find_entry(&dotdot, &de);
+	bh = ext4_find_entry(&dotdot, &de);
 	inode = NULL;
 	if (!bh)
 		return ERR_PTR(-ENOENT);
 	ino = le32_to_cpu(de->inode);
 	brelse(bh);
 
-	if (!ext3_valid_inum(child->d_inode->i_sb, ino)) {
-		ext3_error(child->d_inode->i_sb, "ext3_get_parent",
+	if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
+		ext4_error(child->d_inode->i_sb, "ext4_get_parent",
 			   "bad inode number: %lu", ino);
 		inode = NULL;
 	} else
@@ -1054,65 +1054,65 @@ struct dentry *ext3_get_parent(struct dentry *child)
 }
 
 #define S_SHIFT 12
-static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
-	[S_IFREG >> S_SHIFT]	= EXT3_FT_REG_FILE,
-	[S_IFDIR >> S_SHIFT]	= EXT3_FT_DIR,
-	[S_IFCHR >> S_SHIFT]	= EXT3_FT_CHRDEV,
-	[S_IFBLK >> S_SHIFT]	= EXT3_FT_BLKDEV,
-	[S_IFIFO >> S_SHIFT]	= EXT3_FT_FIFO,
-	[S_IFSOCK >> S_SHIFT]	= EXT3_FT_SOCK,
-	[S_IFLNK >> S_SHIFT]	= EXT3_FT_SYMLINK,
+static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
+	[S_IFREG >> S_SHIFT]	= EXT4_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= EXT4_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= EXT4_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= EXT4_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= EXT4_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= EXT4_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= EXT4_FT_SYMLINK,
 };
 
-static inline void ext3_set_de_type(struct super_block *sb,
-				struct ext3_dir_entry_2 *de,
+static inline void ext4_set_de_type(struct super_block *sb,
+				struct ext4_dir_entry_2 *de,
 				umode_t mode) {
-	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE))
-		de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
+		de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
 }
 
-#ifdef CONFIG_EXT3_INDEX
-static struct ext3_dir_entry_2 *
+#ifdef CONFIG_EXT4_INDEX
+static struct ext4_dir_entry_2 *
 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 {
 	unsigned rec_len = 0;
 
 	while (count--) {
-		struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
-		rec_len = EXT3_DIR_REC_LEN(de->name_len);
+		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs);
+		rec_len = EXT4_DIR_REC_LEN(de->name_len);
 		memcpy (to, de, rec_len);
-		((struct ext3_dir_entry_2 *) to)->rec_len =
+		((struct ext4_dir_entry_2 *) to)->rec_len =
 				cpu_to_le16(rec_len);
 		de->inode = 0;
 		map++;
 		to += rec_len;
 	}
-	return (struct ext3_dir_entry_2 *) (to - rec_len);
+	return (struct ext4_dir_entry_2 *) (to - rec_len);
 }
 
-static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
+static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
 {
-	struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
+	struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
 	unsigned rec_len = 0;
 
 	prev = to = de;
 	while ((char*)de < base + size) {
-		next = (struct ext3_dir_entry_2 *) ((char *) de +
+		next = (struct ext4_dir_entry_2 *) ((char *) de +
 						    le16_to_cpu(de->rec_len));
 		if (de->inode && de->name_len) {
-			rec_len = EXT3_DIR_REC_LEN(de->name_len);
+			rec_len = EXT4_DIR_REC_LEN(de->name_len);
 			if (de > to)
 				memmove(to, de, rec_len);
 			to->rec_len = cpu_to_le16(rec_len);
 			prev = to;
-			to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
+			to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
 		}
 		de = next;
 	}
 	return prev;
 }
 
-static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 			struct buffer_head **bh,struct dx_frame *frame,
 			struct dx_hash_info *hinfo, int *error)
 {
@@ -1124,10 +1124,10 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	struct dx_map_entry *map;
 	char *data1 = (*bh)->b_data, *data2;
 	unsigned split;
-	struct ext3_dir_entry_2 *de = NULL, *de2;
+	struct ext4_dir_entry_2 *de = NULL, *de2;
 	int	err;
 
-	bh2 = ext3_append (handle, dir, &newblock, error);
+	bh2 = ext4_append (handle, dir, &newblock, error);
 	if (!(bh2)) {
 		brelse(*bh);
 		*bh = NULL;
@@ -1135,17 +1135,17 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	}
 
 	BUFFER_TRACE(*bh, "get_write_access");
-	err = ext3_journal_get_write_access(handle, *bh);
+	err = ext4_journal_get_write_access(handle, *bh);
 	if (err) {
 	journal_error:
 		brelse(*bh);
 		brelse(bh2);
 		*bh = NULL;
-		ext3_std_error(dir->i_sb, err);
+		ext4_std_error(dir->i_sb, err);
 		goto errout;
 	}
 	BUFFER_TRACE(frame->bh, "get_write_access");
-	err = ext3_journal_get_write_access(handle, frame->bh);
+	err = ext4_journal_get_write_access(handle, frame->bh);
 	if (err)
 		goto journal_error;
 
@@ -1153,7 +1153,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 
 	/* create map in the end of data2 block */
 	map = (struct dx_map_entry *) (data2 + blocksize);
-	count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
+	count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
 			     blocksize, hinfo, map);
 	map -= count;
 	split = count/2; // need to adjust to actual middle
@@ -1168,8 +1168,8 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	de = dx_pack_dirents(data1,blocksize);
 	de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
 	de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
-	dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
-	dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
+	dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
+	dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
 
 	/* Which block gets the new entry? */
 	if (hinfo->hash >= hash2)
@@ -1178,10 +1178,10 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 		de = de2;
 	}
 	dx_insert_block (frame, hash2 + continued, newblock);
-	err = ext3_journal_dirty_metadata (handle, bh2);
+	err = ext4_journal_dirty_metadata (handle, bh2);
 	if (err)
 		goto journal_error;
-	err = ext3_journal_dirty_metadata (handle, frame->bh);
+	err = ext4_journal_dirty_metadata (handle, frame->bh);
 	if (err)
 		goto journal_error;
 	brelse (bh2);
@@ -1204,7 +1204,7 @@ errout:
  * all other cases bh is released.
  */
 static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
-			     struct inode *inode, struct ext3_dir_entry_2 *de,
+			     struct inode *inode, struct ext4_dir_entry_2 *de,
 			     struct buffer_head * bh)
 {
 	struct inode	*dir = dentry->d_parent->d_inode;
@@ -1215,51 +1215,51 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 	int		nlen, rlen, err;
 	char		*top;
 
-	reclen = EXT3_DIR_REC_LEN(namelen);
+	reclen = EXT4_DIR_REC_LEN(namelen);
 	if (!de) {
-		de = (struct ext3_dir_entry_2 *)bh->b_data;
+		de = (struct ext4_dir_entry_2 *)bh->b_data;
 		top = bh->b_data + dir->i_sb->s_blocksize - reclen;
 		while ((char *) de <= top) {
-			if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
+			if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
 						  bh, offset)) {
 				brelse (bh);
 				return -EIO;
 			}
-			if (ext3_match (namelen, name, de)) {
+			if (ext4_match (namelen, name, de)) {
 				brelse (bh);
 				return -EEXIST;
 			}
-			nlen = EXT3_DIR_REC_LEN(de->name_len);
+			nlen = EXT4_DIR_REC_LEN(de->name_len);
 			rlen = le16_to_cpu(de->rec_len);
 			if ((de->inode? rlen - nlen: rlen) >= reclen)
 				break;
-			de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
+			de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
 			offset += rlen;
 		}
 		if ((char *) de > top)
 			return -ENOSPC;
 	}
 	BUFFER_TRACE(bh, "get_write_access");
-	err = ext3_journal_get_write_access(handle, bh);
+	err = ext4_journal_get_write_access(handle, bh);
 	if (err) {
-		ext3_std_error(dir->i_sb, err);
+		ext4_std_error(dir->i_sb, err);
 		brelse(bh);
 		return err;
 	}
 
 	/* By now the buffer is marked for journaling */
-	nlen = EXT3_DIR_REC_LEN(de->name_len);
+	nlen = EXT4_DIR_REC_LEN(de->name_len);
 	rlen = le16_to_cpu(de->rec_len);
 	if (de->inode) {
-		struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
+		struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
 		de1->rec_len = cpu_to_le16(rlen - nlen);
 		de->rec_len = cpu_to_le16(nlen);
 		de = de1;
 	}
-	de->file_type = EXT3_FT_UNKNOWN;
+	de->file_type = EXT4_FT_UNKNOWN;
 	if (inode) {
 		de->inode = cpu_to_le32(inode->i_ino);
-		ext3_set_de_type(dir->i_sb, de, inode->i_mode);
+		ext4_set_de_type(dir->i_sb, de, inode->i_mode);
 	} else
 		de->inode = 0;
 	de->name_len = namelen;
@@ -1270,24 +1270,24 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 	 * on this.
 	 *
 	 * XXX similarly, too many callers depend on
-	 * ext3_new_inode() setting the times, but error
+	 * ext4_new_inode() setting the times, but error
 	 * recovery deletes the inode, so the worst that can
 	 * happen is that the times are slightly out of date
 	 * and/or different from the directory change time.
 	 */
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
-	ext3_update_dx_flag(dir);
+	ext4_update_dx_flag(dir);
 	dir->i_version++;
-	ext3_mark_inode_dirty(handle, dir);
-	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-	err = ext3_journal_dirty_metadata(handle, bh);
+	ext4_mark_inode_dirty(handle, dir);
+	BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
+	err = ext4_journal_dirty_metadata(handle, bh);
 	if (err)
-		ext3_std_error(dir->i_sb, err);
+		ext4_std_error(dir->i_sb, err);
 	brelse(bh);
 	return 0;
 }
 
-#ifdef CONFIG_EXT3_INDEX
+#ifdef CONFIG_EXT4_INDEX
 /*
  * This converts a one block unindexed directory to a 3 block indexed
  * directory, and adds the dentry to the indexed directory.
@@ -1302,7 +1302,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 	struct dx_root	*root;
 	struct dx_frame	frames[2], *frame;
 	struct dx_entry *entries;
-	struct ext3_dir_entry_2	*de, *de2;
+	struct ext4_dir_entry_2	*de, *de2;
 	char		*data1, *top;
 	unsigned	len;
 	int		retval;
@@ -1313,38 +1313,38 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 
 	blocksize =  dir->i_sb->s_blocksize;
 	dxtrace(printk("Creating index\n"));
-	retval = ext3_journal_get_write_access(handle, bh);
+	retval = ext4_journal_get_write_access(handle, bh);
 	if (retval) {
-		ext3_std_error(dir->i_sb, retval);
+		ext4_std_error(dir->i_sb, retval);
 		brelse(bh);
 		return retval;
 	}
 	root = (struct dx_root *) bh->b_data;
 
-	bh2 = ext3_append (handle, dir, &block, &retval);
+	bh2 = ext4_append (handle, dir, &block, &retval);
 	if (!(bh2)) {
 		brelse(bh);
 		return retval;
 	}
-	EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
+	EXT4_I(dir)->i_flags |= EXT4_INDEX_FL;
 	data1 = bh2->b_data;
 
 	/* The 0th block becomes the root, move the dirents out */
 	fde = &root->dotdot;
-	de = (struct ext3_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len));
+	de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len));
 	len = ((char *) root) + blocksize - (char *) de;
 	memcpy (data1, de, len);
-	de = (struct ext3_dir_entry_2 *) data1;
+	de = (struct ext4_dir_entry_2 *) data1;
 	top = data1 + len;
 	while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top)
 		de = de2;
 	de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
 	/* Initialize the root; the dot dirents already exist */
-	de = (struct ext3_dir_entry_2 *) (&root->dotdot);
-	de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
+	de = (struct ext4_dir_entry_2 *) (&root->dotdot);
+	de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2));
 	memset (&root->info, 0, sizeof(root->info));
 	root->info.info_length = sizeof(root->info);
-	root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+	root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
 	entries = root->entries;
 	dx_set_block (entries, 1);
 	dx_set_count (entries, 1);
@@ -1352,8 +1352,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 
 	/* Initialize as for dx_probe */
 	hinfo.hash_version = root->info.hash_version;
-	hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
-	ext3fs_dirhash(name, namelen, &hinfo);
+	hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+	ext4fs_dirhash(name, namelen, &hinfo);
 	frame = frames;
 	frame->entries = entries;
 	frame->at = entries;
@@ -1369,25 +1369,25 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 #endif
 
 /*
- *	ext3_add_entry()
+ *	ext4_add_entry()
  *
  * adds a file entry to the specified directory, using the same
- * semantics as ext3_find_entry(). It returns NULL if it failed.
+ * semantics as ext4_find_entry(). It returns NULL if it failed.
  *
  * NOTE!! The inode part of 'de' is left at 0 - which means you
  * may not sleep between calling this and putting something into
  * the entry, as someone else might have used it while you slept.
  */
-static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
+static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
 	struct inode *inode)
 {
 	struct inode *dir = dentry->d_parent->d_inode;
 	unsigned long offset;
 	struct buffer_head * bh;
-	struct ext3_dir_entry_2 *de;
+	struct ext4_dir_entry_2 *de;
 	struct super_block * sb;
 	int	retval;
-#ifdef CONFIG_EXT3_INDEX
+#ifdef CONFIG_EXT4_INDEX
 	int	dx_fallback=0;
 #endif
 	unsigned blocksize;
@@ -1397,46 +1397,46 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
 	blocksize = sb->s_blocksize;
 	if (!dentry->d_name.len)
 		return -EINVAL;
-#ifdef CONFIG_EXT3_INDEX
+#ifdef CONFIG_EXT4_INDEX
 	if (is_dx(dir)) {
-		retval = ext3_dx_add_entry(handle, dentry, inode);
+		retval = ext4_dx_add_entry(handle, dentry, inode);
 		if (!retval || (retval != ERR_BAD_DX_DIR))
 			return retval;
-		EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
+		EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL;
 		dx_fallback++;
-		ext3_mark_inode_dirty(handle, dir);
+		ext4_mark_inode_dirty(handle, dir);
 	}
 #endif
 	blocks = dir->i_size >> sb->s_blocksize_bits;
 	for (block = 0, offset = 0; block < blocks; block++) {
-		bh = ext3_bread(handle, dir, block, 0, &retval);
+		bh = ext4_bread(handle, dir, block, 0, &retval);
 		if(!bh)
 			return retval;
 		retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
 		if (retval != -ENOSPC)
 			return retval;
 
-#ifdef CONFIG_EXT3_INDEX
+#ifdef CONFIG_EXT4_INDEX
 		if (blocks == 1 && !dx_fallback &&
-		    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
+		    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
 			return make_indexed_dir(handle, dentry, inode, bh);
 #endif
 		brelse(bh);
 	}
-	bh = ext3_append(handle, dir, &block, &retval);
+	bh = ext4_append(handle, dir, &block, &retval);
 	if (!bh)
 		return retval;
-	de = (struct ext3_dir_entry_2 *) bh->b_data;
+	de = (struct ext4_dir_entry_2 *) bh->b_data;
 	de->inode = 0;
 	de->rec_len = cpu_to_le16(blocksize);
 	return add_dirent_to_buf(handle, dentry, inode, de, bh);
 }
 
-#ifdef CONFIG_EXT3_INDEX
+#ifdef CONFIG_EXT4_INDEX
 /*
  * Returns 0 for success, or a negative error value
  */
-static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			     struct inode *inode)
 {
 	struct dx_frame frames[2], *frame;
@@ -1445,7 +1445,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 	struct buffer_head * bh;
 	struct inode *dir = dentry->d_parent->d_inode;
 	struct super_block * sb = dir->i_sb;
-	struct ext3_dir_entry_2 *de;
+	struct ext4_dir_entry_2 *de;
 	int err;
 
 	frame = dx_probe(dentry, NULL, &hinfo, frames, &err);
@@ -1454,11 +1454,11 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 	entries = frame->entries;
 	at = frame->at;
 
-	if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
+	if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
 		goto cleanup;
 
 	BUFFER_TRACE(bh, "get_write_access");
-	err = ext3_journal_get_write_access(handle, bh);
+	err = ext4_journal_get_write_access(handle, bh);
 	if (err)
 		goto journal_error;
 
@@ -1482,12 +1482,12 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 
 		if (levels && (dx_get_count(frames->entries) ==
 			       dx_get_limit(frames->entries))) {
-			ext3_warning(sb, __FUNCTION__,
+			ext4_warning(sb, __FUNCTION__,
 				     "Directory index full!");
 			err = -ENOSPC;
 			goto cleanup;
 		}
-		bh2 = ext3_append (handle, dir, &newblock, &err);
+		bh2 = ext4_append (handle, dir, &newblock, &err);
 		if (!(bh2))
 			goto cleanup;
 		node2 = (struct dx_node *)(bh2->b_data);
@@ -1495,7 +1495,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 		node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
 		node2->fake.inode = 0;
 		BUFFER_TRACE(frame->bh, "get_write_access");
-		err = ext3_journal_get_write_access(handle, frame->bh);
+		err = ext4_journal_get_write_access(handle, frame->bh);
 		if (err)
 			goto journal_error;
 		if (levels) {
@@ -1504,7 +1504,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			dxtrace(printk("Split index %i/%i\n", icount1, icount2));
 
 			BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
-			err = ext3_journal_get_write_access(handle,
+			err = ext4_journal_get_write_access(handle,
 							     frames[0].bh);
 			if (err)
 				goto journal_error;
@@ -1525,7 +1525,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			dxtrace(dx_show_index ("node", frames[1].entries));
 			dxtrace(dx_show_index ("node",
 			       ((struct dx_node *) bh2->b_data)->entries));
-			err = ext3_journal_dirty_metadata(handle, bh2);
+			err = ext4_journal_dirty_metadata(handle, bh2);
 			if (err)
 				goto journal_error;
 			brelse (bh2);
@@ -1545,12 +1545,12 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			frame->at = at = at - entries + entries2;
 			frame->entries = entries = entries2;
 			frame->bh = bh2;
-			err = ext3_journal_get_write_access(handle,
+			err = ext4_journal_get_write_access(handle,
 							     frame->bh);
 			if (err)
 				goto journal_error;
 		}
-		ext3_journal_dirty_metadata(handle, frames[0].bh);
+		ext4_journal_dirty_metadata(handle, frames[0].bh);
 	}
 	de = do_split(handle, dir, &bh, frame, &hinfo, &err);
 	if (!de)
@@ -1560,7 +1560,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 	goto cleanup;
 
 journal_error:
-	ext3_std_error(dir->i_sb, err);
+	ext4_std_error(dir->i_sb, err);
 cleanup:
 	if (bh)
 		brelse(bh);
@@ -1570,26 +1570,26 @@ cleanup:
 #endif
 
 /*
- * ext3_delete_entry deletes a directory entry by merging it with the
+ * ext4_delete_entry deletes a directory entry by merging it with the
  * previous entry
  */
-static int ext3_delete_entry (handle_t *handle,
+static int ext4_delete_entry (handle_t *handle,
 			      struct inode * dir,
-			      struct ext3_dir_entry_2 * de_del,
+			      struct ext4_dir_entry_2 * de_del,
 			      struct buffer_head * bh)
 {
-	struct ext3_dir_entry_2 * de, * pde;
+	struct ext4_dir_entry_2 * de, * pde;
 	int i;
 
 	i = 0;
 	pde = NULL;
-	de = (struct ext3_dir_entry_2 *) bh->b_data;
+	de = (struct ext4_dir_entry_2 *) bh->b_data;
 	while (i < bh->b_size) {
-		if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
+		if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i))
 			return -EIO;
 		if (de == de_del)  {
 			BUFFER_TRACE(bh, "get_write_access");
-			ext3_journal_get_write_access(handle, bh);
+			ext4_journal_get_write_access(handle, bh);
 			if (pde)
 				pde->rec_len =
 					cpu_to_le16(le16_to_cpu(pde->rec_len) +
@@ -1597,43 +1597,43 @@ static int ext3_delete_entry (handle_t *handle,
 			else
 				de->inode = 0;
 			dir->i_version++;
-			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-			ext3_journal_dirty_metadata(handle, bh);
+			BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
+			ext4_journal_dirty_metadata(handle, bh);
 			return 0;
 		}
 		i += le16_to_cpu(de->rec_len);
 		pde = de;
-		de = (struct ext3_dir_entry_2 *)
+		de = (struct ext4_dir_entry_2 *)
 			((char *) de + le16_to_cpu(de->rec_len));
 	}
 	return -ENOENT;
 }
 
 /*
- * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
+ * ext4_mark_inode_dirty is somewhat expensive, so unlike ext2 we
  * do not perform it in these functions.  We perform it at the call site,
  * if it is needed.
  */
-static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
+static inline void ext4_inc_count(handle_t *handle, struct inode *inode)
 {
 	inc_nlink(inode);
 }
 
-static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
+static inline void ext4_dec_count(handle_t *handle, struct inode *inode)
 {
 	drop_nlink(inode);
 }
 
-static int ext3_add_nondir(handle_t *handle,
+static int ext4_add_nondir(handle_t *handle,
 		struct dentry *dentry, struct inode *inode)
 {
-	int err = ext3_add_entry(handle, dentry, inode);
+	int err = ext4_add_entry(handle, dentry, inode);
 	if (!err) {
-		ext3_mark_inode_dirty(handle, inode);
+		ext4_mark_inode_dirty(handle, inode);
 		d_instantiate(dentry, inode);
 		return 0;
 	}
-	ext3_dec_count(handle, inode);
+	ext4_dec_count(handle, inode);
 	iput(inode);
 	return err;
 }
@@ -1646,7 +1646,7 @@ static int ext3_add_nondir(handle_t *handle,
  * If the create succeeds, we fill in the inode information
  * with d_instantiate().
  */
-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+static int ext4_create (struct inode * dir, struct dentry * dentry, int mode,
 		struct nameidata *nd)
 {
 	handle_t *handle;
@@ -1654,30 +1654,30 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
 	int err, retries = 0;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
-					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+					2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
 	if (IS_DIRSYNC(dir))
 		handle->h_sync = 1;
 
-	inode = ext3_new_inode (handle, dir, mode);
+	inode = ext4_new_inode (handle, dir, mode);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
-		inode->i_op = &ext3_file_inode_operations;
-		inode->i_fop = &ext3_file_operations;
-		ext3_set_aops(inode);
-		err = ext3_add_nondir(handle, dentry, inode);
+		inode->i_op = &ext4_file_inode_operations;
+		inode->i_fop = &ext4_file_operations;
+		ext4_set_aops(inode);
+		err = ext4_add_nondir(handle, dentry, inode);
 	}
-	ext3_journal_stop(handle);
-	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+	ext4_journal_stop(handle);
+	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
 }
 
-static int ext3_mknod (struct inode * dir, struct dentry *dentry,
+static int ext4_mknod (struct inode * dir, struct dentry *dentry,
 			int mode, dev_t rdev)
 {
 	handle_t *handle;
@@ -1688,100 +1688,100 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
 		return -EINVAL;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
-					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+					2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
 	if (IS_DIRSYNC(dir))
 		handle->h_sync = 1;
 
-	inode = ext3_new_inode (handle, dir, mode);
+	inode = ext4_new_inode (handle, dir, mode);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		init_special_inode(inode, inode->i_mode, rdev);
-#ifdef CONFIG_EXT3_FS_XATTR
-		inode->i_op = &ext3_special_inode_operations;
+#ifdef CONFIG_EXT4DEV_FS_XATTR
+		inode->i_op = &ext4_special_inode_operations;
 #endif
-		err = ext3_add_nondir(handle, dentry, inode);
+		err = ext4_add_nondir(handle, dentry, inode);
 	}
-	ext3_journal_stop(handle);
-	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+	ext4_journal_stop(handle);
+	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
 }
 
-static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 {
 	handle_t *handle;
 	struct inode * inode;
 	struct buffer_head * dir_block;
-	struct ext3_dir_entry_2 * de;
+	struct ext4_dir_entry_2 * de;
 	int err, retries = 0;
 
-	if (dir->i_nlink >= EXT3_LINK_MAX)
+	if (dir->i_nlink >= EXT4_LINK_MAX)
 		return -EMLINK;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
-					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+					2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
 	if (IS_DIRSYNC(dir))
 		handle->h_sync = 1;
 
-	inode = ext3_new_inode (handle, dir, S_IFDIR | mode);
+	inode = ext4_new_inode (handle, dir, S_IFDIR | mode);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_stop;
 
-	inode->i_op = &ext3_dir_inode_operations;
-	inode->i_fop = &ext3_dir_operations;
-	inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-	dir_block = ext3_bread (handle, inode, 0, 1, &err);
+	inode->i_op = &ext4_dir_inode_operations;
+	inode->i_fop = &ext4_dir_operations;
+	inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+	dir_block = ext4_bread (handle, inode, 0, 1, &err);
 	if (!dir_block) {
 		drop_nlink(inode); /* is this nlink == 0? */
-		ext3_mark_inode_dirty(handle, inode);
+		ext4_mark_inode_dirty(handle, inode);
 		iput (inode);
 		goto out_stop;
 	}
 	BUFFER_TRACE(dir_block, "get_write_access");
-	ext3_journal_get_write_access(handle, dir_block);
-	de = (struct ext3_dir_entry_2 *) dir_block->b_data;
+	ext4_journal_get_write_access(handle, dir_block);
+	de = (struct ext4_dir_entry_2 *) dir_block->b_data;
 	de->inode = cpu_to_le32(inode->i_ino);
 	de->name_len = 1;
-	de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len));
+	de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len));
 	strcpy (de->name, ".");
-	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
-	de = (struct ext3_dir_entry_2 *)
+	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
+	de = (struct ext4_dir_entry_2 *)
 			((char *) de + le16_to_cpu(de->rec_len));
 	de->inode = cpu_to_le32(dir->i_ino);
-	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
+	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1));
 	de->name_len = 2;
 	strcpy (de->name, "..");
-	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
+	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
 	inode->i_nlink = 2;
-	BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
-	ext3_journal_dirty_metadata(handle, dir_block);
+	BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
+	ext4_journal_dirty_metadata(handle, dir_block);
 	brelse (dir_block);
-	ext3_mark_inode_dirty(handle, inode);
-	err = ext3_add_entry (handle, dentry, inode);
+	ext4_mark_inode_dirty(handle, inode);
+	err = ext4_add_entry (handle, dentry, inode);
 	if (err) {
 		inode->i_nlink = 0;
-		ext3_mark_inode_dirty(handle, inode);
+		ext4_mark_inode_dirty(handle, inode);
 		iput (inode);
 		goto out_stop;
 	}
 	inc_nlink(dir);
-	ext3_update_dx_flag(dir);
-	ext3_mark_inode_dirty(handle, dir);
+	ext4_update_dx_flag(dir);
+	ext4_mark_inode_dirty(handle, dir);
 	d_instantiate(dentry, inode);
 out_stop:
-	ext3_journal_stop(handle);
-	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+	ext4_journal_stop(handle);
+	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
 }
@@ -1793,59 +1793,59 @@ static int empty_dir (struct inode * inode)
 {
 	unsigned long offset;
 	struct buffer_head * bh;
-	struct ext3_dir_entry_2 * de, * de1;
+	struct ext4_dir_entry_2 * de, * de1;
 	struct super_block * sb;
 	int err = 0;
 
 	sb = inode->i_sb;
-	if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
-	    !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
+	if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
+	    !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
 		if (err)
-			ext3_error(inode->i_sb, __FUNCTION__,
+			ext4_error(inode->i_sb, __FUNCTION__,
 				   "error %d reading directory #%lu offset 0",
 				   err, inode->i_ino);
 		else
-			ext3_warning(inode->i_sb, __FUNCTION__,
+			ext4_warning(inode->i_sb, __FUNCTION__,
 				     "bad directory (dir #%lu) - no data block",
 				     inode->i_ino);
 		return 1;
 	}
-	de = (struct ext3_dir_entry_2 *) bh->b_data;
-	de1 = (struct ext3_dir_entry_2 *)
+	de = (struct ext4_dir_entry_2 *) bh->b_data;
+	de1 = (struct ext4_dir_entry_2 *)
 			((char *) de + le16_to_cpu(de->rec_len));
 	if (le32_to_cpu(de->inode) != inode->i_ino ||
 			!le32_to_cpu(de1->inode) ||
 			strcmp (".", de->name) ||
 			strcmp ("..", de1->name)) {
-		ext3_warning (inode->i_sb, "empty_dir",
+		ext4_warning (inode->i_sb, "empty_dir",
 			      "bad directory (dir #%lu) - no `.' or `..'",
 			      inode->i_ino);
 		brelse (bh);
 		return 1;
 	}
 	offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
-	de = (struct ext3_dir_entry_2 *)
+	de = (struct ext4_dir_entry_2 *)
 			((char *) de1 + le16_to_cpu(de1->rec_len));
 	while (offset < inode->i_size ) {
 		if (!bh ||
 			(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
 			err = 0;
 			brelse (bh);
-			bh = ext3_bread (NULL, inode,
-				offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
+			bh = ext4_bread (NULL, inode,
+				offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
 			if (!bh) {
 				if (err)
-					ext3_error(sb, __FUNCTION__,
+					ext4_error(sb, __FUNCTION__,
 						   "error %d reading directory"
 						   " #%lu offset %lu",
 						   err, inode->i_ino, offset);
 				offset += sb->s_blocksize;
 				continue;
 			}
-			de = (struct ext3_dir_entry_2 *) bh->b_data;
+			de = (struct ext4_dir_entry_2 *) bh->b_data;
 		}
-		if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
-			de = (struct ext3_dir_entry_2 *)(bh->b_data +
+		if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) {
+			de = (struct ext4_dir_entry_2 *)(bh->b_data +
 							 sb->s_blocksize);
 			offset = (offset | (sb->s_blocksize - 1)) + 1;
 			continue;
@@ -1855,57 +1855,57 @@ static int empty_dir (struct inode * inode)
 			return 0;
 		}
 		offset += le16_to_cpu(de->rec_len);
-		de = (struct ext3_dir_entry_2 *)
+		de = (struct ext4_dir_entry_2 *)
 				((char *) de + le16_to_cpu(de->rec_len));
 	}
 	brelse (bh);
 	return 1;
 }
 
-/* ext3_orphan_add() links an unlinked or truncated inode into a list of
+/* ext4_orphan_add() links an unlinked or truncated inode into a list of
  * such inodes, starting at the superblock, in case we crash before the
  * file is closed/deleted, or in case the inode truncate spans multiple
  * transactions and the last transaction is not recovered after a crash.
  *
  * At filesystem recovery time, we walk this list deleting unlinked
- * inodes and truncating linked inodes in ext3_orphan_cleanup().
+ * inodes and truncating linked inodes in ext4_orphan_cleanup().
  */
-int ext3_orphan_add(handle_t *handle, struct inode *inode)
+int ext4_orphan_add(handle_t *handle, struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
-	struct ext3_iloc iloc;
+	struct ext4_iloc iloc;
 	int err = 0, rc;
 
 	lock_super(sb);
-	if (!list_empty(&EXT3_I(inode)->i_orphan))
+	if (!list_empty(&EXT4_I(inode)->i_orphan))
 		goto out_unlock;
 
 	/* Orphan handling is only valid for files with data blocks
 	 * being truncated, or files being unlinked. */
 
 	/* @@@ FIXME: Observation from aviro:
-	 * I think I can trigger J_ASSERT in ext3_orphan_add().  We block
-	 * here (on lock_super()), so race with ext3_link() which might bump
+	 * I think I can trigger J_ASSERT in ext4_orphan_add().  We block
+	 * here (on lock_super()), so race with ext4_link() which might bump
 	 * ->i_nlink. For, say it, character device. Not a regular file,
 	 * not a directory, not a symlink and ->i_nlink > 0.
 	 */
 	J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
 
-	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
-	err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
 	if (err)
 		goto out_unlock;
 
-	err = ext3_reserve_inode_write(handle, inode, &iloc);
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (err)
 		goto out_unlock;
 
 	/* Insert this inode at the head of the on-disk orphan list... */
-	NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan);
-	EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
-	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-	rc = ext3_mark_iloc_dirty(handle, inode, &iloc);
+	NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
+	EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
+	err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
 	if (!err)
 		err = rc;
 
@@ -1918,28 +1918,28 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 	 * This is safe: on error we're going to ignore the orphan list
 	 * anyway on the next recovery. */
 	if (!err)
-		list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
+		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
 
 	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
 out_unlock:
 	unlock_super(sb);
-	ext3_std_error(inode->i_sb, err);
+	ext4_std_error(inode->i_sb, err);
 	return err;
 }
 
 /*
- * ext3_orphan_del() removes an unlinked or truncated inode from the list
+ * ext4_orphan_del() removes an unlinked or truncated inode from the list
  * of such inodes stored on disk, because it is finally being cleaned up.
  */
-int ext3_orphan_del(handle_t *handle, struct inode *inode)
+int ext4_orphan_del(handle_t *handle, struct inode *inode)
 {
 	struct list_head *prev;
-	struct ext3_inode_info *ei = EXT3_I(inode);
-	struct ext3_sb_info *sbi;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi;
 	unsigned long ino_next;
-	struct ext3_iloc iloc;
+	struct ext4_iloc iloc;
 	int err = 0;
 
 	lock_super(inode->i_sb);
@@ -1950,7 +1950,7 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
 
 	ino_next = NEXT_ORPHAN(inode);
 	prev = ei->i_orphan.prev;
-	sbi = EXT3_SB(inode->i_sb);
+	sbi = EXT4_SB(inode->i_sb);
 
 	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
 
@@ -1963,38 +1963,38 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
 	if (!handle)
 		goto out;
 
-	err = ext3_reserve_inode_write(handle, inode, &iloc);
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (err)
 		goto out_err;
 
 	if (prev == &sbi->s_orphan) {
 		jbd_debug(4, "superblock will point to %lu\n", ino_next);
 		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
-		err = ext3_journal_get_write_access(handle, sbi->s_sbh);
+		err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 		if (err)
 			goto out_brelse;
 		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
-		err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+		err = ext4_journal_dirty_metadata(handle, sbi->s_sbh);
 	} else {
-		struct ext3_iloc iloc2;
+		struct ext4_iloc iloc2;
 		struct inode *i_prev =
-			&list_entry(prev, struct ext3_inode_info, i_orphan)->vfs_inode;
+			&list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
 
 		jbd_debug(4, "orphan inode %lu will point to %lu\n",
 			  i_prev->i_ino, ino_next);
-		err = ext3_reserve_inode_write(handle, i_prev, &iloc2);
+		err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
 		if (err)
 			goto out_brelse;
 		NEXT_ORPHAN(i_prev) = ino_next;
-		err = ext3_mark_iloc_dirty(handle, i_prev, &iloc2);
+		err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
 	}
 	if (err)
 		goto out_brelse;
 	NEXT_ORPHAN(inode) = 0;
-	err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
 
 out_err:
-	ext3_std_error(inode->i_sb, err);
+	ext4_std_error(inode->i_sb, err);
 out:
 	unlock_super(inode->i_sb);
 	return err;
@@ -2004,23 +2004,23 @@ out_brelse:
 	goto out_err;
 }
 
-static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
+static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
 {
 	int retval;
 	struct inode * inode;
 	struct buffer_head * bh;
-	struct ext3_dir_entry_2 * de;
+	struct ext4_dir_entry_2 * de;
 	handle_t *handle;
 
 	/* Initialize quotas before so that eventual writes go in
 	 * separate transaction */
 	DQUOT_INIT(dentry->d_inode);
-	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
+	handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
 	retval = -ENOENT;
-	bh = ext3_find_entry (dentry, &de);
+	bh = ext4_find_entry (dentry, &de);
 	if (!bh)
 		goto end_rmdir;
 
@@ -2037,11 +2037,11 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
 	if (!empty_dir (inode))
 		goto end_rmdir;
 
-	retval = ext3_delete_entry(handle, dir, de, bh);
+	retval = ext4_delete_entry(handle, dir, de, bh);
 	if (retval)
 		goto end_rmdir;
 	if (inode->i_nlink != 2)
-		ext3_warning (inode->i_sb, "ext3_rmdir",
+		ext4_warning (inode->i_sb, "ext4_rmdir",
 			      "empty directory has nlink!=2 (%d)",
 			      inode->i_nlink);
 	inode->i_version++;
@@ -2050,31 +2050,31 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
 	 * zero will ensure that the right thing happens during any
 	 * recovery. */
 	inode->i_size = 0;
-	ext3_orphan_add(handle, inode);
+	ext4_orphan_add(handle, inode);
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
-	ext3_mark_inode_dirty(handle, inode);
+	ext4_mark_inode_dirty(handle, inode);
 	drop_nlink(dir);
-	ext3_update_dx_flag(dir);
-	ext3_mark_inode_dirty(handle, dir);
+	ext4_update_dx_flag(dir);
+	ext4_mark_inode_dirty(handle, dir);
 
 end_rmdir:
-	ext3_journal_stop(handle);
+	ext4_journal_stop(handle);
 	brelse (bh);
 	return retval;
 }
 
-static int ext3_unlink(struct inode * dir, struct dentry *dentry)
+static int ext4_unlink(struct inode * dir, struct dentry *dentry)
 {
 	int retval;
 	struct inode * inode;
 	struct buffer_head * bh;
-	struct ext3_dir_entry_2 * de;
+	struct ext4_dir_entry_2 * de;
 	handle_t *handle;
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
 	DQUOT_INIT(dentry->d_inode);
-	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
+	handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2082,7 +2082,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
 		handle->h_sync = 1;
 
 	retval = -ENOENT;
-	bh = ext3_find_entry (dentry, &de);
+	bh = ext4_find_entry (dentry, &de);
 	if (!bh)
 		goto end_unlink;
 
@@ -2093,31 +2093,31 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
 		goto end_unlink;
 
 	if (!inode->i_nlink) {
-		ext3_warning (inode->i_sb, "ext3_unlink",
+		ext4_warning (inode->i_sb, "ext4_unlink",
 			      "Deleting nonexistent file (%lu), %d",
 			      inode->i_ino, inode->i_nlink);
 		inode->i_nlink = 1;
 	}
-	retval = ext3_delete_entry(handle, dir, de, bh);
+	retval = ext4_delete_entry(handle, dir, de, bh);
 	if (retval)
 		goto end_unlink;
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
-	ext3_update_dx_flag(dir);
-	ext3_mark_inode_dirty(handle, dir);
+	ext4_update_dx_flag(dir);
+	ext4_mark_inode_dirty(handle, dir);
 	drop_nlink(inode);
 	if (!inode->i_nlink)
-		ext3_orphan_add(handle, inode);
+		ext4_orphan_add(handle, inode);
 	inode->i_ctime = dir->i_ctime;
-	ext3_mark_inode_dirty(handle, inode);
+	ext4_mark_inode_dirty(handle, inode);
 	retval = 0;
 
 end_unlink:
-	ext3_journal_stop(handle);
+	ext4_journal_stop(handle);
 	brelse (bh);
 	return retval;
 }
 
-static int ext3_symlink (struct inode * dir,
+static int ext4_symlink (struct inode * dir,
 		struct dentry *dentry, const char * symname)
 {
 	handle_t *handle;
@@ -2129,63 +2129,63 @@ static int ext3_symlink (struct inode * dir,
 		return -ENAMETOOLONG;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
-					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
+					2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
 	if (IS_DIRSYNC(dir))
 		handle->h_sync = 1;
 
-	inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
+	inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_stop;
 
-	if (l > sizeof (EXT3_I(inode)->i_data)) {
-		inode->i_op = &ext3_symlink_inode_operations;
-		ext3_set_aops(inode);
+	if (l > sizeof (EXT4_I(inode)->i_data)) {
+		inode->i_op = &ext4_symlink_inode_operations;
+		ext4_set_aops(inode);
 		/*
-		 * page_symlink() calls into ext3_prepare/commit_write.
+		 * page_symlink() calls into ext4_prepare/commit_write.
 		 * We have a transaction open.  All is sweetness.  It also sets
 		 * i_size in generic_commit_write().
 		 */
 		err = __page_symlink(inode, symname, l,
 				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
 		if (err) {
-			ext3_dec_count(handle, inode);
-			ext3_mark_inode_dirty(handle, inode);
+			ext4_dec_count(handle, inode);
+			ext4_mark_inode_dirty(handle, inode);
 			iput (inode);
 			goto out_stop;
 		}
 	} else {
-		inode->i_op = &ext3_fast_symlink_inode_operations;
-		memcpy((char*)&EXT3_I(inode)->i_data,symname,l);
+		inode->i_op = &ext4_fast_symlink_inode_operations;
+		memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
 		inode->i_size = l-1;
 	}
-	EXT3_I(inode)->i_disksize = inode->i_size;
-	err = ext3_add_nondir(handle, dentry, inode);
+	EXT4_I(inode)->i_disksize = inode->i_size;
+	err = ext4_add_nondir(handle, dentry, inode);
 out_stop:
-	ext3_journal_stop(handle);
-	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+	ext4_journal_stop(handle);
+	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
 }
 
-static int ext3_link (struct dentry * old_dentry,
+static int ext4_link (struct dentry * old_dentry,
 		struct inode * dir, struct dentry *dentry)
 {
 	handle_t *handle;
 	struct inode *inode = old_dentry->d_inode;
 	int err, retries = 0;
 
-	if (inode->i_nlink >= EXT3_LINK_MAX)
+	if (inode->i_nlink >= EXT4_LINK_MAX)
 		return -EMLINK;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
-					EXT3_INDEX_EXTRA_TRANS_BLOCKS);
+	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+					EXT4_INDEX_EXTRA_TRANS_BLOCKS);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2193,31 +2193,31 @@ retry:
 		handle->h_sync = 1;
 
 	inode->i_ctime = CURRENT_TIME_SEC;
-	ext3_inc_count(handle, inode);
+	ext4_inc_count(handle, inode);
 	atomic_inc(&inode->i_count);
 
-	err = ext3_add_nondir(handle, dentry, inode);
-	ext3_journal_stop(handle);
-	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+	err = ext4_add_nondir(handle, dentry, inode);
+	ext4_journal_stop(handle);
+	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
 }
 
 #define PARENT_INO(buffer) \
-	((struct ext3_dir_entry_2 *) ((char *) buffer + \
-	le16_to_cpu(((struct ext3_dir_entry_2 *) buffer)->rec_len)))->inode
+	((struct ext4_dir_entry_2 *) ((char *) buffer + \
+	le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode
 
 /*
  * Anybody can rename anything with this: the permission checks are left to the
  * higher-level routines.
  */
-static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
+static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
 			   struct inode * new_dir,struct dentry *new_dentry)
 {
 	handle_t *handle;
 	struct inode * old_inode, * new_inode;
 	struct buffer_head * old_bh, * new_bh, * dir_bh;
-	struct ext3_dir_entry_2 * old_de, * new_de;
+	struct ext4_dir_entry_2 * old_de, * new_de;
 	int retval;
 
 	old_bh = new_bh = dir_bh = NULL;
@@ -2226,16 +2226,16 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 	 * in separate transaction */
 	if (new_dentry->d_inode)
 		DQUOT_INIT(new_dentry->d_inode);
-	handle = ext3_journal_start(old_dir, 2 *
-					EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
-					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
+	handle = ext4_journal_start(old_dir, 2 *
+					EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
+					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
 	if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
 		handle->h_sync = 1;
 
-	old_bh = ext3_find_entry (old_dentry, &old_de);
+	old_bh = ext4_find_entry (old_dentry, &old_de);
 	/*
 	 *  Check for inode number is _not_ due to possible IO errors.
 	 *  We might rmdir the source, keep it as pwd of some process
@@ -2248,7 +2248,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 		goto end_rename;
 
 	new_inode = new_dentry->d_inode;
-	new_bh = ext3_find_entry (new_dentry, &new_de);
+	new_bh = ext4_find_entry (new_dentry, &new_de);
 	if (new_bh) {
 		if (!new_inode) {
 			brelse (new_bh);
@@ -2262,30 +2262,30 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 				goto end_rename;
 		}
 		retval = -EIO;
-		dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval);
+		dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval);
 		if (!dir_bh)
 			goto end_rename;
 		if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
 			goto end_rename;
 		retval = -EMLINK;
 		if (!new_inode && new_dir!=old_dir &&
-				new_dir->i_nlink >= EXT3_LINK_MAX)
+				new_dir->i_nlink >= EXT4_LINK_MAX)
 			goto end_rename;
 	}
 	if (!new_bh) {
-		retval = ext3_add_entry (handle, new_dentry, old_inode);
+		retval = ext4_add_entry (handle, new_dentry, old_inode);
 		if (retval)
 			goto end_rename;
 	} else {
 		BUFFER_TRACE(new_bh, "get write access");
-		ext3_journal_get_write_access(handle, new_bh);
+		ext4_journal_get_write_access(handle, new_bh);
 		new_de->inode = cpu_to_le32(old_inode->i_ino);
-		if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
-					      EXT3_FEATURE_INCOMPAT_FILETYPE))
+		if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
+					      EXT4_FEATURE_INCOMPAT_FILETYPE))
 			new_de->file_type = old_de->file_type;
 		new_dir->i_version++;
-		BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
-		ext3_journal_dirty_metadata(handle, new_bh);
+		BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
+		ext4_journal_dirty_metadata(handle, new_bh);
 		brelse(new_bh);
 		new_bh = NULL;
 	}
@@ -2295,7 +2295,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 	 * rename.
 	 */
 	old_inode->i_ctime = CURRENT_TIME_SEC;
-	ext3_mark_inode_dirty(handle, old_inode);
+	ext4_mark_inode_dirty(handle, old_inode);
 
 	/*
 	 * ok, that's it
@@ -2303,24 +2303,24 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 	if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
 	    old_de->name_len != old_dentry->d_name.len ||
 	    strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
-	    (retval = ext3_delete_entry(handle, old_dir,
+	    (retval = ext4_delete_entry(handle, old_dir,
 					old_de, old_bh)) == -ENOENT) {
 		/* old_de could have moved from under us during htree split, so
 		 * make sure that we are deleting the right entry.  We might
 		 * also be pointing to a stale entry in the unused part of
 		 * old_bh so just checking inum and the name isn't enough. */
 		struct buffer_head *old_bh2;
-		struct ext3_dir_entry_2 *old_de2;
+		struct ext4_dir_entry_2 *old_de2;
 
-		old_bh2 = ext3_find_entry(old_dentry, &old_de2);
+		old_bh2 = ext4_find_entry(old_dentry, &old_de2);
 		if (old_bh2) {
-			retval = ext3_delete_entry(handle, old_dir,
+			retval = ext4_delete_entry(handle, old_dir,
 						   old_de2, old_bh2);
 			brelse(old_bh2);
 		}
 	}
 	if (retval) {
-		ext3_warning(old_dir->i_sb, "ext3_rename",
+		ext4_warning(old_dir->i_sb, "ext4_rename",
 				"Deleting old file (%lu), %d, error=%d",
 				old_dir->i_ino, old_dir->i_nlink, retval);
 	}
@@ -2330,27 +2330,27 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 	}
 	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
-	ext3_update_dx_flag(old_dir);
+	ext4_update_dx_flag(old_dir);
 	if (dir_bh) {
 		BUFFER_TRACE(dir_bh, "get_write_access");
-		ext3_journal_get_write_access(handle, dir_bh);
+		ext4_journal_get_write_access(handle, dir_bh);
 		PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
-		BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
-		ext3_journal_dirty_metadata(handle, dir_bh);
+		BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
+		ext4_journal_dirty_metadata(handle, dir_bh);
 		drop_nlink(old_dir);
 		if (new_inode) {
 			drop_nlink(new_inode);
 		} else {
 			inc_nlink(new_dir);
-			ext3_update_dx_flag(new_dir);
-			ext3_mark_inode_dirty(handle, new_dir);
+			ext4_update_dx_flag(new_dir);
+			ext4_mark_inode_dirty(handle, new_dir);
 		}
 	}
-	ext3_mark_inode_dirty(handle, old_dir);
+	ext4_mark_inode_dirty(handle, old_dir);
 	if (new_inode) {
-		ext3_mark_inode_dirty(handle, new_inode);
+		ext4_mark_inode_dirty(handle, new_inode);
 		if (!new_inode->i_nlink)
-			ext3_orphan_add(handle, new_inode);
+			ext4_orphan_add(handle, new_inode);
 	}
 	retval = 0;
 
@@ -2358,40 +2358,40 @@ end_rename:
 	brelse (dir_bh);
 	brelse (old_bh);
 	brelse (new_bh);
-	ext3_journal_stop(handle);
+	ext4_journal_stop(handle);
 	return retval;
 }
 
 /*
  * directories can handle most operations...
  */
-struct inode_operations ext3_dir_inode_operations = {
-	.create		= ext3_create,
-	.lookup		= ext3_lookup,
-	.link		= ext3_link,
-	.unlink		= ext3_unlink,
-	.symlink	= ext3_symlink,
-	.mkdir		= ext3_mkdir,
-	.rmdir		= ext3_rmdir,
-	.mknod		= ext3_mknod,
-	.rename		= ext3_rename,
-	.setattr	= ext3_setattr,
-#ifdef CONFIG_EXT3_FS_XATTR
+struct inode_operations ext4_dir_inode_operations = {
+	.create		= ext4_create,
+	.lookup		= ext4_lookup,
+	.link		= ext4_link,
+	.unlink		= ext4_unlink,
+	.symlink	= ext4_symlink,
+	.mkdir		= ext4_mkdir,
+	.rmdir		= ext4_rmdir,
+	.mknod		= ext4_mknod,
+	.rename		= ext4_rename,
+	.setattr	= ext4_setattr,
+#ifdef CONFIG_EXT4DEV_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
-	.listxattr	= ext3_listxattr,
+	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
-	.permission	= ext3_permission,
+	.permission	= ext4_permission,
 };
 
-struct inode_operations ext3_special_inode_operations = {
-	.setattr	= ext3_setattr,
-#ifdef CONFIG_EXT3_FS_XATTR
+struct inode_operations ext4_special_inode_operations = {
+	.setattr	= ext4_setattr,
+#ifdef CONFIG_EXT4DEV_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
-	.listxattr	= ext3_listxattr,
+	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
-	.permission	= ext3_permission,
+	.permission	= ext4_permission,
 };
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h
index f2ce2b0065c9..5e4dfff36a00 100644
--- a/fs/ext4/namei.h
+++ b/fs/ext4/namei.h
@@ -1,8 +1,8 @@
-/*  linux/fs/ext3/namei.h
+/*  linux/fs/ext4/namei.h
  *
  * Copyright (C) 2005 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
  *
 */
 
-extern struct dentry *ext3_get_parent(struct dentry *child);
+extern struct dentry *ext4_get_parent(struct dentry *child);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b73cba12f79c..4a47895d9d6d 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1,7 +1,7 @@
 /*
- *  linux/fs/ext3/resize.c
+ *  linux/fs/ext4/resize.c
  *
- * Support for resizing an ext3 filesystem while it is mounted.
+ * Support for resizing an ext4 filesystem while it is mounted.
  *
  * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
  *
@@ -9,11 +9,11 @@
  */
 
 
-#define EXT3FS_DEBUG
+#define EXT4FS_DEBUG
 
 #include <linux/sched.h>
 #include <linux/smp_lock.h>
-#include <linux/ext3_jbd.h>
+#include <linux/ext4_jbd.h>
 
 #include <linux/errno.h>
 #include <linux/slab.h>
@@ -23,87 +23,87 @@
 #define inside(b, first, last)	((b) >= (first) && (b) < (last))
 
 static int verify_group_input(struct super_block *sb,
-			      struct ext3_new_group_data *input)
+			      struct ext4_new_group_data *input)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
-	struct ext3_super_block *es = sbi->s_es;
-	ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count);
-	ext3_fsblk_t end = start + input->blocks_count;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_super_block *es = sbi->s_es;
+	ext4_fsblk_t start = le32_to_cpu(es->s_blocks_count);
+	ext4_fsblk_t end = start + input->blocks_count;
 	unsigned group = input->group;
-	ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
-	unsigned overhead = ext3_bg_has_super(sb, group) ?
-		(1 + ext3_bg_num_gdb(sb, group) +
+	ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
+	unsigned overhead = ext4_bg_has_super(sb, group) ?
+		(1 + ext4_bg_num_gdb(sb, group) +
 		 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
-	ext3_fsblk_t metaend = start + overhead;
+	ext4_fsblk_t metaend = start + overhead;
 	struct buffer_head *bh = NULL;
-	ext3_grpblk_t free_blocks_count;
+	ext4_grpblk_t free_blocks_count;
 	int err = -EINVAL;
 
 	input->free_blocks_count = free_blocks_count =
 		input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
 
 	if (test_opt(sb, DEBUG))
-		printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks "
+		printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks "
 		       "(%d free, %u reserved)\n",
-		       ext3_bg_has_super(sb, input->group) ? "normal" :
+		       ext4_bg_has_super(sb, input->group) ? "normal" :
 		       "no-super", input->group, input->blocks_count,
 		       free_blocks_count, input->reserved_blocks);
 
 	if (group != sbi->s_groups_count)
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Cannot add at group %u (only %lu groups)",
 			     input->group, sbi->s_groups_count);
 	else if ((start - le32_to_cpu(es->s_first_data_block)) %
-		 EXT3_BLOCKS_PER_GROUP(sb))
-		ext3_warning(sb, __FUNCTION__, "Last group not full");
+		 EXT4_BLOCKS_PER_GROUP(sb))
+		ext4_warning(sb, __FUNCTION__, "Last group not full");
 	else if (input->reserved_blocks > input->blocks_count / 5)
-		ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)",
+		ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)",
 			     input->reserved_blocks);
 	else if (free_blocks_count < 0)
-		ext3_warning(sb, __FUNCTION__, "Bad blocks count %u",
+		ext4_warning(sb, __FUNCTION__, "Bad blocks count %u",
 			     input->blocks_count);
 	else if (!(bh = sb_bread(sb, end - 1)))
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Cannot read last block ("E3FSBLK")",
 			     end - 1);
 	else if (outside(input->block_bitmap, start, end))
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap not in group (block %u)",
 			     input->block_bitmap);
 	else if (outside(input->inode_bitmap, start, end))
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Inode bitmap not in group (block %u)",
 			     input->inode_bitmap);
 	else if (outside(input->inode_table, start, end) ||
 	         outside(itend - 1, start, end))
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Inode table not in group (blocks %u-"E3FSBLK")",
 			     input->inode_table, itend - 1);
 	else if (input->inode_bitmap == input->block_bitmap)
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap same as inode bitmap (%u)",
 			     input->block_bitmap);
 	else if (inside(input->block_bitmap, input->inode_table, itend))
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap (%u) in inode table (%u-"E3FSBLK")",
 			     input->block_bitmap, input->inode_table, itend-1);
 	else if (inside(input->inode_bitmap, input->inode_table, itend))
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
 			     input->inode_bitmap, input->inode_table, itend-1);
 	else if (inside(input->block_bitmap, start, metaend))
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap (%u) in GDT table"
 			     " ("E3FSBLK"-"E3FSBLK")",
 			     input->block_bitmap, start, metaend - 1);
 	else if (inside(input->inode_bitmap, start, metaend))
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Inode bitmap (%u) in GDT table"
 			     " ("E3FSBLK"-"E3FSBLK")",
 			     input->inode_bitmap, start, metaend - 1);
 	else if (inside(input->inode_table, start, metaend) ||
 	         inside(itend - 1, start, metaend))
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "Inode table (%u-"E3FSBLK") overlaps"
 			     "GDT table ("E3FSBLK"-"E3FSBLK")",
 			     input->inode_table, itend - 1, start, metaend - 1);
@@ -115,7 +115,7 @@ static int verify_group_input(struct super_block *sb,
 }
 
 static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
-				  ext3_fsblk_t blk)
+				  ext4_fsblk_t blk)
 {
 	struct buffer_head *bh;
 	int err;
@@ -123,7 +123,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
 	bh = sb_getblk(sb, blk);
 	if (!bh)
 		return ERR_PTR(-EIO);
-	if ((err = ext3_journal_get_write_access(handle, bh))) {
+	if ((err = ext4_journal_get_write_access(handle, bh))) {
 		brelse(bh);
 		bh = ERR_PTR(err);
 	} else {
@@ -148,9 +148,9 @@ static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
 	if (start_bit >= end_bit)
 		return;
 
-	ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
+	ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
 	for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
-		ext3_set_bit(i, bitmap);
+		ext4_set_bit(i, bitmap);
 	if (i < end_bit)
 		memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
 }
@@ -163,21 +163,21 @@ static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
  * If any part of this fails, we simply abort the resize.
  */
 static int setup_new_group_blocks(struct super_block *sb,
-				  struct ext3_new_group_data *input)
+				  struct ext4_new_group_data *input)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
-	ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group);
-	int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group);
+	int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
 		le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
-	unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group);
+	unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group);
 	struct buffer_head *bh;
 	handle_t *handle;
-	ext3_fsblk_t block;
-	ext3_grpblk_t bit;
+	ext4_fsblk_t block;
+	ext4_grpblk_t bit;
 	int i;
 	int err = 0, err2;
 
-	handle = ext3_journal_start_sb(sb, reserved_gdb + gdblocks +
+	handle = ext4_journal_start_sb(sb, reserved_gdb + gdblocks +
 				       2 + sbi->s_itb_per_group);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -193,9 +193,9 @@ static int setup_new_group_blocks(struct super_block *sb,
 		goto exit_journal;
 	}
 
-	if (ext3_bg_has_super(sb, input->group)) {
-		ext3_debug("mark backup superblock %#04lx (+0)\n", start);
-		ext3_set_bit(0, bh->b_data);
+	if (ext4_bg_has_super(sb, input->group)) {
+		ext4_debug("mark backup superblock %#04lx (+0)\n", start);
+		ext4_set_bit(0, bh->b_data);
 	}
 
 	/* Copy all of the GDT blocks into the backup in this group */
@@ -203,14 +203,14 @@ static int setup_new_group_blocks(struct super_block *sb,
 	     i < gdblocks; i++, block++, bit++) {
 		struct buffer_head *gdb;
 
-		ext3_debug("update backup group %#04lx (+%d)\n", block, bit);
+		ext4_debug("update backup group %#04lx (+%d)\n", block, bit);
 
 		gdb = sb_getblk(sb, block);
 		if (!gdb) {
 			err = -EIO;
 			goto exit_bh;
 		}
-		if ((err = ext3_journal_get_write_access(handle, gdb))) {
+		if ((err = ext4_journal_get_write_access(handle, gdb))) {
 			brelse(gdb);
 			goto exit_bh;
 		}
@@ -218,8 +218,8 @@ static int setup_new_group_blocks(struct super_block *sb,
 		memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
 		set_buffer_uptodate(gdb);
 		unlock_buffer(bh);
-		ext3_journal_dirty_metadata(handle, gdb);
-		ext3_set_bit(bit, bh->b_data);
+		ext4_journal_dirty_metadata(handle, gdb);
+		ext4_set_bit(bit, bh->b_data);
 		brelse(gdb);
 	}
 
@@ -228,59 +228,59 @@ static int setup_new_group_blocks(struct super_block *sb,
 	     i < reserved_gdb; i++, block++, bit++) {
 		struct buffer_head *gdb;
 
-		ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit);
+		ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit);
 
 		if (IS_ERR(gdb = bclean(handle, sb, block))) {
 			err = PTR_ERR(bh);
 			goto exit_bh;
 		}
-		ext3_journal_dirty_metadata(handle, gdb);
-		ext3_set_bit(bit, bh->b_data);
+		ext4_journal_dirty_metadata(handle, gdb);
+		ext4_set_bit(bit, bh->b_data);
 		brelse(gdb);
 	}
-	ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
+	ext4_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
 		   input->block_bitmap - start);
-	ext3_set_bit(input->block_bitmap - start, bh->b_data);
-	ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
+	ext4_set_bit(input->block_bitmap - start, bh->b_data);
+	ext4_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
 		   input->inode_bitmap - start);
-	ext3_set_bit(input->inode_bitmap - start, bh->b_data);
+	ext4_set_bit(input->inode_bitmap - start, bh->b_data);
 
 	/* Zero out all of the inode table blocks */
 	for (i = 0, block = input->inode_table, bit = block - start;
 	     i < sbi->s_itb_per_group; i++, bit++, block++) {
 		struct buffer_head *it;
 
-		ext3_debug("clear inode block %#04lx (+%d)\n", block, bit);
+		ext4_debug("clear inode block %#04lx (+%d)\n", block, bit);
 		if (IS_ERR(it = bclean(handle, sb, block))) {
 			err = PTR_ERR(it);
 			goto exit_bh;
 		}
-		ext3_journal_dirty_metadata(handle, it);
+		ext4_journal_dirty_metadata(handle, it);
 		brelse(it);
-		ext3_set_bit(bit, bh->b_data);
+		ext4_set_bit(bit, bh->b_data);
 	}
-	mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
+	mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
 			bh->b_data);
-	ext3_journal_dirty_metadata(handle, bh);
+	ext4_journal_dirty_metadata(handle, bh);
 	brelse(bh);
 
 	/* Mark unused entries in inode bitmap used */
-	ext3_debug("clear inode bitmap %#04x (+%ld)\n",
+	ext4_debug("clear inode bitmap %#04x (+%ld)\n",
 		   input->inode_bitmap, input->inode_bitmap - start);
 	if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
 		err = PTR_ERR(bh);
 		goto exit_journal;
 	}
 
-	mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
+	mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
 			bh->b_data);
-	ext3_journal_dirty_metadata(handle, bh);
+	ext4_journal_dirty_metadata(handle, bh);
 exit_bh:
 	brelse(bh);
 
 exit_journal:
 	unlock_super(sb);
-	if ((err2 = ext3_journal_stop(handle)) && !err)
+	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 
 	return err;
@@ -288,20 +288,20 @@ exit_journal:
 
 /*
  * Iterate through the groups which hold BACKUP superblock/GDT copies in an
- * ext3 filesystem.  The counters should be initialized to 1, 5, and 7 before
+ * ext4 filesystem.  The counters should be initialized to 1, 5, and 7 before
  * calling this for the first time.  In a sparse filesystem it will be the
  * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
  * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
  */
-static unsigned ext3_list_backups(struct super_block *sb, unsigned *three,
+static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
 				  unsigned *five, unsigned *seven)
 {
 	unsigned *min = three;
 	int mult = 3;
 	unsigned ret;
 
-	if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
-					EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
+	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+					EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
 		ret = *min;
 		*min += 1;
 		return ret;
@@ -330,8 +330,8 @@ static unsigned ext3_list_backups(struct super_block *sb, unsigned *three,
 static int verify_reserved_gdb(struct super_block *sb,
 			       struct buffer_head *primary)
 {
-	const ext3_fsblk_t blk = primary->b_blocknr;
-	const unsigned long end = EXT3_SB(sb)->s_groups_count;
+	const ext4_fsblk_t blk = primary->b_blocknr;
+	const unsigned long end = EXT4_SB(sb)->s_groups_count;
 	unsigned three = 1;
 	unsigned five = 5;
 	unsigned seven = 7;
@@ -339,16 +339,16 @@ static int verify_reserved_gdb(struct super_block *sb,
 	__le32 *p = (__le32 *)primary->b_data;
 	int gdbackups = 0;
 
-	while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
-		if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
-			ext3_warning(sb, __FUNCTION__,
+	while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
+		if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
+			ext4_warning(sb, __FUNCTION__,
 				     "reserved GDT "E3FSBLK
 				     " missing grp %d ("E3FSBLK")",
 				     blk, grp,
-				     grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
+				     grp * EXT4_BLOCKS_PER_GROUP(sb) + blk);
 			return -EINVAL;
 		}
-		if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb))
+		if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb))
 			return -EFBIG;
 	}
 
@@ -369,23 +369,23 @@ static int verify_reserved_gdb(struct super_block *sb,
  * fail once we start modifying the data on disk, because JBD has no rollback.
  */
 static int add_new_gdb(handle_t *handle, struct inode *inode,
-		       struct ext3_new_group_data *input,
+		       struct ext4_new_group_data *input,
 		       struct buffer_head **primary)
 {
 	struct super_block *sb = inode->i_sb;
-	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
-	unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
-	ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+	unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
+	ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
 	struct buffer_head **o_group_desc, **n_group_desc;
 	struct buffer_head *dind;
 	int gdbackups;
-	struct ext3_iloc iloc;
+	struct ext4_iloc iloc;
 	__le32 *data;
 	int err;
 
 	if (test_opt(sb, DEBUG))
 		printk(KERN_DEBUG
-		       "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n",
+		       "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
 		       gdb_num);
 
 	/*
@@ -393,11 +393,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	 * because the user tools have no way of handling this.  Probably a
 	 * bad time to do it anyways.
 	 */
-	if (EXT3_SB(sb)->s_sbh->b_blocknr !=
-	    le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
-		ext3_warning(sb, __FUNCTION__,
+	if (EXT4_SB(sb)->s_sbh->b_blocknr !=
+	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
+		ext4_warning(sb, __FUNCTION__,
 			"won't resize using backup superblock at %llu",
-			(unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
+			(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
 		return -EPERM;
 	}
 
@@ -410,7 +410,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 		goto exit_bh;
 	}
 
-	data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
+	data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
 	dind = sb_bread(sb, le32_to_cpu(*data));
 	if (!dind) {
 		err = -EIO;
@@ -418,32 +418,32 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	}
 
 	data = (__le32 *)dind->b_data;
-	if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
-		ext3_warning(sb, __FUNCTION__,
+	if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
+		ext4_warning(sb, __FUNCTION__,
 			     "new group %u GDT block "E3FSBLK" not reserved",
 			     input->group, gdblock);
 		err = -EINVAL;
 		goto exit_dind;
 	}
 
-	if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh)))
+	if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh)))
 		goto exit_dind;
 
-	if ((err = ext3_journal_get_write_access(handle, *primary)))
+	if ((err = ext4_journal_get_write_access(handle, *primary)))
 		goto exit_sbh;
 
-	if ((err = ext3_journal_get_write_access(handle, dind)))
+	if ((err = ext4_journal_get_write_access(handle, dind)))
 		goto exit_primary;
 
-	/* ext3_reserve_inode_write() gets a reference on the iloc */
-	if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
+	/* ext4_reserve_inode_write() gets a reference on the iloc */
+	if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
 		goto exit_dindj;
 
 	n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
 			GFP_KERNEL);
 	if (!n_group_desc) {
 		err = -ENOMEM;
-		ext3_warning (sb, __FUNCTION__,
+		ext4_warning (sb, __FUNCTION__,
 			      "not enough memory for %lu groups", gdb_num + 1);
 		goto exit_inode;
 	}
@@ -457,43 +457,43 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	 * these blocks, because they are marked as in-use from being in the
 	 * reserved inode, and will become GDT blocks (primary and backup).
 	 */
-	data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
-	ext3_journal_dirty_metadata(handle, dind);
+	data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
+	ext4_journal_dirty_metadata(handle, dind);
 	brelse(dind);
 	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
-	ext3_mark_iloc_dirty(handle, inode, &iloc);
+	ext4_mark_iloc_dirty(handle, inode, &iloc);
 	memset((*primary)->b_data, 0, sb->s_blocksize);
-	ext3_journal_dirty_metadata(handle, *primary);
+	ext4_journal_dirty_metadata(handle, *primary);
 
-	o_group_desc = EXT3_SB(sb)->s_group_desc;
+	o_group_desc = EXT4_SB(sb)->s_group_desc;
 	memcpy(n_group_desc, o_group_desc,
-	       EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
+	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
 	n_group_desc[gdb_num] = *primary;
-	EXT3_SB(sb)->s_group_desc = n_group_desc;
-	EXT3_SB(sb)->s_gdb_count++;
+	EXT4_SB(sb)->s_group_desc = n_group_desc;
+	EXT4_SB(sb)->s_gdb_count++;
 	kfree(o_group_desc);
 
 	es->s_reserved_gdt_blocks =
 		cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
-	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+	ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
 
 	return 0;
 
 exit_inode:
-	//ext3_journal_release_buffer(handle, iloc.bh);
+	//ext4_journal_release_buffer(handle, iloc.bh);
 	brelse(iloc.bh);
 exit_dindj:
-	//ext3_journal_release_buffer(handle, dind);
+	//ext4_journal_release_buffer(handle, dind);
 exit_primary:
-	//ext3_journal_release_buffer(handle, *primary);
+	//ext4_journal_release_buffer(handle, *primary);
 exit_sbh:
-	//ext3_journal_release_buffer(handle, *primary);
+	//ext4_journal_release_buffer(handle, *primary);
 exit_dind:
 	brelse(dind);
 exit_bh:
 	brelse(*primary);
 
-	ext3_debug("leaving with error %d\n", err);
+	ext4_debug("leaving with error %d\n", err);
 	return err;
 }
 
@@ -511,14 +511,14 @@ exit_bh:
  * backup GDT blocks are stored in their reserved primary GDT block.
  */
 static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
-			      struct ext3_new_group_data *input)
+			      struct ext4_new_group_data *input)
 {
 	struct super_block *sb = inode->i_sb;
-	int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks);
+	int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
 	struct buffer_head **primary;
 	struct buffer_head *dind;
-	struct ext3_iloc iloc;
-	ext3_fsblk_t blk;
+	struct ext4_iloc iloc;
+	ext4_fsblk_t blk;
 	__le32 *data, *end;
 	int gdbackups = 0;
 	int res, i;
@@ -528,21 +528,21 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	if (!primary)
 		return -ENOMEM;
 
-	data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
+	data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
 	dind = sb_bread(sb, le32_to_cpu(*data));
 	if (!dind) {
 		err = -EIO;
 		goto exit_free;
 	}
 
-	blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
-	data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
-	end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
+	blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
+	data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count;
+	end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
 
 	/* Get each reserved primary GDT block and verify it holds backups */
 	for (res = 0; res < reserved_gdb; res++, blk++) {
 		if (le32_to_cpu(*data) != blk) {
-			ext3_warning(sb, __FUNCTION__,
+			ext4_warning(sb, __FUNCTION__,
 				     "reserved block "E3FSBLK
 				     " not at offset %ld",
 				     blk,
@@ -565,24 +565,24 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	}
 
 	for (i = 0; i < reserved_gdb; i++) {
-		if ((err = ext3_journal_get_write_access(handle, primary[i]))) {
+		if ((err = ext4_journal_get_write_access(handle, primary[i]))) {
 			/*
 			int j;
 			for (j = 0; j < i; j++)
-				ext3_journal_release_buffer(handle, primary[j]);
+				ext4_journal_release_buffer(handle, primary[j]);
 			 */
 			goto exit_bh;
 		}
 	}
 
-	if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
+	if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
 		goto exit_bh;
 
 	/*
 	 * Finally we can add each of the reserved backup GDT blocks from
 	 * the new group to its reserved primary GDT block.
 	 */
-	blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
+	blk = input->group * EXT4_BLOCKS_PER_GROUP(sb);
 	for (i = 0; i < reserved_gdb; i++) {
 		int err2;
 		data = (__le32 *)primary[i]->b_data;
@@ -590,12 +590,12 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 		       primary[i]->b_blocknr, gdbackups,
 		       blk + primary[i]->b_blocknr); */
 		data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
-		err2 = ext3_journal_dirty_metadata(handle, primary[i]);
+		err2 = ext4_journal_dirty_metadata(handle, primary[i]);
 		if (!err)
 			err = err2;
 	}
 	inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
-	ext3_mark_iloc_dirty(handle, inode, &iloc);
+	ext4_mark_iloc_dirty(handle, inode, &iloc);
 
 exit_bh:
 	while (--res >= 0)
@@ -609,7 +609,7 @@ exit_free:
 }
 
 /*
- * Update the backup copies of the ext3 metadata.  These don't need to be part
+ * Update the backup copies of the ext4 metadata.  These don't need to be part
  * of the main resize transaction, because e2fsck will re-write them if there
  * is a problem (basically only OOM will cause a problem).  However, we
  * _should_ update the backups if possible, in case the primary gets trashed
@@ -626,9 +626,9 @@ exit_free:
 static void update_backups(struct super_block *sb,
 			   int blk_off, char *data, int size)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	const unsigned long last = sbi->s_groups_count;
-	const int bpg = EXT3_BLOCKS_PER_GROUP(sb);
+	const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
 	unsigned three = 1;
 	unsigned five = 5;
 	unsigned seven = 7;
@@ -637,20 +637,20 @@ static void update_backups(struct super_block *sb,
 	handle_t *handle;
 	int err = 0, err2;
 
-	handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
+	handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
 	if (IS_ERR(handle)) {
 		group = 1;
 		err = PTR_ERR(handle);
 		goto exit_err;
 	}
 
-	while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) {
+	while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) {
 		struct buffer_head *bh;
 
 		/* Out of journal space, and can't get more - abort - so sad */
 		if (handle->h_buffer_credits == 0 &&
-		    ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) &&
-		    (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA)))
+		    ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
+		    (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
 			break;
 
 		bh = sb_getblk(sb, group * bpg + blk_off);
@@ -658,9 +658,9 @@ static void update_backups(struct super_block *sb,
 			err = -EIO;
 			break;
 		}
-		ext3_debug("update metadata backup %#04lx\n",
+		ext4_debug("update metadata backup %#04lx\n",
 			  (unsigned long)bh->b_blocknr);
-		if ((err = ext3_journal_get_write_access(handle, bh)))
+		if ((err = ext4_journal_get_write_access(handle, bh)))
 			break;
 		lock_buffer(bh);
 		memcpy(bh->b_data, data, size);
@@ -668,10 +668,10 @@ static void update_backups(struct super_block *sb,
 			memset(bh->b_data + size, 0, rest);
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
-		ext3_journal_dirty_metadata(handle, bh);
+		ext4_journal_dirty_metadata(handle, bh);
 		brelse(bh);
 	}
-	if ((err2 = ext3_journal_stop(handle)) && !err)
+	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 
 	/*
@@ -686,11 +686,11 @@ static void update_backups(struct super_block *sb,
 	 */
 exit_err:
 	if (err) {
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "can't update backup for group %d (err %d), "
 			     "forcing fsck on next reboot", group, err);
-		sbi->s_mount_state &= ~EXT3_VALID_FS;
-		sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
+		sbi->s_mount_state &= ~EXT4_VALID_FS;
+		sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
 		mark_buffer_dirty(sbi->s_sbh);
 	}
 }
@@ -708,51 +708,51 @@ exit_err:
  * not really "added" the group at all.  We re-check that we are still
  * adding in the last group in case things have changed since verifying.
  */
-int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
+int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
-	struct ext3_super_block *es = sbi->s_es;
-	int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_super_block *es = sbi->s_es;
+	int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
 		le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
 	struct buffer_head *primary = NULL;
-	struct ext3_group_desc *gdp;
+	struct ext4_group_desc *gdp;
 	struct inode *inode = NULL;
 	handle_t *handle;
 	int gdb_off, gdb_num;
 	int err, err2;
 
-	gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
-	gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb);
+	gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
+	gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
 
-	if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
-					EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
-		ext3_warning(sb, __FUNCTION__,
+	if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
+					EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
+		ext4_warning(sb, __FUNCTION__,
 			     "Can't resize non-sparse filesystem further");
 		return -EPERM;
 	}
 
 	if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
 	    le32_to_cpu(es->s_blocks_count)) {
-		ext3_warning(sb, __FUNCTION__, "blocks_count overflow\n");
+		ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n");
 		return -EINVAL;
 	}
 
-	if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
+	if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
 	    le32_to_cpu(es->s_inodes_count)) {
-		ext3_warning(sb, __FUNCTION__, "inodes_count overflow\n");
+		ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n");
 		return -EINVAL;
 	}
 
 	if (reserved_gdb || gdb_off == 0) {
-		if (!EXT3_HAS_COMPAT_FEATURE(sb,
-					     EXT3_FEATURE_COMPAT_RESIZE_INODE)){
-			ext3_warning(sb, __FUNCTION__,
+		if (!EXT4_HAS_COMPAT_FEATURE(sb,
+					     EXT4_FEATURE_COMPAT_RESIZE_INODE)){
+			ext4_warning(sb, __FUNCTION__,
 				     "No reserved GDT blocks, can't resize");
 			return -EPERM;
 		}
-		inode = iget(sb, EXT3_RESIZE_INO);
+		inode = iget(sb, EXT4_RESIZE_INO);
 		if (!inode || is_bad_inode(inode)) {
-			ext3_warning(sb, __FUNCTION__,
+			ext4_warning(sb, __FUNCTION__,
 				     "Error opening resize inode");
 			iput(inode);
 			return -ENOENT;
@@ -772,8 +772,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	 * are adding a group with superblock/GDT backups  we will also
 	 * modify each of the reserved GDT dindirect blocks.
 	 */
-	handle = ext3_journal_start_sb(sb,
-				       ext3_bg_has_super(sb, input->group) ?
+	handle = ext4_journal_start_sb(sb,
+				       ext4_bg_has_super(sb, input->group) ?
 				       3 + reserved_gdb : 4);
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
@@ -782,13 +782,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 
 	lock_super(sb);
 	if (input->group != sbi->s_groups_count) {
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "multiple resizers run on filesystem!");
 		err = -EBUSY;
 		goto exit_journal;
 	}
 
-	if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh)))
+	if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
 		goto exit_journal;
 
 	/*
@@ -799,10 +799,10 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	 */
 	if (gdb_off) {
 		primary = sbi->s_group_desc[gdb_num];
-		if ((err = ext3_journal_get_write_access(handle, primary)))
+		if ((err = ext4_journal_get_write_access(handle, primary)))
 			goto exit_journal;
 
-		if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) &&
+		if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) &&
 		    (err = reserve_backup_gdb(handle, inode, input)))
 			goto exit_journal;
 	} else if ((err = add_new_gdb(handle, inode, input, &primary)))
@@ -828,13 +828,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	 */
 
 	/* Update group descriptor block for new group */
-	gdp = (struct ext3_group_desc *)primary->b_data + gdb_off;
+	gdp = (struct ext4_group_desc *)primary->b_data + gdb_off;
 
 	gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap);
 	gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap);
 	gdp->bg_inode_table = cpu_to_le32(input->inode_table);
 	gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
-	gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
+	gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
 
 	/*
 	 * Make the new blocks and inodes valid next.  We do this before
@@ -849,7 +849,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) +
 		input->blocks_count);
 	es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) +
-		EXT3_INODES_PER_GROUP(sb));
+		EXT4_INODES_PER_GROUP(sb));
 
 	/*
 	 * We need to protect s_groups_count against other CPUs seeing
@@ -878,7 +878,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	/* Update the global fs size fields */
 	sbi->s_groups_count++;
 
-	ext3_journal_dirty_metadata(handle, primary);
+	ext4_journal_dirty_metadata(handle, primary);
 
 	/* Update the reserved block counts only once the new group is
 	 * active. */
@@ -889,42 +889,42 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	percpu_counter_mod(&sbi->s_freeblocks_counter,
 			   input->free_blocks_count);
 	percpu_counter_mod(&sbi->s_freeinodes_counter,
-			   EXT3_INODES_PER_GROUP(sb));
+			   EXT4_INODES_PER_GROUP(sb));
 
-	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+	ext4_journal_dirty_metadata(handle, sbi->s_sbh);
 	sb->s_dirt = 1;
 
 exit_journal:
 	unlock_super(sb);
-	if ((err2 = ext3_journal_stop(handle)) && !err)
+	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 	if (!err) {
 		update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
-			       sizeof(struct ext3_super_block));
+			       sizeof(struct ext4_super_block));
 		update_backups(sb, primary->b_blocknr, primary->b_data,
 			       primary->b_size);
 	}
 exit_put:
 	iput(inode);
 	return err;
-} /* ext3_group_add */
+} /* ext4_group_add */
 
 /* Extend the filesystem to the new number of blocks specified.  This entry
  * point is only used to extend the current filesystem to the end of the last
  * existing group.  It can be accessed via ioctl, or by "remount,resize=<size>"
  * for emergencies (because it has no dependencies on reserved blocks).
  *
- * If we _really_ wanted, we could use default values to call ext3_group_add()
+ * If we _really_ wanted, we could use default values to call ext4_group_add()
  * allow the "remount" trick to work for arbitrary resizing, assuming enough
  * GDT blocks are reserved to grow to the desired size.
  */
-int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
-		      ext3_fsblk_t n_blocks_count)
+int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
+		      ext4_fsblk_t n_blocks_count)
 {
-	ext3_fsblk_t o_blocks_count;
+	ext4_fsblk_t o_blocks_count;
 	unsigned long o_groups_count;
-	ext3_grpblk_t last;
-	ext3_grpblk_t add;
+	ext4_grpblk_t last;
+	ext4_grpblk_t add;
 	struct buffer_head * bh;
 	handle_t *handle;
 	int err;
@@ -934,45 +934,45 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 	 * yet: we're going to revalidate es->s_blocks_count after
 	 * taking lock_super() below. */
 	o_blocks_count = le32_to_cpu(es->s_blocks_count);
-	o_groups_count = EXT3_SB(sb)->s_groups_count;
+	o_groups_count = EXT4_SB(sb)->s_groups_count;
 
 	if (test_opt(sb, DEBUG))
-		printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n",
+		printk(KERN_DEBUG "EXT4-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n",
 		       o_blocks_count, n_blocks_count);
 
 	if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
 		return 0;
 
 	if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
-		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
+		printk(KERN_ERR "EXT4-fs: filesystem on %s:"
 			" too large to resize to %lu blocks safely\n",
 			sb->s_id, n_blocks_count);
 		if (sizeof(sector_t) < 8)
-			ext3_warning(sb, __FUNCTION__,
+			ext4_warning(sb, __FUNCTION__,
 			"CONFIG_LBD not enabled\n");
 		return -EINVAL;
 	}
 
 	if (n_blocks_count < o_blocks_count) {
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "can't shrink FS - resize aborted");
 		return -EBUSY;
 	}
 
 	/* Handle the remaining blocks in the last group only. */
 	last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
-		EXT3_BLOCKS_PER_GROUP(sb);
+		EXT4_BLOCKS_PER_GROUP(sb);
 
 	if (last == 0) {
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "need to use ext2online to resize further");
 		return -EPERM;
 	}
 
-	add = EXT3_BLOCKS_PER_GROUP(sb) - last;
+	add = EXT4_BLOCKS_PER_GROUP(sb) - last;
 
 	if (o_blocks_count + add < o_blocks_count) {
-		ext3_warning(sb, __FUNCTION__, "blocks_count overflow");
+		ext4_warning(sb, __FUNCTION__, "blocks_count overflow");
 		return -EINVAL;
 	}
 
@@ -980,7 +980,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 		add = n_blocks_count - o_blocks_count;
 
 	if (o_blocks_count + add < n_blocks_count)
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "will only finish group ("E3FSBLK
 			     " blocks, %u new)",
 			     o_blocks_count + add, add);
@@ -988,55 +988,55 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 	/* See if the device is actually as big as what was requested */
 	bh = sb_bread(sb, o_blocks_count + add -1);
 	if (!bh) {
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "can't read last block, resize aborted");
 		return -ENOSPC;
 	}
 	brelse(bh);
 
 	/* We will update the superblock, one block bitmap, and
-	 * one group descriptor via ext3_free_blocks().
+	 * one group descriptor via ext4_free_blocks().
 	 */
-	handle = ext3_journal_start_sb(sb, 3);
+	handle = ext4_journal_start_sb(sb, 3);
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
-		ext3_warning(sb, __FUNCTION__, "error %d on journal start",err);
+		ext4_warning(sb, __FUNCTION__, "error %d on journal start",err);
 		goto exit_put;
 	}
 
 	lock_super(sb);
 	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
-		ext3_warning(sb, __FUNCTION__,
+		ext4_warning(sb, __FUNCTION__,
 			     "multiple resizers run on filesystem!");
 		unlock_super(sb);
 		err = -EBUSY;
 		goto exit_put;
 	}
 
-	if ((err = ext3_journal_get_write_access(handle,
-						 EXT3_SB(sb)->s_sbh))) {
-		ext3_warning(sb, __FUNCTION__,
+	if ((err = ext4_journal_get_write_access(handle,
+						 EXT4_SB(sb)->s_sbh))) {
+		ext4_warning(sb, __FUNCTION__,
 			     "error %d on journal write access", err);
 		unlock_super(sb);
-		ext3_journal_stop(handle);
+		ext4_journal_stop(handle);
 		goto exit_put;
 	}
 	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
-	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+	ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
 	sb->s_dirt = 1;
 	unlock_super(sb);
-	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
+	ext4_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
 		   o_blocks_count + add);
-	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
-	ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count,
+	ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
+	ext4_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count,
 		   o_blocks_count + add);
-	if ((err = ext3_journal_stop(handle)))
+	if ((err = ext4_journal_stop(handle)))
 		goto exit_put;
 	if (test_opt(sb, DEBUG))
-		printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n",
+		printk(KERN_DEBUG "EXT4-fs: extended group to %u blocks\n",
 		       le32_to_cpu(es->s_blocks_count));
-	update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
-		       sizeof(struct ext3_super_block));
+	update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
+		       sizeof(struct ext4_super_block));
 exit_put:
 	return err;
-} /* ext3_group_extend */
+} /* ext4_group_extend */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8bfd56ef18ca..9e32a2a8d286 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/super.c
+ *  linux/fs/ext4/super.c
  *
  * Copyright (C) 1992, 1993, 1994, 1995
  * Remy Card (card@masi.ibp.fr)
@@ -21,8 +21,8 @@
 #include <linux/fs.h>
 #include <linux/time.h>
 #include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
+#include <linux/ext4_fs.h>
+#include <linux/ext4_jbd.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
@@ -42,25 +42,25 @@
 #include "acl.h"
 #include "namei.h"
 
-static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
+static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
-static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
+static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
 			       unsigned int);
-static void ext3_commit_super (struct super_block * sb,
-			       struct ext3_super_block * es,
+static void ext4_commit_super (struct super_block * sb,
+			       struct ext4_super_block * es,
 			       int sync);
-static void ext3_mark_recovery_complete(struct super_block * sb,
-					struct ext3_super_block * es);
-static void ext3_clear_journal_err(struct super_block * sb,
-				   struct ext3_super_block * es);
-static int ext3_sync_fs(struct super_block *sb, int wait);
-static const char *ext3_decode_error(struct super_block * sb, int errno,
+static void ext4_mark_recovery_complete(struct super_block * sb,
+					struct ext4_super_block * es);
+static void ext4_clear_journal_err(struct super_block * sb,
+				   struct ext4_super_block * es);
+static int ext4_sync_fs(struct super_block *sb, int wait);
+static const char *ext4_decode_error(struct super_block * sb, int errno,
 				     char nbuf[16]);
-static int ext3_remount (struct super_block * sb, int * flags, char * data);
-static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
-static void ext3_unlockfs(struct super_block *sb);
-static void ext3_write_super (struct super_block * sb);
-static void ext3_write_super_lockfs(struct super_block *sb);
+static int ext4_remount (struct super_block * sb, int * flags, char * data);
+static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf);
+static void ext4_unlockfs(struct super_block *sb);
+static void ext4_write_super (struct super_block * sb);
+static void ext4_write_super_lockfs(struct super_block *sb);
 
 /*
  * Wrappers for journal_start/end.
@@ -70,7 +70,7 @@ static void ext3_write_super_lockfs(struct super_block *sb);
  * that sync() will call the filesystem's write_super callback if
  * appropriate.
  */
-handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
+handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 {
 	journal_t *journal;
 
@@ -80,9 +80,9 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
 	/* Special case here: if the journal has aborted behind our
 	 * backs (eg. EIO in the commit thread), then we still need to
 	 * take the FS itself readonly cleanly. */
-	journal = EXT3_SB(sb)->s_journal;
+	journal = EXT4_SB(sb)->s_journal;
 	if (is_journal_aborted(journal)) {
-		ext3_abort(sb, __FUNCTION__,
+		ext4_abort(sb, __FUNCTION__,
 			   "Detected aborted journal");
 		return ERR_PTR(-EROFS);
 	}
@@ -96,7 +96,7 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
  * that sync() will call the filesystem's write_super callback if
  * appropriate.
  */
-int __ext3_journal_stop(const char *where, handle_t *handle)
+int __ext4_journal_stop(const char *where, handle_t *handle)
 {
 	struct super_block *sb;
 	int err;
@@ -109,15 +109,15 @@ int __ext3_journal_stop(const char *where, handle_t *handle)
 	if (!err)
 		err = rc;
 	if (err)
-		__ext3_std_error(sb, where, err);
+		__ext4_std_error(sb, where, err);
 	return err;
 }
 
-void ext3_journal_abort_handle(const char *caller, const char *err_fn,
+void ext4_journal_abort_handle(const char *caller, const char *err_fn,
 		struct buffer_head *bh, handle_t *handle, int err)
 {
 	char nbuf[16];
-	const char *errstr = ext3_decode_error(NULL, err, nbuf);
+	const char *errstr = ext4_decode_error(NULL, err, nbuf);
 
 	if (bh)
 		BUFFER_TRACE(bh, "abort");
@@ -138,7 +138,7 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn,
  * inconsistencies detected or read IO failures.
  *
  * On ext2, we can store the error state of the filesystem in the
- * superblock.  That is not possible on ext3, because we may have other
+ * superblock.  That is not possible on ext4, because we may have other
  * write ordering constraints on the superblock which prevent us from
  * writing it out straight away; and given that the journal is about to
  * be aborted, we can't rely on the current, or future, transactions to
@@ -149,20 +149,20 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn,
  * that error until we've noted it down and cleared it.
  */
 
-static void ext3_handle_error(struct super_block *sb)
+static void ext4_handle_error(struct super_block *sb)
 {
-	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
-	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
-	es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
+	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+	es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
 	if (!test_opt (sb, ERRORS_CONT)) {
-		journal_t *journal = EXT3_SB(sb)->s_journal;
+		journal_t *journal = EXT4_SB(sb)->s_journal;
 
-		EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
+		EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
 		if (journal)
 			journal_abort(journal, -EIO);
 	}
@@ -170,27 +170,27 @@ static void ext3_handle_error(struct super_block *sb)
 		printk (KERN_CRIT "Remounting filesystem read-only\n");
 		sb->s_flags |= MS_RDONLY;
 	}
-	ext3_commit_super(sb, es, 1);
+	ext4_commit_super(sb, es, 1);
 	if (test_opt(sb, ERRORS_PANIC))
-		panic("EXT3-fs (device %s): panic forced after error\n",
+		panic("EXT4-fs (device %s): panic forced after error\n",
 			sb->s_id);
 }
 
-void ext3_error (struct super_block * sb, const char * function,
+void ext4_error (struct super_block * sb, const char * function,
 		 const char * fmt, ...)
 {
 	va_list args;
 
 	va_start(args, fmt);
-	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
+	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function);
 	vprintk(fmt, args);
 	printk("\n");
 	va_end(args);
 
-	ext3_handle_error(sb);
+	ext4_handle_error(sb);
 }
 
-static const char *ext3_decode_error(struct super_block * sb, int errno,
+static const char *ext4_decode_error(struct super_block * sb, int errno,
 				     char nbuf[16])
 {
 	char *errstr = NULL;
@@ -203,7 +203,7 @@ static const char *ext3_decode_error(struct super_block * sb, int errno,
 		errstr = "Out of memory";
 		break;
 	case -EROFS:
-		if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
+		if (!sb || EXT4_SB(sb)->s_journal->j_flags & JFS_ABORT)
 			errstr = "Journal has aborted";
 		else
 			errstr = "Readonly filesystem";
@@ -223,10 +223,10 @@ static const char *ext3_decode_error(struct super_block * sb, int errno,
 	return errstr;
 }
 
-/* __ext3_std_error decodes expected errors from journaling functions
+/* __ext4_std_error decodes expected errors from journaling functions
  * automatically and invokes the appropriate error response.  */
 
-void __ext3_std_error (struct super_block * sb, const char * function,
+void __ext4_std_error (struct super_block * sb, const char * function,
 		       int errno)
 {
 	char nbuf[16];
@@ -239,15 +239,15 @@ void __ext3_std_error (struct super_block * sb, const char * function,
 	    (sb->s_flags & MS_RDONLY))
 		return;
 
-	errstr = ext3_decode_error(sb, errno, nbuf);
-	printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n",
+	errstr = ext4_decode_error(sb, errno, nbuf);
+	printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
 		sb->s_id, function, errstr);
 
-	ext3_handle_error(sb);
+	ext4_handle_error(sb);
 }
 
 /*
- * ext3_abort is a much stronger failure handler than ext3_error.  The
+ * ext4_abort is a much stronger failure handler than ext4_error.  The
  * abort function may be used to deal with unrecoverable failures such
  * as journal IO errors or ENOMEM at a critical moment in log management.
  *
@@ -256,60 +256,60 @@ void __ext3_std_error (struct super_block * sb, const char * function,
  * case we take the easy way out and panic immediately.
  */
 
-void ext3_abort (struct super_block * sb, const char * function,
+void ext4_abort (struct super_block * sb, const char * function,
 		 const char * fmt, ...)
 {
 	va_list args;
 
-	printk (KERN_CRIT "ext3_abort called.\n");
+	printk (KERN_CRIT "ext4_abort called.\n");
 
 	va_start(args, fmt);
-	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
+	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function);
 	vprintk(fmt, args);
 	printk("\n");
 	va_end(args);
 
 	if (test_opt(sb, ERRORS_PANIC))
-		panic("EXT3-fs panic from previous error\n");
+		panic("EXT4-fs panic from previous error\n");
 
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
 	printk(KERN_CRIT "Remounting filesystem read-only\n");
-	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 	sb->s_flags |= MS_RDONLY;
-	EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
-	journal_abort(EXT3_SB(sb)->s_journal, -EIO);
+	EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
+	journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 }
 
-void ext3_warning (struct super_block * sb, const char * function,
+void ext4_warning (struct super_block * sb, const char * function,
 		   const char * fmt, ...)
 {
 	va_list args;
 
 	va_start(args, fmt);
-	printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ",
+	printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
 	       sb->s_id, function);
 	vprintk(fmt, args);
 	printk("\n");
 	va_end(args);
 }
 
-void ext3_update_dynamic_rev(struct super_block *sb)
+void ext4_update_dynamic_rev(struct super_block *sb)
 {
-	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
-	if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
+	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 		return;
 
-	ext3_warning(sb, __FUNCTION__,
+	ext4_warning(sb, __FUNCTION__,
 		     "updating to rev %d because of new feature flag, "
 		     "running e2fsck is recommended",
-		     EXT3_DYNAMIC_REV);
+		     EXT4_DYNAMIC_REV);
 
-	es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
-	es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
-	es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
+	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
+	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
+	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 	/* leave es->s_feature_*compat flags alone */
 	/* es->s_uuid will be set by e2fsck if empty */
 
@@ -323,7 +323,7 @@ void ext3_update_dynamic_rev(struct super_block *sb)
 /*
  * Open the external journal device
  */
-static struct block_device *ext3_blkdev_get(dev_t dev)
+static struct block_device *ext4_blkdev_get(dev_t dev)
 {
 	struct block_device *bdev;
 	char b[BDEVNAME_SIZE];
@@ -334,7 +334,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev)
 	return bdev;
 
 fail:
-	printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n",
+	printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n",
 			__bdevname(dev, b), PTR_ERR(bdev));
 	return NULL;
 }
@@ -342,20 +342,20 @@ fail:
 /*
  * Release the journal device
  */
-static int ext3_blkdev_put(struct block_device *bdev)
+static int ext4_blkdev_put(struct block_device *bdev)
 {
 	bd_release(bdev);
 	return blkdev_put(bdev);
 }
 
-static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
+static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
 {
 	struct block_device *bdev;
 	int ret = -ENODEV;
 
 	bdev = sbi->journal_bdev;
 	if (bdev) {
-		ret = ext3_blkdev_put(bdev);
+		ret = ext4_blkdev_put(bdev);
 		sbi->journal_bdev = NULL;
 	}
 	return ret;
@@ -363,10 +363,10 @@ static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
 
 static inline struct inode *orphan_list_entry(struct list_head *l)
 {
-	return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
+	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 }
 
-static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
+static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 {
 	struct list_head *l;
 
@@ -384,20 +384,20 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
 	}
 }
 
-static void ext3_put_super (struct super_block * sb)
+static void ext4_put_super (struct super_block * sb)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
-	struct ext3_super_block *es = sbi->s_es;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_super_block *es = sbi->s_es;
 	int i;
 
-	ext3_xattr_put_super(sb);
+	ext4_xattr_put_super(sb);
 	journal_destroy(sbi->s_journal);
 	if (!(sb->s_flags & MS_RDONLY)) {
-		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		es->s_state = cpu_to_le16(sbi->s_mount_state);
 		BUFFER_TRACE(sbi->s_sbh, "marking dirty");
 		mark_buffer_dirty(sbi->s_sbh);
-		ext3_commit_super(sb, es, 1);
+		ext4_commit_super(sb, es, 1);
 	}
 
 	for (i = 0; i < sbi->s_gdb_count; i++)
@@ -429,47 +429,47 @@ static void ext3_put_super (struct super_block * sb)
 		 */
 		sync_blockdev(sbi->journal_bdev);
 		invalidate_bdev(sbi->journal_bdev, 0);
-		ext3_blkdev_remove(sbi);
+		ext4_blkdev_remove(sbi);
 	}
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 	return;
 }
 
-static kmem_cache_t *ext3_inode_cachep;
+static kmem_cache_t *ext4_inode_cachep;
 
 /*
  * Called inside transaction, so use GFP_NOFS
  */
-static struct inode *ext3_alloc_inode(struct super_block *sb)
+static struct inode *ext4_alloc_inode(struct super_block *sb)
 {
-	struct ext3_inode_info *ei;
+	struct ext4_inode_info *ei;
 
-	ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS);
+	ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS);
 	if (!ei)
 		return NULL;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	ei->i_acl = EXT3_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+	ei->i_acl = EXT4_ACL_NOT_CACHED;
+	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
 #endif
 	ei->i_block_alloc_info = NULL;
 	ei->vfs_inode.i_version = 1;
 	return &ei->vfs_inode;
 }
 
-static void ext3_destroy_inode(struct inode *inode)
+static void ext4_destroy_inode(struct inode *inode)
 {
-	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 }
 
 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 {
-	struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
+	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR) {
 		INIT_LIST_HEAD(&ei->i_orphan);
-#ifdef CONFIG_EXT3_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
 		init_rwsem(&ei->xattr_sem);
 #endif
 		mutex_init(&ei->truncate_mutex);
@@ -479,46 +479,46 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 
 static int init_inodecache(void)
 {
-	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
-					     sizeof(struct ext3_inode_info),
+	ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
+					     sizeof(struct ext4_inode_info),
 					     0, (SLAB_RECLAIM_ACCOUNT|
 						SLAB_MEM_SPREAD),
 					     init_once, NULL);
-	if (ext3_inode_cachep == NULL)
+	if (ext4_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
 }
 
 static void destroy_inodecache(void)
 {
-	kmem_cache_destroy(ext3_inode_cachep);
+	kmem_cache_destroy(ext4_inode_cachep);
 }
 
-static void ext3_clear_inode(struct inode *inode)
+static void ext4_clear_inode(struct inode *inode)
 {
-	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	if (EXT3_I(inode)->i_acl &&
-			EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
-		posix_acl_release(EXT3_I(inode)->i_acl);
-		EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
-	}
-	if (EXT3_I(inode)->i_default_acl &&
-			EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
-		posix_acl_release(EXT3_I(inode)->i_default_acl);
-		EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
+	struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info;
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+	if (EXT4_I(inode)->i_acl &&
+			EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
+		posix_acl_release(EXT4_I(inode)->i_acl);
+		EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
+	}
+	if (EXT4_I(inode)->i_default_acl &&
+			EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
+		posix_acl_release(EXT4_I(inode)->i_default_acl);
+		EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
 	}
 #endif
-	ext3_discard_reservation(inode);
-	EXT3_I(inode)->i_block_alloc_info = NULL;
+	ext4_discard_reservation(inode);
+	EXT4_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
 		kfree(rsv);
 }
 
-static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
+static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
 {
 #if defined(CONFIG_QUOTA)
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (sbi->s_jquota_fmt)
 		seq_printf(seq, ",jqfmt=%s",
@@ -530,32 +530,32 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl
 	if (sbi->s_qf_names[GRPQUOTA])
 		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
 
-	if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA)
+	if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
 		seq_puts(seq, ",usrquota");
 
-	if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)
+	if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
 		seq_puts(seq, ",grpquota");
 #endif
 }
 
-static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct super_block *sb = vfs->mnt_sb;
 
-	if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
+	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
 		seq_puts(seq, ",data=journal");
-	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
+	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
 		seq_puts(seq, ",data=ordered");
-	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
+	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
 		seq_puts(seq, ",data=writeback");
 
-	ext3_show_quota_options(seq, sb);
+	ext4_show_quota_options(seq, sb);
 
 	return 0;
 }
 
 
-static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp)
+static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp)
 {
 	__u32 *objp = vobjp;
 	unsigned long ino = objp[0];
@@ -563,14 +563,14 @@ static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp)
 	struct inode *inode;
 	struct dentry *result;
 
-	if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
+	if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
 		return ERR_PTR(-ESTALE);
-	if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
+	if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
 		return ERR_PTR(-ESTALE);
 
 	/* iget isn't really right if the inode is currently unallocated!!
 	 *
-	 * ext3_read_inode will return a bad_inode if the inode had been
+	 * ext4_read_inode will return a bad_inode if the inode had been
 	 * deleted, so we should be safe.
 	 *
 	 * Currently we don't know the generation for parent directory, so
@@ -599,37 +599,37 @@ static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp)
 #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
 #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
 
-static int ext3_dquot_initialize(struct inode *inode, int type);
-static int ext3_dquot_drop(struct inode *inode);
-static int ext3_write_dquot(struct dquot *dquot);
-static int ext3_acquire_dquot(struct dquot *dquot);
-static int ext3_release_dquot(struct dquot *dquot);
-static int ext3_mark_dquot_dirty(struct dquot *dquot);
-static int ext3_write_info(struct super_block *sb, int type);
-static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path);
-static int ext3_quota_on_mount(struct super_block *sb, int type);
-static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
+static int ext4_dquot_initialize(struct inode *inode, int type);
+static int ext4_dquot_drop(struct inode *inode);
+static int ext4_write_dquot(struct dquot *dquot);
+static int ext4_acquire_dquot(struct dquot *dquot);
+static int ext4_release_dquot(struct dquot *dquot);
+static int ext4_mark_dquot_dirty(struct dquot *dquot);
+static int ext4_write_info(struct super_block *sb, int type);
+static int ext4_quota_on(struct super_block *sb, int type, int format_id, char *path);
+static int ext4_quota_on_mount(struct super_block *sb, int type);
+static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 			       size_t len, loff_t off);
-static ssize_t ext3_quota_write(struct super_block *sb, int type,
+static ssize_t ext4_quota_write(struct super_block *sb, int type,
 				const char *data, size_t len, loff_t off);
 
-static struct dquot_operations ext3_quota_operations = {
-	.initialize	= ext3_dquot_initialize,
-	.drop		= ext3_dquot_drop,
+static struct dquot_operations ext4_quota_operations = {
+	.initialize	= ext4_dquot_initialize,
+	.drop		= ext4_dquot_drop,
 	.alloc_space	= dquot_alloc_space,
 	.alloc_inode	= dquot_alloc_inode,
 	.free_space	= dquot_free_space,
 	.free_inode	= dquot_free_inode,
 	.transfer	= dquot_transfer,
-	.write_dquot	= ext3_write_dquot,
-	.acquire_dquot	= ext3_acquire_dquot,
-	.release_dquot	= ext3_release_dquot,
-	.mark_dirty	= ext3_mark_dquot_dirty,
-	.write_info	= ext3_write_info
+	.write_dquot	= ext4_write_dquot,
+	.acquire_dquot	= ext4_acquire_dquot,
+	.release_dquot	= ext4_release_dquot,
+	.mark_dirty	= ext4_mark_dquot_dirty,
+	.write_info	= ext4_write_info
 };
 
-static struct quotactl_ops ext3_qctl_operations = {
-	.quota_on	= ext3_quota_on,
+static struct quotactl_ops ext4_qctl_operations = {
+	.quota_on	= ext4_quota_on,
 	.quota_off	= vfs_quota_off,
 	.quota_sync	= vfs_quota_sync,
 	.get_info	= vfs_get_dqinfo,
@@ -639,31 +639,31 @@ static struct quotactl_ops ext3_qctl_operations = {
 };
 #endif
 
-static struct super_operations ext3_sops = {
-	.alloc_inode	= ext3_alloc_inode,
-	.destroy_inode	= ext3_destroy_inode,
-	.read_inode	= ext3_read_inode,
-	.write_inode	= ext3_write_inode,
-	.dirty_inode	= ext3_dirty_inode,
-	.delete_inode	= ext3_delete_inode,
-	.put_super	= ext3_put_super,
-	.write_super	= ext3_write_super,
-	.sync_fs	= ext3_sync_fs,
-	.write_super_lockfs = ext3_write_super_lockfs,
-	.unlockfs	= ext3_unlockfs,
-	.statfs		= ext3_statfs,
-	.remount_fs	= ext3_remount,
-	.clear_inode	= ext3_clear_inode,
-	.show_options	= ext3_show_options,
+static struct super_operations ext4_sops = {
+	.alloc_inode	= ext4_alloc_inode,
+	.destroy_inode	= ext4_destroy_inode,
+	.read_inode	= ext4_read_inode,
+	.write_inode	= ext4_write_inode,
+	.dirty_inode	= ext4_dirty_inode,
+	.delete_inode	= ext4_delete_inode,
+	.put_super	= ext4_put_super,
+	.write_super	= ext4_write_super,
+	.sync_fs	= ext4_sync_fs,
+	.write_super_lockfs = ext4_write_super_lockfs,
+	.unlockfs	= ext4_unlockfs,
+	.statfs		= ext4_statfs,
+	.remount_fs	= ext4_remount,
+	.clear_inode	= ext4_clear_inode,
+	.show_options	= ext4_show_options,
 #ifdef CONFIG_QUOTA
-	.quota_read	= ext3_quota_read,
-	.quota_write	= ext3_quota_write,
+	.quota_read	= ext4_quota_read,
+	.quota_write	= ext4_quota_write,
 #endif
 };
 
-static struct export_operations ext3_export_ops = {
-	.get_parent = ext3_get_parent,
-	.get_dentry = ext3_get_dentry,
+static struct export_operations ext4_export_ops = {
+	.get_parent = ext4_get_parent,
+	.get_dentry = ext4_get_dentry,
 };
 
 enum {
@@ -731,18 +731,18 @@ static match_table_t tokens = {
 	{Opt_resize, "resize"},
 };
 
-static ext3_fsblk_t get_sb_block(void **data)
+static ext4_fsblk_t get_sb_block(void **data)
 {
-	ext3_fsblk_t	sb_block;
+	ext4_fsblk_t	sb_block;
 	char		*options = (char *) *data;
 
 	if (!options || strncmp(options, "sb=", 3) != 0)
 		return 1;	/* Default location */
 	options += 3;
-	/*todo: use simple_strtoll with >32bit ext3 */
+	/*todo: use simple_strtoll with >32bit ext4 */
 	sb_block = simple_strtoul(options, &options, 0);
 	if (*options && *options != ',') {
-		printk("EXT3-fs: Invalid sb specification: %s\n",
+		printk("EXT4-fs: Invalid sb specification: %s\n",
 		       (char *) *data);
 		return 1;
 	}
@@ -754,9 +754,9 @@ static ext3_fsblk_t get_sb_block(void **data)
 
 static int parse_options (char *options, struct super_block *sb,
 			  unsigned int *inum, unsigned long *journal_devnum,
-			  ext3_fsblk_t *n_blocks_count, int is_remount)
+			  ext4_fsblk_t *n_blocks_count, int is_remount)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	char * p;
 	substring_t args[MAX_OPT_ARGS];
 	int data_opt = 0;
@@ -832,7 +832,7 @@ static int parse_options (char *options, struct super_block *sb,
 		case Opt_orlov:
 			clear_opt (sbi->s_mount_opt, OLDALLOC);
 			break;
-#ifdef CONFIG_EXT3_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
 		case Opt_user_xattr:
 			set_opt (sbi->s_mount_opt, XATTR_USER);
 			break;
@@ -842,10 +842,10 @@ static int parse_options (char *options, struct super_block *sb,
 #else
 		case Opt_user_xattr:
 		case Opt_nouser_xattr:
-			printk("EXT3 (no)user_xattr options not supported\n");
+			printk("EXT4 (no)user_xattr options not supported\n");
 			break;
 #endif
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
 		case Opt_acl:
 			set_opt(sbi->s_mount_opt, POSIX_ACL);
 			break;
@@ -855,7 +855,7 @@ static int parse_options (char *options, struct super_block *sb,
 #else
 		case Opt_acl:
 		case Opt_noacl:
-			printk("EXT3 (no)acl options not supported\n");
+			printk("EXT4 (no)acl options not supported\n");
 			break;
 #endif
 		case Opt_reservation:
@@ -871,7 +871,7 @@ static int parse_options (char *options, struct super_block *sb,
 			   user to specify an existing inode to be the
 			   journal file. */
 			if (is_remount) {
-				printk(KERN_ERR "EXT3-fs: cannot specify "
+				printk(KERN_ERR "EXT4-fs: cannot specify "
 				       "journal on remount\n");
 				return 0;
 			}
@@ -879,7 +879,7 @@ static int parse_options (char *options, struct super_block *sb,
 			break;
 		case Opt_journal_inum:
 			if (is_remount) {
-				printk(KERN_ERR "EXT3-fs: cannot specify "
+				printk(KERN_ERR "EXT4-fs: cannot specify "
 				       "journal on remount\n");
 				return 0;
 			}
@@ -889,7 +889,7 @@ static int parse_options (char *options, struct super_block *sb,
 			break;
 		case Opt_journal_dev:
 			if (is_remount) {
-				printk(KERN_ERR "EXT3-fs: cannot specify "
+				printk(KERN_ERR "EXT4-fs: cannot specify "
 				       "journal on remount\n");
 				return 0;
 			}
@@ -910,24 +910,24 @@ static int parse_options (char *options, struct super_block *sb,
 			sbi->s_commit_interval = HZ * option;
 			break;
 		case Opt_data_journal:
-			data_opt = EXT3_MOUNT_JOURNAL_DATA;
+			data_opt = EXT4_MOUNT_JOURNAL_DATA;
 			goto datacheck;
 		case Opt_data_ordered:
-			data_opt = EXT3_MOUNT_ORDERED_DATA;
+			data_opt = EXT4_MOUNT_ORDERED_DATA;
 			goto datacheck;
 		case Opt_data_writeback:
-			data_opt = EXT3_MOUNT_WRITEBACK_DATA;
+			data_opt = EXT4_MOUNT_WRITEBACK_DATA;
 		datacheck:
 			if (is_remount) {
-				if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
+				if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
 						!= data_opt) {
 					printk(KERN_ERR
-						"EXT3-fs: cannot change data "
+						"EXT4-fs: cannot change data "
 						"mode on remount\n");
 					return 0;
 				}
 			} else {
-				sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
+				sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
 				sbi->s_mount_opt |= data_opt;
 			}
 			break;
@@ -940,21 +940,21 @@ static int parse_options (char *options, struct super_block *sb,
 set_qf_name:
 			if (sb_any_quota_enabled(sb)) {
 				printk(KERN_ERR
-					"EXT3-fs: Cannot change journalled "
+					"EXT4-fs: Cannot change journalled "
 					"quota options when quota turned on.\n");
 				return 0;
 			}
 			qname = match_strdup(&args[0]);
 			if (!qname) {
 				printk(KERN_ERR
-					"EXT3-fs: not enough memory for "
+					"EXT4-fs: not enough memory for "
 					"storing quotafile name.\n");
 				return 0;
 			}
 			if (sbi->s_qf_names[qtype] &&
 			    strcmp(sbi->s_qf_names[qtype], qname)) {
 				printk(KERN_ERR
-					"EXT3-fs: %s quota file already "
+					"EXT4-fs: %s quota file already "
 					"specified.\n", QTYPE2NAME(qtype));
 				kfree(qname);
 				return 0;
@@ -962,7 +962,7 @@ set_qf_name:
 			sbi->s_qf_names[qtype] = qname;
 			if (strchr(sbi->s_qf_names[qtype], '/')) {
 				printk(KERN_ERR
-					"EXT3-fs: quotafile must be on "
+					"EXT4-fs: quotafile must be on "
 					"filesystem root.\n");
 				kfree(sbi->s_qf_names[qtype]);
 				sbi->s_qf_names[qtype] = NULL;
@@ -977,7 +977,7 @@ set_qf_name:
 			qtype = GRPQUOTA;
 clear_qf_name:
 			if (sb_any_quota_enabled(sb)) {
-				printk(KERN_ERR "EXT3-fs: Cannot change "
+				printk(KERN_ERR "EXT4-fs: Cannot change "
 					"journalled quota options when "
 					"quota turned on.\n");
 				return 0;
@@ -1005,7 +1005,7 @@ clear_qf_name:
 			break;
 		case Opt_noquota:
 			if (sb_any_quota_enabled(sb)) {
-				printk(KERN_ERR "EXT3-fs: Cannot change quota "
+				printk(KERN_ERR "EXT4-fs: Cannot change quota "
 					"options when quota turned on.\n");
 				return 0;
 			}
@@ -1024,7 +1024,7 @@ clear_qf_name:
 		case Opt_jqfmt_vfsold:
 		case Opt_jqfmt_vfsv0:
 			printk(KERN_ERR
-				"EXT3-fs: journalled quota options not "
+				"EXT4-fs: journalled quota options not "
 				"supported.\n");
 			break;
 		case Opt_noquota:
@@ -1045,7 +1045,7 @@ clear_qf_name:
 			break;
 		case Opt_resize:
 			if (!is_remount) {
-				printk("EXT3-fs: resize option only available "
+				printk("EXT4-fs: resize option only available "
 					"for remount\n");
 				return 0;
 			}
@@ -1061,38 +1061,38 @@ clear_qf_name:
 			break;
 		default:
 			printk (KERN_ERR
-				"EXT3-fs: Unrecognized mount option \"%s\" "
+				"EXT4-fs: Unrecognized mount option \"%s\" "
 				"or missing value\n", p);
 			return 0;
 		}
 	}
 #ifdef CONFIG_QUOTA
 	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
-		if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) &&
+		if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
 		     sbi->s_qf_names[USRQUOTA])
 			clear_opt(sbi->s_mount_opt, USRQUOTA);
 
-		if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
+		if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
 		     sbi->s_qf_names[GRPQUOTA])
 			clear_opt(sbi->s_mount_opt, GRPQUOTA);
 
 		if ((sbi->s_qf_names[USRQUOTA] &&
-				(sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
+				(sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
 		    (sbi->s_qf_names[GRPQUOTA] &&
-				(sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
-			printk(KERN_ERR "EXT3-fs: old and new quota "
+				(sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
+			printk(KERN_ERR "EXT4-fs: old and new quota "
 					"format mixing.\n");
 			return 0;
 		}
 
 		if (!sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT3-fs: journalled quota format "
+			printk(KERN_ERR "EXT4-fs: journalled quota format "
 					"not specified.\n");
 			return 0;
 		}
 	} else {
 		if (sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT3-fs: journalled quota format "
+			printk(KERN_ERR "EXT4-fs: journalled quota format "
 					"specified with no journalling "
 					"enabled.\n");
 			return 0;
@@ -1102,68 +1102,68 @@ clear_qf_name:
 	return 1;
 }
 
-static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
+static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 			    int read_only)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	int res = 0;
 
-	if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
-		printk (KERN_ERR "EXT3-fs warning: revision level too high, "
+	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
+		printk (KERN_ERR "EXT4-fs warning: revision level too high, "
 			"forcing read-only mode\n");
 		res = MS_RDONLY;
 	}
 	if (read_only)
 		return res;
-	if (!(sbi->s_mount_state & EXT3_VALID_FS))
-		printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, "
+	if (!(sbi->s_mount_state & EXT4_VALID_FS))
+		printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
 			"running e2fsck is recommended\n");
-	else if ((sbi->s_mount_state & EXT3_ERROR_FS))
+	else if ((sbi->s_mount_state & EXT4_ERROR_FS))
 		printk (KERN_WARNING
-			"EXT3-fs warning: mounting fs with errors, "
+			"EXT4-fs warning: mounting fs with errors, "
 			"running e2fsck is recommended\n");
 	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
 		 le16_to_cpu(es->s_mnt_count) >=
 		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
 		printk (KERN_WARNING
-			"EXT3-fs warning: maximal mount count reached, "
+			"EXT4-fs warning: maximal mount count reached, "
 			"running e2fsck is recommended\n");
 	else if (le32_to_cpu(es->s_checkinterval) &&
 		(le32_to_cpu(es->s_lastcheck) +
 			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
 		printk (KERN_WARNING
-			"EXT3-fs warning: checktime reached, "
+			"EXT4-fs warning: checktime reached, "
 			"running e2fsck is recommended\n");
 #if 0
 		/* @@@ We _will_ want to clear the valid bit if we find
                    inconsistencies, to force a fsck at reboot.  But for
                    a plain journaled filesystem we can keep it set as
                    valid forever! :) */
-	es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS);
+	es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS);
 #endif
 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
-		es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
+		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
 	es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
 	es->s_mtime = cpu_to_le32(get_seconds());
-	ext3_update_dynamic_rev(sb);
-	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+	ext4_update_dynamic_rev(sb);
+	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 
-	ext3_commit_super(sb, es, 1);
+	ext4_commit_super(sb, es, 1);
 	if (test_opt(sb, DEBUG))
-		printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, "
+		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, "
 				"bpg=%lu, ipg=%lu, mo=%04lx]\n",
 			sb->s_blocksize,
 			sbi->s_groups_count,
-			EXT3_BLOCKS_PER_GROUP(sb),
-			EXT3_INODES_PER_GROUP(sb),
+			EXT4_BLOCKS_PER_GROUP(sb),
+			EXT4_INODES_PER_GROUP(sb),
 			sbi->s_mount_opt);
 
-	printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id);
-	if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
+	printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id);
+	if (EXT4_SB(sb)->s_journal->j_inode == NULL) {
 		char b[BDEVNAME_SIZE];
 
 		printk("external journal on %s\n",
-			bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
+			bdevname(EXT4_SB(sb)->s_journal->j_dev, b));
 	} else {
 		printk("internal journal\n");
 	}
@@ -1171,16 +1171,16 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
 }
 
 /* Called at mount-time, super-block is locked */
-static int ext3_check_descriptors (struct super_block * sb)
+static int ext4_check_descriptors (struct super_block * sb)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
-	ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
-	ext3_fsblk_t last_block;
-	struct ext3_group_desc * gdp = NULL;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
+	ext4_fsblk_t last_block;
+	struct ext4_group_desc * gdp = NULL;
 	int desc_block = 0;
 	int i;
 
-	ext3_debug ("Checking group descriptors");
+	ext4_debug ("Checking group descriptors");
 
 	for (i = 0; i < sbi->s_groups_count; i++)
 	{
@@ -1188,15 +1188,15 @@ static int ext3_check_descriptors (struct super_block * sb)
 			last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
 		else
 			last_block = first_block +
-				(EXT3_BLOCKS_PER_GROUP(sb) - 1);
+				(EXT4_BLOCKS_PER_GROUP(sb) - 1);
 
-		if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0)
-			gdp = (struct ext3_group_desc *)
+		if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0)
+			gdp = (struct ext4_group_desc *)
 					sbi->s_group_desc[desc_block++]->b_data;
 		if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
 		    le32_to_cpu(gdp->bg_block_bitmap) > last_block)
 		{
-			ext3_error (sb, "ext3_check_descriptors",
+			ext4_error (sb, "ext4_check_descriptors",
 				    "Block bitmap for group %d"
 				    " not in group (block %lu)!",
 				    i, (unsigned long)
@@ -1206,7 +1206,7 @@ static int ext3_check_descriptors (struct super_block * sb)
 		if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
 		    le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
 		{
-			ext3_error (sb, "ext3_check_descriptors",
+			ext4_error (sb, "ext4_check_descriptors",
 				    "Inode bitmap for group %d"
 				    " not in group (block %lu)!",
 				    i, (unsigned long)
@@ -1217,24 +1217,24 @@ static int ext3_check_descriptors (struct super_block * sb)
 		    le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
 		    last_block)
 		{
-			ext3_error (sb, "ext3_check_descriptors",
+			ext4_error (sb, "ext4_check_descriptors",
 				    "Inode table for group %d"
 				    " not in group (block %lu)!",
 				    i, (unsigned long)
 					le32_to_cpu(gdp->bg_inode_table));
 			return 0;
 		}
-		first_block += EXT3_BLOCKS_PER_GROUP(sb);
+		first_block += EXT4_BLOCKS_PER_GROUP(sb);
 		gdp++;
 	}
 
-	sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
-	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
+	sbi->s_es->s_free_blocks_count=cpu_to_le32(ext4_count_free_blocks(sb));
+	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb));
 	return 1;
 }
 
 
-/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
+/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
  * the superblock) which were deleted from all directories, but held open by
  * a process at the time of a crash.  We walk the list and try to delete these
  * inodes at recovery time (only with a read-write filesystem).
@@ -1247,12 +1247,12 @@ static int ext3_check_descriptors (struct super_block * sb)
  * We only do an iget() and an iput() on each inode, which is very safe if we
  * accidentally point at an in-use or already deleted inode.  The worst that
  * can happen in this case is that we get a "bit already cleared" message from
- * ext3_free_inode().  The only reason we would point at a wrong inode is if
+ * ext4_free_inode().  The only reason we would point at a wrong inode is if
  * e2fsck was run on this filesystem, and it must have already done the orphan
  * inode cleanup for us, so we can safely abort without any further action.
  */
-static void ext3_orphan_cleanup (struct super_block * sb,
-				 struct ext3_super_block * es)
+static void ext4_orphan_cleanup (struct super_block * sb,
+				 struct ext4_super_block * es)
 {
 	unsigned int s_flags = sb->s_flags;
 	int nr_orphans = 0, nr_truncates = 0;
@@ -1264,7 +1264,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 		return;
 	}
 
-	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
+	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
 		if (es->s_last_orphan)
 			jbd_debug(1, "Errors on filesystem, "
 				  "clearing orphan list.\n");
@@ -1274,7 +1274,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 	}
 
 	if (s_flags & MS_RDONLY) {
-		printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n",
+		printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
 		       sb->s_id);
 		sb->s_flags &= ~MS_RDONLY;
 	}
@@ -1283,11 +1283,11 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 	sb->s_flags |= MS_ACTIVE;
 	/* Turn on quotas so that they are updated correctly */
 	for (i = 0; i < MAXQUOTAS; i++) {
-		if (EXT3_SB(sb)->s_qf_names[i]) {
-			int ret = ext3_quota_on_mount(sb, i);
+		if (EXT4_SB(sb)->s_qf_names[i]) {
+			int ret = ext4_quota_on_mount(sb, i);
 			if (ret < 0)
 				printk(KERN_ERR
-					"EXT3-fs: Cannot turn on journalled "
+					"EXT4-fs: Cannot turn on journalled "
 					"quota: error %d\n", ret);
 		}
 	}
@@ -1297,12 +1297,12 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 		struct inode *inode;
 
 		if (!(inode =
-		      ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) {
+		      ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) {
 			es->s_last_orphan = 0;
 			break;
 		}
 
-		list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
+		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
 		DQUOT_INIT(inode);
 		if (inode->i_nlink) {
 			printk(KERN_DEBUG
@@ -1310,7 +1310,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 				__FUNCTION__, inode->i_ino, inode->i_size);
 			jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
 				  inode->i_ino, inode->i_size);
-			ext3_truncate(inode);
+			ext4_truncate(inode);
 			nr_truncates++;
 		} else {
 			printk(KERN_DEBUG
@@ -1326,10 +1326,10 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 #define PLURAL(x) (x), ((x)==1) ? "" : "s"
 
 	if (nr_orphans)
-		printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n",
+		printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
 		       sb->s_id, PLURAL(nr_orphans));
 	if (nr_truncates)
-		printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n",
+		printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
 		       sb->s_id, PLURAL(nr_truncates));
 #ifdef CONFIG_QUOTA
 	/* Turn quotas off */
@@ -1348,9 +1348,9 @@ static void ext3_orphan_cleanup (struct super_block * sb,
  * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
  * We need to be 1 filesystem block less than the 2^32 sector limit.
  */
-static loff_t ext3_max_size(int bits)
+static loff_t ext4_max_size(int bits)
 {
-	loff_t res = EXT3_NDIR_BLOCKS;
+	loff_t res = EXT4_NDIR_BLOCKS;
 	/* This constant is calculated to be the largest file size for a
 	 * dense, 4k-blocksize file such that the total number of
 	 * sectors in the file, including data and all indirect blocks,
@@ -1366,34 +1366,34 @@ static loff_t ext3_max_size(int bits)
 	return res;
 }
 
-static ext3_fsblk_t descriptor_loc(struct super_block *sb,
-				    ext3_fsblk_t logic_sb_block,
+static ext4_fsblk_t descriptor_loc(struct super_block *sb,
+				    ext4_fsblk_t logic_sb_block,
 				    int nr)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	unsigned long bg, first_meta_bg;
 	int has_super = 0;
 
 	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
 
-	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
+	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
 	    nr < first_meta_bg)
 		return (logic_sb_block + nr + 1);
 	bg = sbi->s_desc_per_block * nr;
-	if (ext3_bg_has_super(sb, bg))
+	if (ext4_bg_has_super(sb, bg))
 		has_super = 1;
-	return (has_super + ext3_group_first_block_no(sb, bg));
+	return (has_super + ext4_group_first_block_no(sb, bg));
 }
 
 
-static int ext3_fill_super (struct super_block *sb, void *data, int silent)
+static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 {
 	struct buffer_head * bh;
-	struct ext3_super_block *es = NULL;
-	struct ext3_sb_info *sbi;
-	ext3_fsblk_t block;
-	ext3_fsblk_t sb_block = get_sb_block(&data);
-	ext3_fsblk_t logic_sb_block;
+	struct ext4_super_block *es = NULL;
+	struct ext4_sb_info *sbi;
+	ext4_fsblk_t block;
+	ext4_fsblk_t sb_block = get_sb_block(&data);
+	ext4_fsblk_t logic_sb_block;
 	unsigned long offset = 0;
 	unsigned int journal_inum = 0;
 	unsigned long journal_devnum = 0;
@@ -1411,64 +1411,64 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		return -ENOMEM;
 	sb->s_fs_info = sbi;
 	sbi->s_mount_opt = 0;
-	sbi->s_resuid = EXT3_DEF_RESUID;
-	sbi->s_resgid = EXT3_DEF_RESGID;
+	sbi->s_resuid = EXT4_DEF_RESUID;
+	sbi->s_resgid = EXT4_DEF_RESGID;
 
 	unlock_kernel();
 
-	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
+	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
 	if (!blocksize) {
-		printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
+		printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
 		goto out_fail;
 	}
 
 	/*
-	 * The ext3 superblock will not be buffer aligned for other than 1kB
+	 * The ext4 superblock will not be buffer aligned for other than 1kB
 	 * block sizes.  We need to calculate the offset from buffer start.
 	 */
-	if (blocksize != EXT3_MIN_BLOCK_SIZE) {
-		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
-		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
+	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
+		logic_sb_block = (sb_block * EXT4_MIN_BLOCK_SIZE) / blocksize;
+		offset = (sb_block * EXT4_MIN_BLOCK_SIZE) % blocksize;
 	} else {
 		logic_sb_block = sb_block;
 	}
 
 	if (!(bh = sb_bread(sb, logic_sb_block))) {
-		printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
+		printk (KERN_ERR "EXT4-fs: unable to read superblock\n");
 		goto out_fail;
 	}
 	/*
 	 * Note: s_es must be initialized as soon as possible because
-	 *       some ext3 macro-instructions depend on its value
+	 *       some ext4 macro-instructions depend on its value
 	 */
-	es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
+	es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
 	sbi->s_es = es;
 	sb->s_magic = le16_to_cpu(es->s_magic);
-	if (sb->s_magic != EXT3_SUPER_MAGIC)
-		goto cantfind_ext3;
+	if (sb->s_magic != EXT4_SUPER_MAGIC)
+		goto cantfind_ext4;
 
 	/* Set defaults before we parse the mount options */
 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
-	if (def_mount_opts & EXT3_DEFM_DEBUG)
+	if (def_mount_opts & EXT4_DEFM_DEBUG)
 		set_opt(sbi->s_mount_opt, DEBUG);
-	if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
+	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
 		set_opt(sbi->s_mount_opt, GRPID);
-	if (def_mount_opts & EXT3_DEFM_UID16)
+	if (def_mount_opts & EXT4_DEFM_UID16)
 		set_opt(sbi->s_mount_opt, NO_UID32);
-	if (def_mount_opts & EXT3_DEFM_XATTR_USER)
+	if (def_mount_opts & EXT4_DEFM_XATTR_USER)
 		set_opt(sbi->s_mount_opt, XATTR_USER);
-	if (def_mount_opts & EXT3_DEFM_ACL)
+	if (def_mount_opts & EXT4_DEFM_ACL)
 		set_opt(sbi->s_mount_opt, POSIX_ACL);
-	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
-		sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
-	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
-		sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA;
-	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
-		sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA;
-
-	if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
+	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
+		sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
+	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
+		sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
+	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
+		sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
+
+	if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
-	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
+	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO)
 		set_opt(sbi->s_mount_opt, ERRORS_RO);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
@@ -1481,40 +1481,40 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
-		((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
 
-	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
-	    (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
-	     EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
-	     EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
+	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
+	    (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
+	     EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
+	     EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
 		printk(KERN_WARNING
-		       "EXT3-fs warning: feature flags set on rev 0 fs, "
+		       "EXT4-fs warning: feature flags set on rev 0 fs, "
 		       "running e2fsck is recommended\n");
 	/*
 	 * Check feature flags regardless of the revision level, since we
 	 * previously didn't change the revision level when setting the flags,
 	 * so there is a chance incompat flags are set on a rev 0 filesystem.
 	 */
-	features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
+	features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
 	if (features) {
-		printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
+		printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
 		       "unsupported optional features (%x).\n",
 		       sb->s_id, le32_to_cpu(features));
 		goto failed_mount;
 	}
-	features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
+	features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
 	if (!(sb->s_flags & MS_RDONLY) && features) {
-		printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
+		printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
 		       "unsupported optional features (%x).\n",
 		       sb->s_id, le32_to_cpu(features));
 		goto failed_mount;
 	}
 	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
 
-	if (blocksize < EXT3_MIN_BLOCK_SIZE ||
-	    blocksize > EXT3_MAX_BLOCK_SIZE) {
+	if (blocksize < EXT4_MIN_BLOCK_SIZE ||
+	    blocksize > EXT4_MAX_BLOCK_SIZE) {
 		printk(KERN_ERR
-		       "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
+		       "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
 		       blocksize, sb->s_id);
 		goto failed_mount;
 	}
@@ -1526,52 +1526,52 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		 * than the hardware sectorsize for the machine.
 		 */
 		if (blocksize < hblock) {
-			printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
+			printk(KERN_ERR "EXT4-fs: blocksize %d too small for "
 			       "device blocksize %d.\n", blocksize, hblock);
 			goto failed_mount;
 		}
 
 		brelse (bh);
 		sb_set_blocksize(sb, blocksize);
-		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
-		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
+		logic_sb_block = (sb_block * EXT4_MIN_BLOCK_SIZE) / blocksize;
+		offset = (sb_block * EXT4_MIN_BLOCK_SIZE) % blocksize;
 		bh = sb_bread(sb, logic_sb_block);
 		if (!bh) {
 			printk(KERN_ERR
-			       "EXT3-fs: Can't read superblock on 2nd try.\n");
+			       "EXT4-fs: Can't read superblock on 2nd try.\n");
 			goto failed_mount;
 		}
-		es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
+		es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
 		sbi->s_es = es;
-		if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
+		if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
 			printk (KERN_ERR
-				"EXT3-fs: Magic mismatch, very weird !\n");
+				"EXT4-fs: Magic mismatch, very weird !\n");
 			goto failed_mount;
 		}
 	}
 
-	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
+	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
 
-	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
-		sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
-		sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
+	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
+		sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
+		sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
 	} else {
 		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
 		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
-		if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
+		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
 		    (sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
 		    (sbi->s_inode_size > blocksize)) {
 			printk (KERN_ERR
-				"EXT3-fs: unsupported inode size: %d\n",
+				"EXT4-fs: unsupported inode size: %d\n",
 				sbi->s_inode_size);
 			goto failed_mount;
 		}
 	}
-	sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
+	sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
 				   le32_to_cpu(es->s_log_frag_size);
 	if (blocksize != sbi->s_frag_size) {
 		printk(KERN_ERR
-		       "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
+		       "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n",
 		       sbi->s_frag_size, blocksize);
 		goto failed_mount;
 	}
@@ -1579,62 +1579,62 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
 	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
 	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
-	if (EXT3_INODE_SIZE(sb) == 0)
-		goto cantfind_ext3;
-	sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
+	if (EXT4_INODE_SIZE(sb) == 0)
+		goto cantfind_ext4;
+	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
 	if (sbi->s_inodes_per_block == 0)
-		goto cantfind_ext3;
+		goto cantfind_ext4;
 	sbi->s_itb_per_group = sbi->s_inodes_per_group /
 					sbi->s_inodes_per_block;
-	sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
+	sbi->s_desc_per_block = blocksize / sizeof(struct ext4_group_desc);
 	sbi->s_sbh = bh;
 	sbi->s_mount_state = le16_to_cpu(es->s_state);
-	sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb));
-	sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb));
+	sbi->s_addr_per_block_bits = log2(EXT4_ADDR_PER_BLOCK(sb));
+	sbi->s_desc_per_block_bits = log2(EXT4_DESC_PER_BLOCK(sb));
 	for (i=0; i < 4; i++)
 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
 	sbi->s_def_hash_version = es->s_def_hash_version;
 
 	if (sbi->s_blocks_per_group > blocksize * 8) {
 		printk (KERN_ERR
-			"EXT3-fs: #blocks per group too big: %lu\n",
+			"EXT4-fs: #blocks per group too big: %lu\n",
 			sbi->s_blocks_per_group);
 		goto failed_mount;
 	}
 	if (sbi->s_frags_per_group > blocksize * 8) {
 		printk (KERN_ERR
-			"EXT3-fs: #fragments per group too big: %lu\n",
+			"EXT4-fs: #fragments per group too big: %lu\n",
 			sbi->s_frags_per_group);
 		goto failed_mount;
 	}
 	if (sbi->s_inodes_per_group > blocksize * 8) {
 		printk (KERN_ERR
-			"EXT3-fs: #inodes per group too big: %lu\n",
+			"EXT4-fs: #inodes per group too big: %lu\n",
 			sbi->s_inodes_per_group);
 		goto failed_mount;
 	}
 
 	if (le32_to_cpu(es->s_blocks_count) >
 		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
-		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
+		printk(KERN_ERR "EXT4-fs: filesystem on %s:"
 			" too large to mount safely\n", sb->s_id);
 		if (sizeof(sector_t) < 8)
-			printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not "
+			printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
 					"enabled\n");
 		goto failed_mount;
 	}
 
-	if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
-		goto cantfind_ext3;
+	if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
+		goto cantfind_ext4;
 	sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
 			       le32_to_cpu(es->s_first_data_block) - 1)
-				       / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
-	db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
-		   EXT3_DESC_PER_BLOCK(sb);
+				       / EXT4_BLOCKS_PER_GROUP(sb)) + 1;
+	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
+		   EXT4_DESC_PER_BLOCK(sb);
 	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
 				    GFP_KERNEL);
 	if (sbi->s_group_desc == NULL) {
-		printk (KERN_ERR "EXT3-fs: not enough memory\n");
+		printk (KERN_ERR "EXT4-fs: not enough memory\n");
 		goto failed_mount;
 	}
 
@@ -1644,14 +1644,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		block = descriptor_loc(sb, logic_sb_block, i);
 		sbi->s_group_desc[i] = sb_bread(sb, block);
 		if (!sbi->s_group_desc[i]) {
-			printk (KERN_ERR "EXT3-fs: "
+			printk (KERN_ERR "EXT4-fs: "
 				"can't read group descriptor %d\n", i);
 			db_count = i;
 			goto failed_mount2;
 		}
 	}
-	if (!ext3_check_descriptors (sb)) {
-		printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n");
+	if (!ext4_check_descriptors (sb)) {
+		printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
 		goto failed_mount2;
 	}
 	sbi->s_gdb_count = db_count;
@@ -1659,11 +1659,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	spin_lock_init(&sbi->s_next_gen_lock);
 
 	percpu_counter_init(&sbi->s_freeblocks_counter,
-		ext3_count_free_blocks(sb));
+		ext4_count_free_blocks(sb));
 	percpu_counter_init(&sbi->s_freeinodes_counter,
-		ext3_count_free_inodes(sb));
+		ext4_count_free_inodes(sb));
 	percpu_counter_init(&sbi->s_dirs_counter,
-		ext3_count_dirs(sb));
+		ext4_count_dirs(sb));
 
 	/* per fileystem reservation list head & lock */
 	spin_lock_init(&sbi->s_rsv_window_lock);
@@ -1672,45 +1672,45 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	 * reservation window list --- it gives us a placeholder for
 	 * append-at-start-of-list which makes the allocation logic
 	 * _much_ simpler. */
-	sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
-	sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+	sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
+	sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
 	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
 	sbi->s_rsv_window_head.rsv_goal_size = 0;
-	ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
+	ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
 
 	/*
 	 * set up enough so that it can read an inode
 	 */
-	sb->s_op = &ext3_sops;
-	sb->s_export_op = &ext3_export_ops;
-	sb->s_xattr = ext3_xattr_handlers;
+	sb->s_op = &ext4_sops;
+	sb->s_export_op = &ext4_export_ops;
+	sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
-	sb->s_qcop = &ext3_qctl_operations;
-	sb->dq_op = &ext3_quota_operations;
+	sb->s_qcop = &ext4_qctl_operations;
+	sb->dq_op = &ext4_quota_operations;
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 
 	sb->s_root = NULL;
 
 	needs_recovery = (es->s_last_orphan != 0 ||
-			  EXT3_HAS_INCOMPAT_FEATURE(sb,
-				    EXT3_FEATURE_INCOMPAT_RECOVER));
+			  EXT4_HAS_INCOMPAT_FEATURE(sb,
+				    EXT4_FEATURE_INCOMPAT_RECOVER));
 
 	/*
 	 * The first inode we look at is the journal inode.  Don't try
 	 * root first: it may be modified in the journal!
 	 */
 	if (!test_opt(sb, NOLOAD) &&
-	    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
-		if (ext3_load_journal(sb, es, journal_devnum))
+	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
+		if (ext4_load_journal(sb, es, journal_devnum))
 			goto failed_mount3;
 	} else if (journal_inum) {
-		if (ext3_create_journal(sb, es, journal_inum))
+		if (ext4_create_journal(sb, es, journal_inum))
 			goto failed_mount3;
 	} else {
 		if (!silent)
 			printk (KERN_ERR
-				"ext3: No journal on filesystem on %s\n",
+				"ext4: No journal on filesystem on %s\n",
 				sb->s_id);
 		goto failed_mount3;
 	}
@@ -1729,11 +1729,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 			set_opt(sbi->s_mount_opt, JOURNAL_DATA);
 		break;
 
-	case EXT3_MOUNT_ORDERED_DATA:
-	case EXT3_MOUNT_WRITEBACK_DATA:
+	case EXT4_MOUNT_ORDERED_DATA:
+	case EXT4_MOUNT_WRITEBACK_DATA:
 		if (!journal_check_available_features
 		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
-			printk(KERN_ERR "EXT3-fs: Journal does not support "
+			printk(KERN_ERR "EXT4-fs: Journal does not support "
 			       "requested data journaling mode\n");
 			goto failed_mount4;
 		}
@@ -1742,8 +1742,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	}
 
 	if (test_opt(sb, NOBH)) {
-		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
-			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
+		if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
+			printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
 				"its supported only with writeback mode\n");
 			clear_opt(sbi->s_mount_opt, NOBH);
 		}
@@ -1753,21 +1753,21 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	 * so we can safely mount the rest of the filesystem now.
 	 */
 
-	root = iget(sb, EXT3_ROOT_INO);
+	root = iget(sb, EXT4_ROOT_INO);
 	sb->s_root = d_alloc_root(root);
 	if (!sb->s_root) {
-		printk(KERN_ERR "EXT3-fs: get root inode failed\n");
+		printk(KERN_ERR "EXT4-fs: get root inode failed\n");
 		iput(root);
 		goto failed_mount4;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
 		dput(sb->s_root);
 		sb->s_root = NULL;
-		printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
+		printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
 		goto failed_mount4;
 	}
 
-	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+	ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
 	/*
 	 * akpm: core read_super() calls in here with the superblock locked.
 	 * That deadlocks, because orphan cleanup needs to lock the superblock
@@ -1776,23 +1776,23 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	 * and aviro says that's the only reason for hanging onto the
 	 * superblock lock.
 	 */
-	EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
-	ext3_orphan_cleanup(sb, es);
-	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
+	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
+	ext4_orphan_cleanup(sb, es);
+	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
 	if (needs_recovery)
-		printk (KERN_INFO "EXT3-fs: recovery complete.\n");
-	ext3_mark_recovery_complete(sb, es);
-	printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
-		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
-		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
+		printk (KERN_INFO "EXT4-fs: recovery complete.\n");
+	ext4_mark_recovery_complete(sb, es);
+	printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
+		test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
+		test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
 		"writeback");
 
 	lock_kernel();
 	return 0;
 
-cantfind_ext3:
+cantfind_ext4:
 	if (!silent)
-		printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n",
+		printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
 		       sb->s_id);
 	goto failed_mount;
 
@@ -1811,7 +1811,7 @@ failed_mount:
 	for (i = 0; i < MAXQUOTAS; i++)
 		kfree(sbi->s_qf_names[i]);
 #endif
-	ext3_blkdev_remove(sbi);
+	ext4_blkdev_remove(sbi);
 	brelse(bh);
 out_fail:
 	sb->s_fs_info = NULL;
@@ -1825,13 +1825,13 @@ out_fail:
  * initial mount, once the journal has been initialised but before we've
  * done any recovery; and again on any subsequent remount.
  */
-static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
+static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (sbi->s_commit_interval)
 		journal->j_commit_interval = sbi->s_commit_interval;
-	/* We could also set up an ext3-specific default for the commit
+	/* We could also set up an ext4-specific default for the commit
 	 * interval here, but for now we'll just fall back to the jbd
 	 * default. */
 
@@ -1843,7 +1843,7 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
 	spin_unlock(&journal->j_state_lock);
 }
 
-static journal_t *ext3_get_journal(struct super_block *sb,
+static journal_t *ext4_get_journal(struct super_block *sb,
 				   unsigned int journal_inum)
 {
 	struct inode *journal_inode;
@@ -1855,55 +1855,55 @@ static journal_t *ext3_get_journal(struct super_block *sb,
 
 	journal_inode = iget(sb, journal_inum);
 	if (!journal_inode) {
-		printk(KERN_ERR "EXT3-fs: no journal found.\n");
+		printk(KERN_ERR "EXT4-fs: no journal found.\n");
 		return NULL;
 	}
 	if (!journal_inode->i_nlink) {
 		make_bad_inode(journal_inode);
 		iput(journal_inode);
-		printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n");
+		printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
 		return NULL;
 	}
 
 	jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
 		  journal_inode, journal_inode->i_size);
 	if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) {
-		printk(KERN_ERR "EXT3-fs: invalid journal inode.\n");
+		printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
 		iput(journal_inode);
 		return NULL;
 	}
 
 	journal = journal_init_inode(journal_inode);
 	if (!journal) {
-		printk(KERN_ERR "EXT3-fs: Could not load journal inode\n");
+		printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
 		iput(journal_inode);
 		return NULL;
 	}
 	journal->j_private = sb;
-	ext3_init_journal_params(sb, journal);
+	ext4_init_journal_params(sb, journal);
 	return journal;
 }
 
-static journal_t *ext3_get_dev_journal(struct super_block *sb,
+static journal_t *ext4_get_dev_journal(struct super_block *sb,
 				       dev_t j_dev)
 {
 	struct buffer_head * bh;
 	journal_t *journal;
-	ext3_fsblk_t start;
-	ext3_fsblk_t len;
+	ext4_fsblk_t start;
+	ext4_fsblk_t len;
 	int hblock, blocksize;
-	ext3_fsblk_t sb_block;
+	ext4_fsblk_t sb_block;
 	unsigned long offset;
-	struct ext3_super_block * es;
+	struct ext4_super_block * es;
 	struct block_device *bdev;
 
-	bdev = ext3_blkdev_get(j_dev);
+	bdev = ext4_blkdev_get(j_dev);
 	if (bdev == NULL)
 		return NULL;
 
 	if (bd_claim(bdev, sb)) {
 		printk(KERN_ERR
-		        "EXT3: failed to claim external journal device.\n");
+		        "EXT4: failed to claim external journal device.\n");
 		blkdev_put(bdev);
 		return NULL;
 	}
@@ -1912,31 +1912,31 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
 	hblock = bdev_hardsect_size(bdev);
 	if (blocksize < hblock) {
 		printk(KERN_ERR
-			"EXT3-fs: blocksize too small for journal device.\n");
+			"EXT4-fs: blocksize too small for journal device.\n");
 		goto out_bdev;
 	}
 
-	sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
-	offset = EXT3_MIN_BLOCK_SIZE % blocksize;
+	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
+	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
 	set_blocksize(bdev, blocksize);
 	if (!(bh = __bread(bdev, sb_block, blocksize))) {
-		printk(KERN_ERR "EXT3-fs: couldn't read superblock of "
+		printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
 		       "external journal\n");
 		goto out_bdev;
 	}
 
-	es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
-	if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
+	es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
+	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
 	    !(le32_to_cpu(es->s_feature_incompat) &
-	      EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
-		printk(KERN_ERR "EXT3-fs: external journal has "
+	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
+		printk(KERN_ERR "EXT4-fs: external journal has "
 					"bad superblock\n");
 		brelse(bh);
 		goto out_bdev;
 	}
 
-	if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
-		printk(KERN_ERR "EXT3-fs: journal UUID does not match\n");
+	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
+		printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
 		brelse(bh);
 		goto out_bdev;
 	}
@@ -1948,34 +1948,34 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
 	journal = journal_init_dev(bdev, sb->s_bdev,
 					start, len, blocksize);
 	if (!journal) {
-		printk(KERN_ERR "EXT3-fs: failed to create device journal\n");
+		printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
 		goto out_bdev;
 	}
 	journal->j_private = sb;
 	ll_rw_block(READ, 1, &journal->j_sb_buffer);
 	wait_on_buffer(journal->j_sb_buffer);
 	if (!buffer_uptodate(journal->j_sb_buffer)) {
-		printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
+		printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
 		goto out_journal;
 	}
 	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
-		printk(KERN_ERR "EXT3-fs: External journal has more than one "
+		printk(KERN_ERR "EXT4-fs: External journal has more than one "
 					"user (unsupported) - %d\n",
 			be32_to_cpu(journal->j_superblock->s_nr_users));
 		goto out_journal;
 	}
-	EXT3_SB(sb)->journal_bdev = bdev;
-	ext3_init_journal_params(sb, journal);
+	EXT4_SB(sb)->journal_bdev = bdev;
+	ext4_init_journal_params(sb, journal);
 	return journal;
 out_journal:
 	journal_destroy(journal);
 out_bdev:
-	ext3_blkdev_put(bdev);
+	ext4_blkdev_put(bdev);
 	return NULL;
 }
 
-static int ext3_load_journal(struct super_block *sb,
-			     struct ext3_super_block *es,
+static int ext4_load_journal(struct super_block *sb,
+			     struct ext4_super_block *es,
 			     unsigned long journal_devnum)
 {
 	journal_t *journal;
@@ -1986,7 +1986,7 @@ static int ext3_load_journal(struct super_block *sb,
 
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
-		printk(KERN_INFO "EXT3-fs: external journal device major/minor "
+		printk(KERN_INFO "EXT4-fs: external journal device major/minor "
 			"numbers have changed\n");
 		journal_dev = new_decode_dev(journal_devnum);
 	} else
@@ -2000,56 +2000,56 @@ static int ext3_load_journal(struct super_block *sb,
 	 * can get read-write access to the device.
 	 */
 
-	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
 		if (sb->s_flags & MS_RDONLY) {
-			printk(KERN_INFO "EXT3-fs: INFO: recovery "
+			printk(KERN_INFO "EXT4-fs: INFO: recovery "
 					"required on readonly filesystem.\n");
 			if (really_read_only) {
-				printk(KERN_ERR "EXT3-fs: write access "
+				printk(KERN_ERR "EXT4-fs: write access "
 					"unavailable, cannot proceed.\n");
 				return -EROFS;
 			}
-			printk (KERN_INFO "EXT3-fs: write access will "
+			printk (KERN_INFO "EXT4-fs: write access will "
 					"be enabled during recovery.\n");
 		}
 	}
 
 	if (journal_inum && journal_dev) {
-		printk(KERN_ERR "EXT3-fs: filesystem has both journal "
+		printk(KERN_ERR "EXT4-fs: filesystem has both journal "
 		       "and inode journals!\n");
 		return -EINVAL;
 	}
 
 	if (journal_inum) {
-		if (!(journal = ext3_get_journal(sb, journal_inum)))
+		if (!(journal = ext4_get_journal(sb, journal_inum)))
 			return -EINVAL;
 	} else {
-		if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
+		if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
 			return -EINVAL;
 	}
 
 	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
 		err = journal_update_format(journal);
 		if (err)  {
-			printk(KERN_ERR "EXT3-fs: error updating journal.\n");
+			printk(KERN_ERR "EXT4-fs: error updating journal.\n");
 			journal_destroy(journal);
 			return err;
 		}
 	}
 
-	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
+	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
 		err = journal_wipe(journal, !really_read_only);
 	if (!err)
 		err = journal_load(journal);
 
 	if (err) {
-		printk(KERN_ERR "EXT3-fs: error loading journal.\n");
+		printk(KERN_ERR "EXT4-fs: error loading journal.\n");
 		journal_destroy(journal);
 		return err;
 	}
 
-	EXT3_SB(sb)->s_journal = journal;
-	ext3_clear_journal_err(sb, es);
+	EXT4_SB(sb)->s_journal = journal;
+	ext4_clear_journal_err(sb, es);
 
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -2057,62 +2057,62 @@ static int ext3_load_journal(struct super_block *sb,
 		sb->s_dirt = 1;
 
 		/* Make sure we flush the recovery flag to disk. */
-		ext3_commit_super(sb, es, 1);
+		ext4_commit_super(sb, es, 1);
 	}
 
 	return 0;
 }
 
-static int ext3_create_journal(struct super_block * sb,
-			       struct ext3_super_block * es,
+static int ext4_create_journal(struct super_block * sb,
+			       struct ext4_super_block * es,
 			       unsigned int journal_inum)
 {
 	journal_t *journal;
 
 	if (sb->s_flags & MS_RDONLY) {
-		printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
+		printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to "
 				"create journal.\n");
 		return -EROFS;
 	}
 
-	if (!(journal = ext3_get_journal(sb, journal_inum)))
+	if (!(journal = ext4_get_journal(sb, journal_inum)))
 		return -EINVAL;
 
-	printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
+	printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n",
 	       journal_inum);
 
 	if (journal_create(journal)) {
-		printk(KERN_ERR "EXT3-fs: error creating journal.\n");
+		printk(KERN_ERR "EXT4-fs: error creating journal.\n");
 		journal_destroy(journal);
 		return -EIO;
 	}
 
-	EXT3_SB(sb)->s_journal = journal;
+	EXT4_SB(sb)->s_journal = journal;
 
-	ext3_update_dynamic_rev(sb);
-	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
+	ext4_update_dynamic_rev(sb);
+	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+	EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL);
 
 	es->s_journal_inum = cpu_to_le32(journal_inum);
 	sb->s_dirt = 1;
 
 	/* Make sure we flush the recovery flag to disk. */
-	ext3_commit_super(sb, es, 1);
+	ext4_commit_super(sb, es, 1);
 
 	return 0;
 }
 
-static void ext3_commit_super (struct super_block * sb,
-			       struct ext3_super_block * es,
+static void ext4_commit_super (struct super_block * sb,
+			       struct ext4_super_block * es,
 			       int sync)
 {
-	struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
+	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
 
 	if (!sbh)
 		return;
 	es->s_wtime = cpu_to_le32(get_seconds());
-	es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
-	es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
+	es->s_free_blocks_count = cpu_to_le32(ext4_count_free_blocks(sb));
+	es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
 	BUFFER_TRACE(sbh, "marking dirty");
 	mark_buffer_dirty(sbh);
 	if (sync)
@@ -2125,18 +2125,18 @@ static void ext3_commit_super (struct super_block * sb,
  * remounting) the filesystem readonly, then we will end up with a
  * consistent fs on disk.  Record that fact.
  */
-static void ext3_mark_recovery_complete(struct super_block * sb,
-					struct ext3_super_block * es)
+static void ext4_mark_recovery_complete(struct super_block * sb,
+					struct ext4_super_block * es)
 {
-	journal_t *journal = EXT3_SB(sb)->s_journal;
+	journal_t *journal = EXT4_SB(sb)->s_journal;
 
 	journal_lock_updates(journal);
 	journal_flush(journal);
-	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
-		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		sb->s_dirt = 0;
-		ext3_commit_super(sb, es, 1);
+		ext4_commit_super(sb, es, 1);
 	}
 	journal_unlock_updates(journal);
 }
@@ -2146,33 +2146,33 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
  * has recorded an error from a previous lifetime, move that error to the
  * main filesystem now.
  */
-static void ext3_clear_journal_err(struct super_block * sb,
-				   struct ext3_super_block * es)
+static void ext4_clear_journal_err(struct super_block * sb,
+				   struct ext4_super_block * es)
 {
 	journal_t *journal;
 	int j_errno;
 	const char *errstr;
 
-	journal = EXT3_SB(sb)->s_journal;
+	journal = EXT4_SB(sb)->s_journal;
 
 	/*
 	 * Now check for any error status which may have been recorded in the
-	 * journal by a prior ext3_error() or ext3_abort()
+	 * journal by a prior ext4_error() or ext4_abort()
 	 */
 
 	j_errno = journal_errno(journal);
 	if (j_errno) {
 		char nbuf[16];
 
-		errstr = ext3_decode_error(sb, j_errno, nbuf);
-		ext3_warning(sb, __FUNCTION__, "Filesystem error recorded "
+		errstr = ext4_decode_error(sb, j_errno, nbuf);
+		ext4_warning(sb, __FUNCTION__, "Filesystem error recorded "
 			     "from previous mount: %s", errstr);
-		ext3_warning(sb, __FUNCTION__, "Marking fs in need of "
+		ext4_warning(sb, __FUNCTION__, "Marking fs in need of "
 			     "filesystem check.");
 
-		EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
-		es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
-		ext3_commit_super (sb, es, 1);
+		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+		ext4_commit_super (sb, es, 1);
 
 		journal_clear_err(journal);
 	}
@@ -2182,7 +2182,7 @@ static void ext3_clear_journal_err(struct super_block * sb,
  * Force the running and committing transactions to commit,
  * and wait on the commit.
  */
-int ext3_force_commit(struct super_block *sb)
+int ext4_force_commit(struct super_block *sb)
 {
 	journal_t *journal;
 	int ret;
@@ -2190,14 +2190,14 @@ int ext3_force_commit(struct super_block *sb)
 	if (sb->s_flags & MS_RDONLY)
 		return 0;
 
-	journal = EXT3_SB(sb)->s_journal;
+	journal = EXT4_SB(sb)->s_journal;
 	sb->s_dirt = 0;
-	ret = ext3_journal_force_commit(journal);
+	ret = ext4_journal_force_commit(journal);
 	return ret;
 }
 
 /*
- * Ext3 always journals updates to the superblock itself, so we don't
+ * Ext4 always journals updates to the superblock itself, so we don't
  * have to propagate any other updates to the superblock on disk at this
  * point.  Just start an async writeback to get the buffers on their way
  * to the disk.
@@ -2205,21 +2205,21 @@ int ext3_force_commit(struct super_block *sb)
  * This implicitly triggers the writebehind on sync().
  */
 
-static void ext3_write_super (struct super_block * sb)
+static void ext4_write_super (struct super_block * sb)
 {
 	if (mutex_trylock(&sb->s_lock) != 0)
 		BUG();
 	sb->s_dirt = 0;
 }
 
-static int ext3_sync_fs(struct super_block *sb, int wait)
+static int ext4_sync_fs(struct super_block *sb, int wait)
 {
 	tid_t target;
 
 	sb->s_dirt = 0;
-	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
+	if (journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
 		if (wait)
-			log_wait_commit(EXT3_SB(sb)->s_journal, target);
+			log_wait_commit(EXT4_SB(sb)->s_journal, target);
 	}
 	return 0;
 }
@@ -2228,20 +2228,20 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
  * LVM calls this function before a (read-only) snapshot is created.  This
  * gives us a chance to flush the journal completely and mark the fs clean.
  */
-static void ext3_write_super_lockfs(struct super_block *sb)
+static void ext4_write_super_lockfs(struct super_block *sb)
 {
 	sb->s_dirt = 0;
 
 	if (!(sb->s_flags & MS_RDONLY)) {
-		journal_t *journal = EXT3_SB(sb)->s_journal;
+		journal_t *journal = EXT4_SB(sb)->s_journal;
 
 		/* Now we set up the journal barrier. */
 		journal_lock_updates(journal);
 		journal_flush(journal);
 
 		/* Journal blocked and flushed, clear needs_recovery flag. */
-		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-		ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
+		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
 	}
 }
 
@@ -2249,25 +2249,25 @@ static void ext3_write_super_lockfs(struct super_block *sb)
  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
  * flag here, even though the filesystem is not technically dirty yet.
  */
-static void ext3_unlockfs(struct super_block *sb)
+static void ext4_unlockfs(struct super_block *sb)
 {
 	if (!(sb->s_flags & MS_RDONLY)) {
 		lock_super(sb);
 		/* Reser the needs_recovery flag before the fs is unlocked. */
-		EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-		ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
+		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
 		unlock_super(sb);
-		journal_unlock_updates(EXT3_SB(sb)->s_journal);
+		journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	}
 }
 
-static int ext3_remount (struct super_block * sb, int * flags, char * data)
+static int ext4_remount (struct super_block * sb, int * flags, char * data)
 {
-	struct ext3_super_block * es;
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
-	ext3_fsblk_t n_blocks_count = 0;
+	struct ext4_super_block * es;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	ext4_fsblk_t n_blocks_count = 0;
 	unsigned long old_sb_flags;
-	struct ext3_mount_options old_opts;
+	struct ext4_mount_options old_opts;
 	int err;
 #ifdef CONFIG_QUOTA
 	int i;
@@ -2293,19 +2293,19 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 		goto restore_opts;
 	}
 
-	if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
-		ext3_abort(sb, __FUNCTION__, "Abort forced by user");
+	if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
+		ext4_abort(sb, __FUNCTION__, "Abort forced by user");
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
-		((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
 
 	es = sbi->s_es;
 
-	ext3_init_journal_params(sb, sbi->s_journal);
+	ext4_init_journal_params(sb, sbi->s_journal);
 
 	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
 		n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
-		if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) {
+		if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
 			err = -EROFS;
 			goto restore_opts;
 		}
@@ -2322,16 +2322,16 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 			 * readonly, and if so set the rdonly flag and then
 			 * mark the partition as valid again.
 			 */
-			if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
-			    (sbi->s_mount_state & EXT3_VALID_FS))
+			if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
+			    (sbi->s_mount_state & EXT4_VALID_FS))
 				es->s_state = cpu_to_le16(sbi->s_mount_state);
 
-			ext3_mark_recovery_complete(sb, es);
+			ext4_mark_recovery_complete(sb, es);
 		} else {
 			__le32 ret;
-			if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
-					~EXT3_FEATURE_RO_COMPAT_SUPP))) {
-				printk(KERN_WARNING "EXT3-fs: %s: couldn't "
+			if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
+					~EXT4_FEATURE_RO_COMPAT_SUPP))) {
+				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
 				       "remount RDWR because of unsupported "
 				       "optional features (%x).\n",
 				       sb->s_id, le32_to_cpu(ret));
@@ -2344,11 +2344,11 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 			 * been changed by e2fsck since we originally mounted
 			 * the partition.)
 			 */
-			ext3_clear_journal_err(sb, es);
+			ext4_clear_journal_err(sb, es);
 			sbi->s_mount_state = le16_to_cpu(es->s_state);
-			if ((err = ext3_group_extend(sb, es, n_blocks_count)))
+			if ((err = ext4_group_extend(sb, es, n_blocks_count)))
 				goto restore_opts;
-			if (!ext3_setup_super (sb, es, 0))
+			if (!ext4_setup_super (sb, es, 0))
 				sb->s_flags &= ~MS_RDONLY;
 		}
 	}
@@ -2378,19 +2378,19 @@ restore_opts:
 	return err;
 }
 
-static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
+static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 {
 	struct super_block *sb = dentry->d_sb;
-	struct ext3_sb_info *sbi = EXT3_SB(sb);
-	struct ext3_super_block *es = sbi->s_es;
-	ext3_fsblk_t overhead;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_super_block *es = sbi->s_es;
+	ext4_fsblk_t overhead;
 	int i;
 
 	if (test_opt (sb, MINIX_DF))
 		overhead = 0;
 	else {
 		unsigned long ngroups;
-		ngroups = EXT3_SB(sb)->s_groups_count;
+		ngroups = EXT4_SB(sb)->s_groups_count;
 		smp_rmb();
 
 		/*
@@ -2409,8 +2409,8 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 		 * feature is turned on, then not all groups have this.
 		 */
 		for (i = 0; i < ngroups; i++) {
-			overhead += ext3_bg_has_super(sb, i) +
-				ext3_bg_num_gdb(sb, i);
+			overhead += ext4_bg_has_super(sb, i) +
+				ext4_bg_num_gdb(sb, i);
 			cond_resched();
 		}
 
@@ -2418,10 +2418,10 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 		 * Every block group has an inode bitmap, a block
 		 * bitmap, and an inode table.
 		 */
-		overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group));
+		overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group));
 	}
 
-	buf->f_type = EXT3_SUPER_MAGIC;
+	buf->f_type = EXT4_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
 	buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
@@ -2430,14 +2430,14 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 		buf->f_bavail = 0;
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
-	buf->f_namelen = EXT3_NAME_LEN;
+	buf->f_namelen = EXT4_NAME_LEN;
 	return 0;
 }
 
 /* Helper function for writing quotas on sync - we need to start transaction before quota file
  * is locked for write. Otherwise the are possible deadlocks:
  * Process 1                         Process 2
- * ext3_create()                     quota_sync()
+ * ext4_create()                     quota_sync()
  *   journal_start()                   write_dquot()
  *   DQUOT_INIT()                        down(dqio_mutex)
  *     down(dqio_mutex)                    journal_start()
@@ -2451,111 +2451,111 @@ static inline struct inode *dquot_to_inode(struct dquot *dquot)
 	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
 }
 
-static int ext3_dquot_initialize(struct inode *inode, int type)
+static int ext4_dquot_initialize(struct inode *inode, int type)
 {
 	handle_t *handle;
 	int ret, err;
 
 	/* We may create quota structure so we need to reserve enough blocks */
-	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
+	handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_initialize(inode, type);
-	err = ext3_journal_stop(handle);
+	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
 	return ret;
 }
 
-static int ext3_dquot_drop(struct inode *inode)
+static int ext4_dquot_drop(struct inode *inode)
 {
 	handle_t *handle;
 	int ret, err;
 
 	/* We may delete quota structure so we need to reserve enough blocks */
-	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
+	handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_drop(inode);
-	err = ext3_journal_stop(handle);
+	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
 	return ret;
 }
 
-static int ext3_write_dquot(struct dquot *dquot)
+static int ext4_write_dquot(struct dquot *dquot)
 {
 	int ret, err;
 	handle_t *handle;
 	struct inode *inode;
 
 	inode = dquot_to_inode(dquot);
-	handle = ext3_journal_start(inode,
-					EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+	handle = ext4_journal_start(inode,
+					EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_commit(dquot);
-	err = ext3_journal_stop(handle);
+	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
 	return ret;
 }
 
-static int ext3_acquire_dquot(struct dquot *dquot)
+static int ext4_acquire_dquot(struct dquot *dquot)
 {
 	int ret, err;
 	handle_t *handle;
 
-	handle = ext3_journal_start(dquot_to_inode(dquot),
-					EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+	handle = ext4_journal_start(dquot_to_inode(dquot),
+					EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_acquire(dquot);
-	err = ext3_journal_stop(handle);
+	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
 	return ret;
 }
 
-static int ext3_release_dquot(struct dquot *dquot)
+static int ext4_release_dquot(struct dquot *dquot)
 {
 	int ret, err;
 	handle_t *handle;
 
-	handle = ext3_journal_start(dquot_to_inode(dquot),
-					EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+	handle = ext4_journal_start(dquot_to_inode(dquot),
+					EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_release(dquot);
-	err = ext3_journal_stop(handle);
+	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
 	return ret;
 }
 
-static int ext3_mark_dquot_dirty(struct dquot *dquot)
+static int ext4_mark_dquot_dirty(struct dquot *dquot)
 {
 	/* Are we journalling quotas? */
-	if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
-	    EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
+	if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
+	    EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
 		dquot_mark_dquot_dirty(dquot);
-		return ext3_write_dquot(dquot);
+		return ext4_write_dquot(dquot);
 	} else {
 		return dquot_mark_dquot_dirty(dquot);
 	}
 }
 
-static int ext3_write_info(struct super_block *sb, int type)
+static int ext4_write_info(struct super_block *sb, int type)
 {
 	int ret, err;
 	handle_t *handle;
 
 	/* Data block + inode block */
-	handle = ext3_journal_start(sb->s_root->d_inode, 2);
+	handle = ext4_journal_start(sb->s_root->d_inode, 2);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_commit_info(sb, type);
-	err = ext3_journal_stop(handle);
+	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
 	return ret;
@@ -2565,16 +2565,16 @@ static int ext3_write_info(struct super_block *sb, int type)
  * Turn on quotas during mount time - we need to find
  * the quota file and such...
  */
-static int ext3_quota_on_mount(struct super_block *sb, int type)
+static int ext4_quota_on_mount(struct super_block *sb, int type)
 {
-	return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
-			EXT3_SB(sb)->s_jquota_fmt, type);
+	return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
+			EXT4_SB(sb)->s_jquota_fmt, type);
 }
 
 /*
  * Standard function to be called on quota_on
  */
-static int ext3_quota_on(struct super_block *sb, int type, int format_id,
+static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 			 char *path)
 {
 	int err;
@@ -2583,8 +2583,8 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 	if (!test_opt(sb, QUOTA))
 		return -EINVAL;
 	/* Not journalling quota? */
-	if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
-	    !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
+	if (!EXT4_SB(sb)->s_qf_names[USRQUOTA] &&
+	    !EXT4_SB(sb)->s_qf_names[GRPQUOTA])
 		return vfs_quota_on(sb, type, format_id, path);
 	err = path_lookup(path, LOOKUP_FOLLOW, &nd);
 	if (err)
@@ -2597,7 +2597,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 	/* Quotafile not of fs root? */
 	if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
 		printk(KERN_WARNING
-			"EXT3-fs: Quota file not on filesystem root. "
+			"EXT4-fs: Quota file not on filesystem root. "
 			"Journalled quota will not work.\n");
 	path_release(&nd);
 	return vfs_quota_on(sb, type, format_id, path);
@@ -2607,11 +2607,11 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
  * acquiring the locks... As quota files are never truncated and quota code
  * itself serializes the operations (and noone else should touch the files)
  * we don't have to be afraid of races */
-static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
+static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 			       size_t len, loff_t off)
 {
 	struct inode *inode = sb_dqopt(sb)->files[type];
-	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
+	sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
 	int err = 0;
 	int offset = off & (sb->s_blocksize - 1);
 	int tocopy;
@@ -2627,7 +2627,7 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
 	while (toread > 0) {
 		tocopy = sb->s_blocksize - offset < toread ?
 				sb->s_blocksize - offset : toread;
-		bh = ext3_bread(NULL, inode, blk, 0, &err);
+		bh = ext4_bread(NULL, inode, blk, 0, &err);
 		if (err)
 			return err;
 		if (!bh)	/* A hole? */
@@ -2645,15 +2645,15 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
 
 /* Write to quotafile (we know the transaction is already started and has
  * enough credits) */
-static ssize_t ext3_quota_write(struct super_block *sb, int type,
+static ssize_t ext4_quota_write(struct super_block *sb, int type,
 				const char *data, size_t len, loff_t off)
 {
 	struct inode *inode = sb_dqopt(sb)->files[type];
-	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
+	sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
 	int err = 0;
 	int offset = off & (sb->s_blocksize - 1);
 	int tocopy;
-	int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
+	int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
 	size_t towrite = len;
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
@@ -2662,11 +2662,11 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
-		bh = ext3_bread(handle, inode, blk, 1, &err);
+		bh = ext4_bread(handle, inode, blk, 1, &err);
 		if (!bh)
 			goto out;
 		if (journal_quota) {
-			err = ext3_journal_get_write_access(handle, bh);
+			err = ext4_journal_get_write_access(handle, bh);
 			if (err) {
 				brelse(bh);
 				goto out;
@@ -2677,10 +2677,10 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 		flush_dcache_page(bh->b_page);
 		unlock_buffer(bh);
 		if (journal_quota)
-			err = ext3_journal_dirty_metadata(handle, bh);
+			err = ext4_journal_dirty_metadata(handle, bh);
 		else {
 			/* Always do at least ordered writes for quotas */
-			err = ext3_journal_dirty_data(handle, bh);
+			err = ext4_journal_dirty_data(handle, bh);
 			mark_buffer_dirty(bh);
 		}
 		brelse(bh);
@@ -2696,59 +2696,59 @@ out:
 		return err;
 	if (inode->i_size < off+len-towrite) {
 		i_size_write(inode, off+len-towrite);
-		EXT3_I(inode)->i_disksize = inode->i_size;
+		EXT4_I(inode)->i_disksize = inode->i_size;
 	}
 	inode->i_version++;
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	ext3_mark_inode_dirty(handle, inode);
+	ext4_mark_inode_dirty(handle, inode);
 	mutex_unlock(&inode->i_mutex);
 	return len - towrite;
 }
 
 #endif
 
-static int ext3_get_sb(struct file_system_type *fs_type,
+static int ext4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
 }
 
-static struct file_system_type ext3_fs_type = {
+static struct file_system_type ext4dev_fs_type = {
 	.owner		= THIS_MODULE,
-	.name		= "ext3",
-	.get_sb		= ext3_get_sb,
+	.name		= "ext4dev",
+	.get_sb		= ext4_get_sb,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static int __init init_ext3_fs(void)
+static int __init init_ext4_fs(void)
 {
-	int err = init_ext3_xattr();
+	int err = init_ext4_xattr();
 	if (err)
 		return err;
 	err = init_inodecache();
 	if (err)
 		goto out1;
-        err = register_filesystem(&ext3_fs_type);
+        err = register_filesystem(&ext4dev_fs_type);
 	if (err)
 		goto out;
 	return 0;
 out:
 	destroy_inodecache();
 out1:
-	exit_ext3_xattr();
+	exit_ext4_xattr();
 	return err;
 }
 
-static void __exit exit_ext3_fs(void)
+static void __exit exit_ext4_fs(void)
 {
-	unregister_filesystem(&ext3_fs_type);
+	unregister_filesystem(&ext4dev_fs_type);
 	destroy_inodecache();
-	exit_ext3_xattr();
+	exit_ext4_xattr();
 }
 
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
-MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+MODULE_DESCRIPTION("Fourth Extended Filesystem with extents");
 MODULE_LICENSE("GPL");
-module_init(init_ext3_fs)
-module_exit(exit_ext3_fs)
+module_init(init_ext4_fs)
+module_exit(exit_ext4_fs)
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 4f79122cde67..9e4c75f912f7 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/ext3/symlink.c
+ *  linux/fs/ext4/symlink.c
  *
  * Only fast symlinks left here - the rest is done by generic code. AV, 1999
  *
@@ -14,41 +14,41 @@
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *
- *  ext3 symlink handling code
+ *  ext4 symlink handling code
  */
 
 #include <linux/fs.h>
 #include <linux/jbd.h>
-#include <linux/ext3_fs.h>
+#include <linux/ext4_fs.h>
 #include <linux/namei.h>
 #include "xattr.h"
 
-static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-	struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
+	struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
 	nd_set_link(nd, (char*)ei->i_data);
 	return NULL;
 }
 
-struct inode_operations ext3_symlink_inode_operations = {
+struct inode_operations ext4_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
-#ifdef CONFIG_EXT3_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
-	.listxattr	= ext3_listxattr,
+	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
 };
 
-struct inode_operations ext3_fast_symlink_inode_operations = {
+struct inode_operations ext4_fast_symlink_inode_operations = {
 	.readlink	= generic_readlink,
-	.follow_link	= ext3_follow_link,
-#ifdef CONFIG_EXT3_FS_XATTR
+	.follow_link	= ext4_follow_link,
+#ifdef CONFIG_EXT4DEV_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
-	.listxattr	= ext3_listxattr,
+	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
 };
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index f86f2482f01d..d3a408154101 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1,10 +1,10 @@
 /*
- * linux/fs/ext3/xattr.c
+ * linux/fs/ext4/xattr.c
  *
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
  *
  * Fix by Harrison Xing <harrison@mountainviewdata.com>.
- * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
+ * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>.
  * Extended attributes for symlinks and special files added per
  *  suggestion of Luka Renko <luka.renko@hermes.si>.
  * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
@@ -43,7 +43,7 @@
  *
  * Locking strategy
  * ----------------
- * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
+ * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem.
  * EA blocks are only changed if they are exclusive to an inode, so
  * holding xattr_sem also means that nothing but the EA block's reference
  * count can change. Multiple writers to the same block are synchronized
@@ -53,27 +53,27 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include <linux/ext4_jbd.h>
+#include <linux/ext4_fs.h>
 #include <linux/mbcache.h>
 #include <linux/quotaops.h>
 #include <linux/rwsem.h>
 #include "xattr.h"
 #include "acl.h"
 
-#define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
-#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
+#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
+#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
 #define BFIRST(bh) ENTRY(BHDR(bh)+1)
 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
 
 #define IHDR(inode, raw_inode) \
-	((struct ext3_xattr_ibody_header *) \
+	((struct ext4_xattr_ibody_header *) \
 		((void *)raw_inode + \
-		 EXT3_GOOD_OLD_INODE_SIZE + \
-		 EXT3_I(inode)->i_extra_isize))
-#define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1))
+		 EXT4_GOOD_OLD_INODE_SIZE + \
+		 EXT4_I(inode)->i_extra_isize))
+#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
 
-#ifdef EXT3_XATTR_DEBUG
+#ifdef EXT4_XATTR_DEBUG
 # define ea_idebug(inode, f...) do { \
 		printk(KERN_DEBUG "inode %s:%lu: ", \
 			inode->i_sb->s_id, inode->i_ino); \
@@ -93,47 +93,47 @@
 # define ea_bdebug(f...)
 #endif
 
-static void ext3_xattr_cache_insert(struct buffer_head *);
-static struct buffer_head *ext3_xattr_cache_find(struct inode *,
-						 struct ext3_xattr_header *,
+static void ext4_xattr_cache_insert(struct buffer_head *);
+static struct buffer_head *ext4_xattr_cache_find(struct inode *,
+						 struct ext4_xattr_header *,
 						 struct mb_cache_entry **);
-static void ext3_xattr_rehash(struct ext3_xattr_header *,
-			      struct ext3_xattr_entry *);
+static void ext4_xattr_rehash(struct ext4_xattr_header *,
+			      struct ext4_xattr_entry *);
 
-static struct mb_cache *ext3_xattr_cache;
+static struct mb_cache *ext4_xattr_cache;
 
-static struct xattr_handler *ext3_xattr_handler_map[] = {
-	[EXT3_XATTR_INDEX_USER]		     = &ext3_xattr_user_handler,
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	[EXT3_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext3_xattr_acl_access_handler,
-	[EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext3_xattr_acl_default_handler,
+static struct xattr_handler *ext4_xattr_handler_map[] = {
+	[EXT4_XATTR_INDEX_USER]		     = &ext4_xattr_user_handler,
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+	[EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext4_xattr_acl_access_handler,
+	[EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
 #endif
-	[EXT3_XATTR_INDEX_TRUSTED]	     = &ext3_xattr_trusted_handler,
-#ifdef CONFIG_EXT3_FS_SECURITY
-	[EXT3_XATTR_INDEX_SECURITY]	     = &ext3_xattr_security_handler,
+	[EXT4_XATTR_INDEX_TRUSTED]	     = &ext4_xattr_trusted_handler,
+#ifdef CONFIG_EXT4DEV_FS_SECURITY
+	[EXT4_XATTR_INDEX_SECURITY]	     = &ext4_xattr_security_handler,
 #endif
 };
 
-struct xattr_handler *ext3_xattr_handlers[] = {
-	&ext3_xattr_user_handler,
-	&ext3_xattr_trusted_handler,
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	&ext3_xattr_acl_access_handler,
-	&ext3_xattr_acl_default_handler,
+struct xattr_handler *ext4_xattr_handlers[] = {
+	&ext4_xattr_user_handler,
+	&ext4_xattr_trusted_handler,
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+	&ext4_xattr_acl_access_handler,
+	&ext4_xattr_acl_default_handler,
 #endif
-#ifdef CONFIG_EXT3_FS_SECURITY
-	&ext3_xattr_security_handler,
+#ifdef CONFIG_EXT4DEV_FS_SECURITY
+	&ext4_xattr_security_handler,
 #endif
 	NULL
 };
 
 static inline struct xattr_handler *
-ext3_xattr_handler(int name_index)
+ext4_xattr_handler(int name_index)
 {
 	struct xattr_handler *handler = NULL;
 
-	if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map))
-		handler = ext3_xattr_handler_map[name_index];
+	if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
+		handler = ext4_xattr_handler_map[name_index];
 	return handler;
 }
 
@@ -143,16 +143,16 @@ ext3_xattr_handler(int name_index)
  * dentry->d_inode->i_mutex: don't care
  */
 ssize_t
-ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
+ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-	return ext3_xattr_list(dentry->d_inode, buffer, size);
+	return ext4_xattr_list(dentry->d_inode, buffer, size);
 }
 
 static int
-ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end)
+ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
 {
 	while (!IS_LAST_ENTRY(entry)) {
-		struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry);
+		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
 		if ((void *)next >= end)
 			return -EIO;
 		entry = next;
@@ -161,19 +161,19 @@ ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end)
 }
 
 static inline int
-ext3_xattr_check_block(struct buffer_head *bh)
+ext4_xattr_check_block(struct buffer_head *bh)
 {
 	int error;
 
-	if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
+	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 	    BHDR(bh)->h_blocks != cpu_to_le32(1))
 		return -EIO;
-	error = ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
 	return error;
 }
 
 static inline int
-ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size)
+ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
 {
 	size_t value_size = le32_to_cpu(entry->e_value_size);
 
@@ -184,10 +184,10 @@ ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size)
 }
 
 static int
-ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index,
+ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
 		      const char *name, size_t size, int sorted)
 {
-	struct ext3_xattr_entry *entry;
+	struct ext4_xattr_entry *entry;
 	size_t name_len;
 	int cmp = 1;
 
@@ -195,7 +195,7 @@ ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index,
 		return -EINVAL;
 	name_len = strlen(name);
 	entry = *pentry;
-	for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
 		cmp = name_index - entry->e_name_index;
 		if (!cmp)
 			cmp = name_len - entry->e_name_len;
@@ -205,17 +205,17 @@ ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index,
 			break;
 	}
 	*pentry = entry;
-	if (!cmp && ext3_xattr_check_entry(entry, size))
+	if (!cmp && ext4_xattr_check_entry(entry, size))
 			return -EIO;
 	return cmp ? -ENODATA : 0;
 }
 
 static int
-ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
+ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
 		     void *buffer, size_t buffer_size)
 {
 	struct buffer_head *bh = NULL;
-	struct ext3_xattr_entry *entry;
+	struct ext4_xattr_entry *entry;
 	size_t size;
 	int error;
 
@@ -223,24 +223,24 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
 		  name_index, name, buffer, (long)buffer_size);
 
 	error = -ENODATA;
-	if (!EXT3_I(inode)->i_file_acl)
+	if (!EXT4_I(inode)->i_file_acl)
 		goto cleanup;
-	ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
-	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
+	ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl);
+	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
 	if (!bh)
 		goto cleanup;
 	ea_bdebug(bh, "b_count=%d, refcount=%d",
 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
-	if (ext3_xattr_check_block(bh)) {
-bad_block:	ext3_error(inode->i_sb, __FUNCTION__,
+	if (ext4_xattr_check_block(bh)) {
+bad_block:	ext4_error(inode->i_sb, __FUNCTION__,
 			   "inode %lu: bad block "E3FSBLK, inode->i_ino,
-			   EXT3_I(inode)->i_file_acl);
+			   EXT4_I(inode)->i_file_acl);
 		error = -EIO;
 		goto cleanup;
 	}
-	ext3_xattr_cache_insert(bh);
+	ext4_xattr_cache_insert(bh);
 	entry = BFIRST(bh);
-	error = ext3_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
+	error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
 	if (error == -EIO)
 		goto bad_block;
 	if (error)
@@ -261,30 +261,30 @@ cleanup:
 }
 
 static int
-ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
+ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
 		     void *buffer, size_t buffer_size)
 {
-	struct ext3_xattr_ibody_header *header;
-	struct ext3_xattr_entry *entry;
-	struct ext3_inode *raw_inode;
-	struct ext3_iloc iloc;
+	struct ext4_xattr_ibody_header *header;
+	struct ext4_xattr_entry *entry;
+	struct ext4_inode *raw_inode;
+	struct ext4_iloc iloc;
 	size_t size;
 	void *end;
 	int error;
 
-	if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+	if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR))
 		return -ENODATA;
-	error = ext3_get_inode_loc(inode, &iloc);
+	error = ext4_get_inode_loc(inode, &iloc);
 	if (error)
 		return error;
-	raw_inode = ext3_raw_inode(&iloc);
+	raw_inode = ext4_raw_inode(&iloc);
 	header = IHDR(inode, raw_inode);
 	entry = IFIRST(header);
-	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-	error = ext3_xattr_check_names(entry, end);
+	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
+	error = ext4_xattr_check_names(entry, end);
 	if (error)
 		goto cleanup;
-	error = ext3_xattr_find_entry(&entry, name_index, name,
+	error = ext4_xattr_find_entry(&entry, name_index, name,
 				      end - (void *)entry, 0);
 	if (error)
 		goto cleanup;
@@ -304,7 +304,7 @@ cleanup:
 }
 
 /*
- * ext3_xattr_get()
+ * ext4_xattr_get()
  *
  * Copy an extended attribute into the buffer
  * provided, or compute the buffer size required.
@@ -314,30 +314,30 @@ cleanup:
  * used / required on success.
  */
 int
-ext3_xattr_get(struct inode *inode, int name_index, const char *name,
+ext4_xattr_get(struct inode *inode, int name_index, const char *name,
 	       void *buffer, size_t buffer_size)
 {
 	int error;
 
-	down_read(&EXT3_I(inode)->xattr_sem);
-	error = ext3_xattr_ibody_get(inode, name_index, name, buffer,
+	down_read(&EXT4_I(inode)->xattr_sem);
+	error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
 				     buffer_size);
 	if (error == -ENODATA)
-		error = ext3_xattr_block_get(inode, name_index, name, buffer,
+		error = ext4_xattr_block_get(inode, name_index, name, buffer,
 					     buffer_size);
-	up_read(&EXT3_I(inode)->xattr_sem);
+	up_read(&EXT4_I(inode)->xattr_sem);
 	return error;
 }
 
 static int
-ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
+ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
 			char *buffer, size_t buffer_size)
 {
 	size_t rest = buffer_size;
 
-	for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
 		struct xattr_handler *handler =
-			ext3_xattr_handler(entry->e_name_index);
+			ext4_xattr_handler(entry->e_name_index);
 
 		if (handler) {
 			size_t size = handler->list(inode, buffer, rest,
@@ -355,7 +355,7 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
 }
 
 static int
-ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
 {
 	struct buffer_head *bh = NULL;
 	int error;
@@ -364,24 +364,24 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
 		  buffer, (long)buffer_size);
 
 	error = 0;
-	if (!EXT3_I(inode)->i_file_acl)
+	if (!EXT4_I(inode)->i_file_acl)
 		goto cleanup;
-	ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
-	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
+	ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl);
+	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
 	error = -EIO;
 	if (!bh)
 		goto cleanup;
 	ea_bdebug(bh, "b_count=%d, refcount=%d",
 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
-	if (ext3_xattr_check_block(bh)) {
-		ext3_error(inode->i_sb, __FUNCTION__,
+	if (ext4_xattr_check_block(bh)) {
+		ext4_error(inode->i_sb, __FUNCTION__,
 			   "inode %lu: bad block "E3FSBLK, inode->i_ino,
-			   EXT3_I(inode)->i_file_acl);
+			   EXT4_I(inode)->i_file_acl);
 		error = -EIO;
 		goto cleanup;
 	}
-	ext3_xattr_cache_insert(bh);
-	error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
+	ext4_xattr_cache_insert(bh);
+	error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
 
 cleanup:
 	brelse(bh);
@@ -390,26 +390,26 @@ cleanup:
 }
 
 static int
-ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
 {
-	struct ext3_xattr_ibody_header *header;
-	struct ext3_inode *raw_inode;
-	struct ext3_iloc iloc;
+	struct ext4_xattr_ibody_header *header;
+	struct ext4_inode *raw_inode;
+	struct ext4_iloc iloc;
 	void *end;
 	int error;
 
-	if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+	if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR))
 		return 0;
-	error = ext3_get_inode_loc(inode, &iloc);
+	error = ext4_get_inode_loc(inode, &iloc);
 	if (error)
 		return error;
-	raw_inode = ext3_raw_inode(&iloc);
+	raw_inode = ext4_raw_inode(&iloc);
 	header = IHDR(inode, raw_inode);
-	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-	error = ext3_xattr_check_names(IFIRST(header), end);
+	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
+	error = ext4_xattr_check_names(IFIRST(header), end);
 	if (error)
 		goto cleanup;
-	error = ext3_xattr_list_entries(inode, IFIRST(header),
+	error = ext4_xattr_list_entries(inode, IFIRST(header),
 					buffer, buffer_size);
 
 cleanup:
@@ -418,7 +418,7 @@ cleanup:
 }
 
 /*
- * ext3_xattr_list()
+ * ext4_xattr_list()
  *
  * Copy a list of attribute names into the buffer
  * provided, or compute the buffer size required.
@@ -428,12 +428,12 @@ cleanup:
  * used / required on success.
  */
 int
-ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
 {
 	int i_error, b_error;
 
-	down_read(&EXT3_I(inode)->xattr_sem);
-	i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size);
+	down_read(&EXT4_I(inode)->xattr_sem);
+	i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size);
 	if (i_error < 0) {
 		b_error = 0;
 	} else {
@@ -441,30 +441,30 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
 			buffer += i_error;
 			buffer_size -= i_error;
 		}
-		b_error = ext3_xattr_block_list(inode, buffer, buffer_size);
+		b_error = ext4_xattr_block_list(inode, buffer, buffer_size);
 		if (b_error < 0)
 			i_error = 0;
 	}
-	up_read(&EXT3_I(inode)->xattr_sem);
+	up_read(&EXT4_I(inode)->xattr_sem);
 	return i_error + b_error;
 }
 
 /*
- * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
+ * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is
  * not set, set it.
  */
-static void ext3_xattr_update_super_block(handle_t *handle,
+static void ext4_xattr_update_super_block(handle_t *handle,
 					  struct super_block *sb)
 {
-	if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
+	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR))
 		return;
 
 	lock_super(sb);
-	if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
-		EXT3_SB(sb)->s_es->s_feature_compat |=
-			cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
+	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
+		EXT4_SB(sb)->s_es->s_feature_compat |=
+			cpu_to_le32(EXT4_FEATURE_COMPAT_EXT_ATTR);
 		sb->s_dirt = 1;
-		ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+		ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
 	}
 	unlock_super(sb);
 }
@@ -474,25 +474,25 @@ static void ext3_xattr_update_super_block(handle_t *handle,
  * it; otherwise free the block.
  */
 static void
-ext3_xattr_release_block(handle_t *handle, struct inode *inode,
+ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 			 struct buffer_head *bh)
 {
 	struct mb_cache_entry *ce = NULL;
 
-	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
+	ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr);
 	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
 		ea_bdebug(bh, "refcount now=0; freeing");
 		if (ce)
 			mb_cache_entry_free(ce);
-		ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
+		ext4_free_blocks(handle, inode, bh->b_blocknr, 1);
 		get_bh(bh);
-		ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
+		ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
 	} else {
-		if (ext3_journal_get_write_access(handle, bh) == 0) {
+		if (ext4_journal_get_write_access(handle, bh) == 0) {
 			lock_buffer(bh);
 			BHDR(bh)->h_refcount = cpu_to_le32(
 				le32_to_cpu(BHDR(bh)->h_refcount) - 1);
-			ext3_journal_dirty_metadata(handle, bh);
+			ext4_journal_dirty_metadata(handle, bh);
 			if (IS_SYNC(inode))
 				handle->h_sync = 1;
 			DQUOT_FREE_BLOCK(inode, 1);
@@ -505,30 +505,30 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode,
 	}
 }
 
-struct ext3_xattr_info {
+struct ext4_xattr_info {
 	int name_index;
 	const char *name;
 	const void *value;
 	size_t value_len;
 };
 
-struct ext3_xattr_search {
-	struct ext3_xattr_entry *first;
+struct ext4_xattr_search {
+	struct ext4_xattr_entry *first;
 	void *base;
 	void *end;
-	struct ext3_xattr_entry *here;
+	struct ext4_xattr_entry *here;
 	int not_found;
 };
 
 static int
-ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
+ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
 {
-	struct ext3_xattr_entry *last;
+	struct ext4_xattr_entry *last;
 	size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
 
 	/* Compute min_offs and last. */
 	last = s->first;
-	for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
+	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
 		if (!last->e_value_block && last->e_value_size) {
 			size_t offs = le16_to_cpu(last->e_value_offs);
 			if (offs < min_offs)
@@ -539,20 +539,20 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
 	if (!s->not_found) {
 		if (!s->here->e_value_block && s->here->e_value_size) {
 			size_t size = le32_to_cpu(s->here->e_value_size);
-			free += EXT3_XATTR_SIZE(size);
+			free += EXT4_XATTR_SIZE(size);
 		}
-		free += EXT3_XATTR_LEN(name_len);
+		free += EXT4_XATTR_LEN(name_len);
 	}
 	if (i->value) {
-		if (free < EXT3_XATTR_SIZE(i->value_len) ||
-		    free < EXT3_XATTR_LEN(name_len) +
-			   EXT3_XATTR_SIZE(i->value_len))
+		if (free < EXT4_XATTR_SIZE(i->value_len) ||
+		    free < EXT4_XATTR_LEN(name_len) +
+			   EXT4_XATTR_SIZE(i->value_len))
 			return -ENOSPC;
 	}
 
 	if (i->value && s->not_found) {
 		/* Insert the new name. */
-		size_t size = EXT3_XATTR_LEN(name_len);
+		size_t size = EXT4_XATTR_LEN(name_len);
 		size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
 		memmove((void *)s->here + size, s->here, rest);
 		memset(s->here, 0, size);
@@ -564,16 +564,16 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
 			void *first_val = s->base + min_offs;
 			size_t offs = le16_to_cpu(s->here->e_value_offs);
 			void *val = s->base + offs;
-			size_t size = EXT3_XATTR_SIZE(
+			size_t size = EXT4_XATTR_SIZE(
 				le32_to_cpu(s->here->e_value_size));
 
-			if (i->value && size == EXT3_XATTR_SIZE(i->value_len)) {
+			if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
 				/* The old and the new value have the same
 				   size. Just replace. */
 				s->here->e_value_size =
 					cpu_to_le32(i->value_len);
-				memset(val + size - EXT3_XATTR_PAD, 0,
-				       EXT3_XATTR_PAD); /* Clear pad bytes. */
+				memset(val + size - EXT4_XATTR_PAD, 0,
+				       EXT4_XATTR_PAD); /* Clear pad bytes. */
 				memcpy(val, i->value, i->value_len);
 				return 0;
 			}
@@ -593,12 +593,12 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
 				    last->e_value_size && o < offs)
 					last->e_value_offs =
 						cpu_to_le16(o + size);
-				last = EXT3_XATTR_NEXT(last);
+				last = EXT4_XATTR_NEXT(last);
 			}
 		}
 		if (!i->value) {
 			/* Remove the old name. */
-			size_t size = EXT3_XATTR_LEN(name_len);
+			size_t size = EXT4_XATTR_LEN(name_len);
 			last = ENTRY((void *)last - size);
 			memmove(s->here, (void *)s->here + size,
 				(void *)last - (void *)s->here + sizeof(__u32));
@@ -610,25 +610,25 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
 		/* Insert the new value. */
 		s->here->e_value_size = cpu_to_le32(i->value_len);
 		if (i->value_len) {
-			size_t size = EXT3_XATTR_SIZE(i->value_len);
+			size_t size = EXT4_XATTR_SIZE(i->value_len);
 			void *val = s->base + min_offs - size;
 			s->here->e_value_offs = cpu_to_le16(min_offs - size);
-			memset(val + size - EXT3_XATTR_PAD, 0,
-			       EXT3_XATTR_PAD); /* Clear the pad bytes. */
+			memset(val + size - EXT4_XATTR_PAD, 0,
+			       EXT4_XATTR_PAD); /* Clear the pad bytes. */
 			memcpy(val, i->value, i->value_len);
 		}
 	}
 	return 0;
 }
 
-struct ext3_xattr_block_find {
-	struct ext3_xattr_search s;
+struct ext4_xattr_block_find {
+	struct ext4_xattr_search s;
 	struct buffer_head *bh;
 };
 
 static int
-ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
-		      struct ext3_xattr_block_find *bs)
+ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
+		      struct ext4_xattr_block_find *bs)
 {
 	struct super_block *sb = inode->i_sb;
 	int error;
@@ -636,19 +636,19 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
 	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
 		  i->name_index, i->name, i->value, (long)i->value_len);
 
-	if (EXT3_I(inode)->i_file_acl) {
+	if (EXT4_I(inode)->i_file_acl) {
 		/* The inode already has an extended attribute block. */
-		bs->bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
+		bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl);
 		error = -EIO;
 		if (!bs->bh)
 			goto cleanup;
 		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
 			atomic_read(&(bs->bh->b_count)),
 			le32_to_cpu(BHDR(bs->bh)->h_refcount));
-		if (ext3_xattr_check_block(bs->bh)) {
-			ext3_error(sb, __FUNCTION__,
+		if (ext4_xattr_check_block(bs->bh)) {
+			ext4_error(sb, __FUNCTION__,
 				"inode %lu: bad block "E3FSBLK, inode->i_ino,
-				EXT3_I(inode)->i_file_acl);
+				EXT4_I(inode)->i_file_acl);
 			error = -EIO;
 			goto cleanup;
 		}
@@ -657,7 +657,7 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
 		bs->s.first = BFIRST(bs->bh);
 		bs->s.end = bs->bh->b_data + bs->bh->b_size;
 		bs->s.here = bs->s.first;
-		error = ext3_xattr_find_entry(&bs->s.here, i->name_index,
+		error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
 					      i->name, bs->bh->b_size, 1);
 		if (error && error != -ENODATA)
 			goto cleanup;
@@ -670,22 +670,22 @@ cleanup:
 }
 
 static int
-ext3_xattr_block_set(handle_t *handle, struct inode *inode,
-		     struct ext3_xattr_info *i,
-		     struct ext3_xattr_block_find *bs)
+ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+		     struct ext4_xattr_info *i,
+		     struct ext4_xattr_block_find *bs)
 {
 	struct super_block *sb = inode->i_sb;
 	struct buffer_head *new_bh = NULL;
-	struct ext3_xattr_search *s = &bs->s;
+	struct ext4_xattr_search *s = &bs->s;
 	struct mb_cache_entry *ce = NULL;
 	int error;
 
-#define header(x) ((struct ext3_xattr_header *)(x))
+#define header(x) ((struct ext4_xattr_header *)(x))
 
 	if (i->value && i->value_len > sb->s_blocksize)
 		return -ENOSPC;
 	if (s->base) {
-		ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev,
+		ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev,
 					bs->bh->b_blocknr);
 		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
 			if (ce) {
@@ -693,22 +693,22 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
 				ce = NULL;
 			}
 			ea_bdebug(bs->bh, "modifying in-place");
-			error = ext3_journal_get_write_access(handle, bs->bh);
+			error = ext4_journal_get_write_access(handle, bs->bh);
 			if (error)
 				goto cleanup;
 			lock_buffer(bs->bh);
-			error = ext3_xattr_set_entry(i, s);
+			error = ext4_xattr_set_entry(i, s);
 			if (!error) {
 				if (!IS_LAST_ENTRY(s->first))
-					ext3_xattr_rehash(header(s->base),
+					ext4_xattr_rehash(header(s->base),
 							  s->here);
-				ext3_xattr_cache_insert(bs->bh);
+				ext4_xattr_cache_insert(bs->bh);
 			}
 			unlock_buffer(bs->bh);
 			if (error == -EIO)
 				goto bad_block;
 			if (!error)
-				error = ext3_journal_dirty_metadata(handle,
+				error = ext4_journal_dirty_metadata(handle,
 								    bs->bh);
 			if (error)
 				goto cleanup;
@@ -739,7 +739,7 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
 		if (s->base == NULL)
 			goto cleanup;
 		memset(s->base, 0, sb->s_blocksize);
-		header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
+		header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
 		header(s->base)->h_blocks = cpu_to_le32(1);
 		header(s->base)->h_refcount = cpu_to_le32(1);
 		s->first = ENTRY(header(s->base)+1);
@@ -747,17 +747,17 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
 		s->end = s->base + sb->s_blocksize;
 	}
 
-	error = ext3_xattr_set_entry(i, s);
+	error = ext4_xattr_set_entry(i, s);
 	if (error == -EIO)
 		goto bad_block;
 	if (error)
 		goto cleanup;
 	if (!IS_LAST_ENTRY(s->first))
-		ext3_xattr_rehash(header(s->base), s->here);
+		ext4_xattr_rehash(header(s->base), s->here);
 
 inserted:
 	if (!IS_LAST_ENTRY(s->first)) {
-		new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce);
+		new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
 		if (new_bh) {
 			/* We found an identical block in the cache. */
 			if (new_bh == bs->bh)
@@ -768,7 +768,7 @@ inserted:
 				error = -EDQUOT;
 				if (DQUOT_ALLOC_BLOCK(inode, 1))
 					goto cleanup;
-				error = ext3_journal_get_write_access(handle,
+				error = ext4_journal_get_write_access(handle,
 								      new_bh);
 				if (error)
 					goto cleanup_dquot;
@@ -778,7 +778,7 @@ inserted:
 				ea_bdebug(new_bh, "reusing; refcount now=%d",
 					le32_to_cpu(BHDR(new_bh)->h_refcount));
 				unlock_buffer(new_bh);
-				error = ext3_journal_dirty_metadata(handle,
+				error = ext4_journal_dirty_metadata(handle,
 								    new_bh);
 				if (error)
 					goto cleanup_dquot;
@@ -792,11 +792,11 @@ inserted:
 			get_bh(new_bh);
 		} else {
 			/* We need to allocate a new block */
-			ext3_fsblk_t goal = le32_to_cpu(
-					EXT3_SB(sb)->s_es->s_first_data_block) +
-				(ext3_fsblk_t)EXT3_I(inode)->i_block_group *
-				EXT3_BLOCKS_PER_GROUP(sb);
-			ext3_fsblk_t block = ext3_new_block(handle, inode,
+			ext4_fsblk_t goal = le32_to_cpu(
+					EXT4_SB(sb)->s_es->s_first_data_block) +
+				(ext4_fsblk_t)EXT4_I(inode)->i_block_group *
+				EXT4_BLOCKS_PER_GROUP(sb);
+			ext4_fsblk_t block = ext4_new_block(handle, inode,
 							goal, &error);
 			if (error)
 				goto cleanup;
@@ -805,12 +805,12 @@ inserted:
 			new_bh = sb_getblk(sb, block);
 			if (!new_bh) {
 getblk_failed:
-				ext3_free_blocks(handle, inode, block, 1);
+				ext4_free_blocks(handle, inode, block, 1);
 				error = -EIO;
 				goto cleanup;
 			}
 			lock_buffer(new_bh);
-			error = ext3_journal_get_create_access(handle, new_bh);
+			error = ext4_journal_get_create_access(handle, new_bh);
 			if (error) {
 				unlock_buffer(new_bh);
 				goto getblk_failed;
@@ -818,19 +818,19 @@ getblk_failed:
 			memcpy(new_bh->b_data, s->base, new_bh->b_size);
 			set_buffer_uptodate(new_bh);
 			unlock_buffer(new_bh);
-			ext3_xattr_cache_insert(new_bh);
-			error = ext3_journal_dirty_metadata(handle, new_bh);
+			ext4_xattr_cache_insert(new_bh);
+			error = ext4_journal_dirty_metadata(handle, new_bh);
 			if (error)
 				goto cleanup;
 		}
 	}
 
 	/* Update the inode. */
-	EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
+	EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
 
 	/* Drop the previous xattr block. */
 	if (bs->bh && bs->bh != new_bh)
-		ext3_xattr_release_block(handle, inode, bs->bh);
+		ext4_xattr_release_block(handle, inode, bs->bh);
 	error = 0;
 
 cleanup:
@@ -847,40 +847,40 @@ cleanup_dquot:
 	goto cleanup;
 
 bad_block:
-	ext3_error(inode->i_sb, __FUNCTION__,
+	ext4_error(inode->i_sb, __FUNCTION__,
 		   "inode %lu: bad block "E3FSBLK, inode->i_ino,
-		   EXT3_I(inode)->i_file_acl);
+		   EXT4_I(inode)->i_file_acl);
 	goto cleanup;
 
 #undef header
 }
 
-struct ext3_xattr_ibody_find {
-	struct ext3_xattr_search s;
-	struct ext3_iloc iloc;
+struct ext4_xattr_ibody_find {
+	struct ext4_xattr_search s;
+	struct ext4_iloc iloc;
 };
 
 static int
-ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
-		      struct ext3_xattr_ibody_find *is)
+ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
+		      struct ext4_xattr_ibody_find *is)
 {
-	struct ext3_xattr_ibody_header *header;
-	struct ext3_inode *raw_inode;
+	struct ext4_xattr_ibody_header *header;
+	struct ext4_inode *raw_inode;
 	int error;
 
-	if (EXT3_I(inode)->i_extra_isize == 0)
+	if (EXT4_I(inode)->i_extra_isize == 0)
 		return 0;
-	raw_inode = ext3_raw_inode(&is->iloc);
+	raw_inode = ext4_raw_inode(&is->iloc);
 	header = IHDR(inode, raw_inode);
 	is->s.base = is->s.first = IFIRST(header);
 	is->s.here = is->s.first;
-	is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-	if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) {
-		error = ext3_xattr_check_names(IFIRST(header), is->s.end);
+	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
+	if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
+		error = ext4_xattr_check_names(IFIRST(header), is->s.end);
 		if (error)
 			return error;
 		/* Find the named attribute. */
-		error = ext3_xattr_find_entry(&is->s.here, i->name_index,
+		error = ext4_xattr_find_entry(&is->s.here, i->name_index,
 					      i->name, is->s.end -
 					      (void *)is->s.base, 0);
 		if (error && error != -ENODATA)
@@ -891,32 +891,32 @@ ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
 }
 
 static int
-ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
-		     struct ext3_xattr_info *i,
-		     struct ext3_xattr_ibody_find *is)
+ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+		     struct ext4_xattr_info *i,
+		     struct ext4_xattr_ibody_find *is)
 {
-	struct ext3_xattr_ibody_header *header;
-	struct ext3_xattr_search *s = &is->s;
+	struct ext4_xattr_ibody_header *header;
+	struct ext4_xattr_search *s = &is->s;
 	int error;
 
-	if (EXT3_I(inode)->i_extra_isize == 0)
+	if (EXT4_I(inode)->i_extra_isize == 0)
 		return -ENOSPC;
-	error = ext3_xattr_set_entry(i, s);
+	error = ext4_xattr_set_entry(i, s);
 	if (error)
 		return error;
-	header = IHDR(inode, ext3_raw_inode(&is->iloc));
+	header = IHDR(inode, ext4_raw_inode(&is->iloc));
 	if (!IS_LAST_ENTRY(s->first)) {
-		header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
-		EXT3_I(inode)->i_state |= EXT3_STATE_XATTR;
+		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
+		EXT4_I(inode)->i_state |= EXT4_STATE_XATTR;
 	} else {
 		header->h_magic = cpu_to_le32(0);
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR;
+		EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR;
 	}
 	return 0;
 }
 
 /*
- * ext3_xattr_set_handle()
+ * ext4_xattr_set_handle()
  *
  * Create, replace or remove an extended attribute for this inode. Buffer
  * is NULL to remove an existing extended attribute, and non-NULL to
@@ -928,21 +928,21 @@ ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
  * Returns 0, or a negative error number on failure.
  */
 int
-ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
+ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 		      const char *name, const void *value, size_t value_len,
 		      int flags)
 {
-	struct ext3_xattr_info i = {
+	struct ext4_xattr_info i = {
 		.name_index = name_index,
 		.name = name,
 		.value = value,
 		.value_len = value_len,
 
 	};
-	struct ext3_xattr_ibody_find is = {
+	struct ext4_xattr_ibody_find is = {
 		.s = { .not_found = -ENODATA, },
 	};
-	struct ext3_xattr_block_find bs = {
+	struct ext4_xattr_block_find bs = {
 		.s = { .not_found = -ENODATA, },
 	};
 	int error;
@@ -951,22 +951,22 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 		return -EINVAL;
 	if (strlen(name) > 255)
 		return -ERANGE;
-	down_write(&EXT3_I(inode)->xattr_sem);
-	error = ext3_get_inode_loc(inode, &is.iloc);
+	down_write(&EXT4_I(inode)->xattr_sem);
+	error = ext4_get_inode_loc(inode, &is.iloc);
 	if (error)
 		goto cleanup;
 
-	if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {
-		struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
-		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW;
+	if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
+		struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
+		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
+		EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW;
 	}
 
-	error = ext3_xattr_ibody_find(inode, &i, &is);
+	error = ext4_xattr_ibody_find(inode, &i, &is);
 	if (error)
 		goto cleanup;
 	if (is.s.not_found)
-		error = ext3_xattr_block_find(inode, &i, &bs);
+		error = ext4_xattr_block_find(inode, &i, &bs);
 	if (error)
 		goto cleanup;
 	if (is.s.not_found && bs.s.not_found) {
@@ -981,36 +981,36 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 		if (flags & XATTR_CREATE)
 			goto cleanup;
 	}
-	error = ext3_journal_get_write_access(handle, is.iloc.bh);
+	error = ext4_journal_get_write_access(handle, is.iloc.bh);
 	if (error)
 		goto cleanup;
 	if (!value) {
 		if (!is.s.not_found)
-			error = ext3_xattr_ibody_set(handle, inode, &i, &is);
+			error = ext4_xattr_ibody_set(handle, inode, &i, &is);
 		else if (!bs.s.not_found)
-			error = ext3_xattr_block_set(handle, inode, &i, &bs);
+			error = ext4_xattr_block_set(handle, inode, &i, &bs);
 	} else {
-		error = ext3_xattr_ibody_set(handle, inode, &i, &is);
+		error = ext4_xattr_ibody_set(handle, inode, &i, &is);
 		if (!error && !bs.s.not_found) {
 			i.value = NULL;
-			error = ext3_xattr_block_set(handle, inode, &i, &bs);
+			error = ext4_xattr_block_set(handle, inode, &i, &bs);
 		} else if (error == -ENOSPC) {
-			error = ext3_xattr_block_set(handle, inode, &i, &bs);
+			error = ext4_xattr_block_set(handle, inode, &i, &bs);
 			if (error)
 				goto cleanup;
 			if (!is.s.not_found) {
 				i.value = NULL;
-				error = ext3_xattr_ibody_set(handle, inode, &i,
+				error = ext4_xattr_ibody_set(handle, inode, &i,
 							     &is);
 			}
 		}
 	}
 	if (!error) {
-		ext3_xattr_update_super_block(handle, inode->i_sb);
+		ext4_xattr_update_super_block(handle, inode->i_sb);
 		inode->i_ctime = CURRENT_TIME_SEC;
-		error = ext3_mark_iloc_dirty(handle, inode, &is.iloc);
+		error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
 		/*
-		 * The bh is consumed by ext3_mark_iloc_dirty, even with
+		 * The bh is consumed by ext4_mark_iloc_dirty, even with
 		 * error != 0.
 		 */
 		is.iloc.bh = NULL;
@@ -1021,37 +1021,37 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 cleanup:
 	brelse(is.iloc.bh);
 	brelse(bs.bh);
-	up_write(&EXT3_I(inode)->xattr_sem);
+	up_write(&EXT4_I(inode)->xattr_sem);
 	return error;
 }
 
 /*
- * ext3_xattr_set()
+ * ext4_xattr_set()
  *
- * Like ext3_xattr_set_handle, but start from an inode. This extended
+ * Like ext4_xattr_set_handle, but start from an inode. This extended
  * attribute modification is a filesystem transaction by itself.
  *
  * Returns 0, or a negative error number on failure.
  */
 int
-ext3_xattr_set(struct inode *inode, int name_index, const char *name,
+ext4_xattr_set(struct inode *inode, int name_index, const char *name,
 	       const void *value, size_t value_len, int flags)
 {
 	handle_t *handle;
 	int error, retries = 0;
 
 retry:
-	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
+	handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
 	} else {
 		int error2;
 
-		error = ext3_xattr_set_handle(handle, inode, name_index, name,
+		error = ext4_xattr_set_handle(handle, inode, name_index, name,
 					      value, value_len, flags);
-		error2 = ext3_journal_stop(handle);
+		error2 = ext4_journal_stop(handle);
 		if (error == -ENOSPC &&
-		    ext3_should_retry_alloc(inode->i_sb, &retries))
+		    ext4_should_retry_alloc(inode->i_sb, &retries))
 			goto retry;
 		if (error == 0)
 			error = error2;
@@ -1061,53 +1061,53 @@ retry:
 }
 
 /*
- * ext3_xattr_delete_inode()
+ * ext4_xattr_delete_inode()
  *
  * Free extended attribute resources associated with this inode. This
  * is called immediately before an inode is freed. We have exclusive
  * access to the inode.
  */
 void
-ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
+ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
 {
 	struct buffer_head *bh = NULL;
 
-	if (!EXT3_I(inode)->i_file_acl)
+	if (!EXT4_I(inode)->i_file_acl)
 		goto cleanup;
-	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
+	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
 	if (!bh) {
-		ext3_error(inode->i_sb, __FUNCTION__,
+		ext4_error(inode->i_sb, __FUNCTION__,
 			"inode %lu: block "E3FSBLK" read error", inode->i_ino,
-			EXT3_I(inode)->i_file_acl);
+			EXT4_I(inode)->i_file_acl);
 		goto cleanup;
 	}
-	if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
+	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
-		ext3_error(inode->i_sb, __FUNCTION__,
+		ext4_error(inode->i_sb, __FUNCTION__,
 			"inode %lu: bad block "E3FSBLK, inode->i_ino,
-			EXT3_I(inode)->i_file_acl);
+			EXT4_I(inode)->i_file_acl);
 		goto cleanup;
 	}
-	ext3_xattr_release_block(handle, inode, bh);
-	EXT3_I(inode)->i_file_acl = 0;
+	ext4_xattr_release_block(handle, inode, bh);
+	EXT4_I(inode)->i_file_acl = 0;
 
 cleanup:
 	brelse(bh);
 }
 
 /*
- * ext3_xattr_put_super()
+ * ext4_xattr_put_super()
  *
  * This is called when a file system is unmounted.
  */
 void
-ext3_xattr_put_super(struct super_block *sb)
+ext4_xattr_put_super(struct super_block *sb)
 {
 	mb_cache_shrink(sb->s_bdev);
 }
 
 /*
- * ext3_xattr_cache_insert()
+ * ext4_xattr_cache_insert()
  *
  * Create a new entry in the extended attribute cache, and insert
  * it unless such an entry is already in the cache.
@@ -1115,13 +1115,13 @@ ext3_xattr_put_super(struct super_block *sb)
  * Returns 0, or a negative error number on failure.
  */
 static void
-ext3_xattr_cache_insert(struct buffer_head *bh)
+ext4_xattr_cache_insert(struct buffer_head *bh)
 {
 	__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
 	struct mb_cache_entry *ce;
 	int error;
 
-	ce = mb_cache_entry_alloc(ext3_xattr_cache);
+	ce = mb_cache_entry_alloc(ext4_xattr_cache);
 	if (!ce) {
 		ea_bdebug(bh, "out of memory");
 		return;
@@ -1140,7 +1140,7 @@ ext3_xattr_cache_insert(struct buffer_head *bh)
 }
 
 /*
- * ext3_xattr_cmp()
+ * ext4_xattr_cmp()
  *
  * Compare two extended attribute blocks for equality.
  *
@@ -1148,10 +1148,10 @@ ext3_xattr_cache_insert(struct buffer_head *bh)
  * a negative error number on errors.
  */
 static int
-ext3_xattr_cmp(struct ext3_xattr_header *header1,
-	       struct ext3_xattr_header *header2)
+ext4_xattr_cmp(struct ext4_xattr_header *header1,
+	       struct ext4_xattr_header *header2)
 {
-	struct ext3_xattr_entry *entry1, *entry2;
+	struct ext4_xattr_entry *entry1, *entry2;
 
 	entry1 = ENTRY(header1+1);
 	entry2 = ENTRY(header2+1);
@@ -1171,8 +1171,8 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1,
 			   le32_to_cpu(entry1->e_value_size)))
 			return 1;
 
-		entry1 = EXT3_XATTR_NEXT(entry1);
-		entry2 = EXT3_XATTR_NEXT(entry2);
+		entry1 = EXT4_XATTR_NEXT(entry1);
+		entry2 = EXT4_XATTR_NEXT(entry2);
 	}
 	if (!IS_LAST_ENTRY(entry2))
 		return 1;
@@ -1180,7 +1180,7 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1,
 }
 
 /*
- * ext3_xattr_cache_find()
+ * ext4_xattr_cache_find()
  *
  * Find an identical extended attribute block.
  *
@@ -1188,7 +1188,7 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1,
  * not found or an error occurred.
  */
 static struct buffer_head *
-ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
+ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
 		      struct mb_cache_entry **pce)
 {
 	__u32 hash = le32_to_cpu(header->h_hash);
@@ -1198,7 +1198,7 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
 again:
-	ce = mb_cache_entry_find_first(ext3_xattr_cache, 0,
+	ce = mb_cache_entry_find_first(ext4_xattr_cache, 0,
 				       inode->i_sb->s_bdev, hash);
 	while (ce) {
 		struct buffer_head *bh;
@@ -1210,16 +1210,16 @@ again:
 		}
 		bh = sb_bread(inode->i_sb, ce->e_block);
 		if (!bh) {
-			ext3_error(inode->i_sb, __FUNCTION__,
+			ext4_error(inode->i_sb, __FUNCTION__,
 				"inode %lu: block %lu read error",
 				inode->i_ino, (unsigned long) ce->e_block);
 		} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
-				EXT3_XATTR_REFCOUNT_MAX) {
+				EXT4_XATTR_REFCOUNT_MAX) {
 			ea_idebug(inode, "block %lu refcount %d>=%d",
 				  (unsigned long) ce->e_block,
 				  le32_to_cpu(BHDR(bh)->h_refcount),
-					  EXT3_XATTR_REFCOUNT_MAX);
-		} else if (ext3_xattr_cmp(header, BHDR(bh)) == 0) {
+					  EXT4_XATTR_REFCOUNT_MAX);
+		} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
 			*pce = ce;
 			return bh;
 		}
@@ -1233,12 +1233,12 @@ again:
 #define VALUE_HASH_SHIFT 16
 
 /*
- * ext3_xattr_hash_entry()
+ * ext4_xattr_hash_entry()
  *
  * Compute the hash of an extended attribute.
  */
-static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
-					 struct ext3_xattr_entry *entry)
+static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
+					 struct ext4_xattr_entry *entry)
 {
 	__u32 hash = 0;
 	char *name = entry->e_name;
@@ -1254,7 +1254,7 @@ static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
 		__le32 *value = (__le32 *)((char *)header +
 			le16_to_cpu(entry->e_value_offs));
 		for (n = (le32_to_cpu(entry->e_value_size) +
-		     EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
+		     EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
 			hash = (hash << VALUE_HASH_SHIFT) ^
 			       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
 			       le32_to_cpu(*value++);
@@ -1269,17 +1269,17 @@ static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
 #define BLOCK_HASH_SHIFT 16
 
 /*
- * ext3_xattr_rehash()
+ * ext4_xattr_rehash()
  *
  * Re-compute the extended attribute hash value after an entry has changed.
  */
-static void ext3_xattr_rehash(struct ext3_xattr_header *header,
-			      struct ext3_xattr_entry *entry)
+static void ext4_xattr_rehash(struct ext4_xattr_header *header,
+			      struct ext4_xattr_entry *entry)
 {
-	struct ext3_xattr_entry *here;
+	struct ext4_xattr_entry *here;
 	__u32 hash = 0;
 
-	ext3_xattr_hash_entry(header, entry);
+	ext4_xattr_hash_entry(header, entry);
 	here = ENTRY(header+1);
 	while (!IS_LAST_ENTRY(here)) {
 		if (!here->e_hash) {
@@ -1290,7 +1290,7 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header,
 		hash = (hash << BLOCK_HASH_SHIFT) ^
 		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
 		       le32_to_cpu(here->e_hash);
-		here = EXT3_XATTR_NEXT(here);
+		here = EXT4_XATTR_NEXT(here);
 	}
 	header->h_hash = cpu_to_le32(hash);
 }
@@ -1298,20 +1298,20 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header,
 #undef BLOCK_HASH_SHIFT
 
 int __init
-init_ext3_xattr(void)
+init_ext4_xattr(void)
 {
-	ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
+	ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL,
 		sizeof(struct mb_cache_entry) +
 		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
-	if (!ext3_xattr_cache)
+	if (!ext4_xattr_cache)
 		return -ENOMEM;
 	return 0;
 }
 
 void
-exit_ext3_xattr(void)
+exit_ext4_xattr(void)
 {
-	if (ext3_xattr_cache)
-		mb_cache_destroy(ext3_xattr_cache);
-	ext3_xattr_cache = NULL;
+	if (ext4_xattr_cache)
+		mb_cache_destroy(ext4_xattr_cache);
+	ext4_xattr_cache = NULL;
 }
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 6b1ae1c6182c..79432b35398f 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -1,7 +1,7 @@
 /*
-  File: fs/ext3/xattr.h
+  File: fs/ext4/xattr.h
 
-  On-disk format of extended attributes for the ext3 filesystem.
+  On-disk format of extended attributes for the ext4 filesystem.
 
   (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */
@@ -9,20 +9,20 @@
 #include <linux/xattr.h>
 
 /* Magic value in attribute blocks */
-#define EXT3_XATTR_MAGIC		0xEA020000
+#define EXT4_XATTR_MAGIC		0xEA020000
 
 /* Maximum number of references to one attribute block */
-#define EXT3_XATTR_REFCOUNT_MAX		1024
+#define EXT4_XATTR_REFCOUNT_MAX		1024
 
 /* Name indexes */
-#define EXT3_XATTR_INDEX_USER			1
-#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS	2
-#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT	3
-#define EXT3_XATTR_INDEX_TRUSTED		4
-#define	EXT3_XATTR_INDEX_LUSTRE			5
-#define EXT3_XATTR_INDEX_SECURITY	        6
-
-struct ext3_xattr_header {
+#define EXT4_XATTR_INDEX_USER			1
+#define EXT4_XATTR_INDEX_POSIX_ACL_ACCESS	2
+#define EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT	3
+#define EXT4_XATTR_INDEX_TRUSTED		4
+#define	EXT4_XATTR_INDEX_LUSTRE			5
+#define EXT4_XATTR_INDEX_SECURITY	        6
+
+struct ext4_xattr_header {
 	__le32	h_magic;	/* magic number for identification */
 	__le32	h_refcount;	/* reference count */
 	__le32	h_blocks;	/* number of disk blocks used */
@@ -30,11 +30,11 @@ struct ext3_xattr_header {
 	__u32	h_reserved[4];	/* zero right now */
 };
 
-struct ext3_xattr_ibody_header {
+struct ext4_xattr_ibody_header {
 	__le32	h_magic;	/* magic number for identification */
 };
 
-struct ext3_xattr_entry {
+struct ext4_xattr_entry {
 	__u8	e_name_len;	/* length of name */
 	__u8	e_name_index;	/* attribute name index */
 	__le16	e_value_offs;	/* offset in disk block of value */
@@ -44,100 +44,100 @@ struct ext3_xattr_entry {
 	char	e_name[0];	/* attribute name */
 };
 
-#define EXT3_XATTR_PAD_BITS		2
-#define EXT3_XATTR_PAD		(1<<EXT3_XATTR_PAD_BITS)
-#define EXT3_XATTR_ROUND		(EXT3_XATTR_PAD-1)
-#define EXT3_XATTR_LEN(name_len) \
-	(((name_len) + EXT3_XATTR_ROUND + \
-	sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
-#define EXT3_XATTR_NEXT(entry) \
-	( (struct ext3_xattr_entry *)( \
-	  (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
-#define EXT3_XATTR_SIZE(size) \
-	(((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
+#define EXT4_XATTR_PAD_BITS		2
+#define EXT4_XATTR_PAD		(1<<EXT4_XATTR_PAD_BITS)
+#define EXT4_XATTR_ROUND		(EXT4_XATTR_PAD-1)
+#define EXT4_XATTR_LEN(name_len) \
+	(((name_len) + EXT4_XATTR_ROUND + \
+	sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
+#define EXT4_XATTR_NEXT(entry) \
+	( (struct ext4_xattr_entry *)( \
+	  (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) )
+#define EXT4_XATTR_SIZE(size) \
+	(((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
 
-# ifdef CONFIG_EXT3_FS_XATTR
+# ifdef CONFIG_EXT4DEV_FS_XATTR
 
-extern struct xattr_handler ext3_xattr_user_handler;
-extern struct xattr_handler ext3_xattr_trusted_handler;
-extern struct xattr_handler ext3_xattr_acl_access_handler;
-extern struct xattr_handler ext3_xattr_acl_default_handler;
-extern struct xattr_handler ext3_xattr_security_handler;
+extern struct xattr_handler ext4_xattr_user_handler;
+extern struct xattr_handler ext4_xattr_trusted_handler;
+extern struct xattr_handler ext4_xattr_acl_access_handler;
+extern struct xattr_handler ext4_xattr_acl_default_handler;
+extern struct xattr_handler ext4_xattr_security_handler;
 
-extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
+extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
 
-extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
-extern int ext3_xattr_list(struct inode *, char *, size_t);
-extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
-extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
+extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
+extern int ext4_xattr_list(struct inode *, char *, size_t);
+extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
+extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
 
-extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
-extern void ext3_xattr_put_super(struct super_block *);
+extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
+extern void ext4_xattr_put_super(struct super_block *);
 
-extern int init_ext3_xattr(void);
-extern void exit_ext3_xattr(void);
+extern int init_ext4_xattr(void);
+extern void exit_ext4_xattr(void);
 
-extern struct xattr_handler *ext3_xattr_handlers[];
+extern struct xattr_handler *ext4_xattr_handlers[];
 
-# else  /* CONFIG_EXT3_FS_XATTR */
+# else  /* CONFIG_EXT4DEV_FS_XATTR */
 
 static inline int
-ext3_xattr_get(struct inode *inode, int name_index, const char *name,
+ext4_xattr_get(struct inode *inode, int name_index, const char *name,
 	       void *buffer, size_t size, int flags)
 {
 	return -EOPNOTSUPP;
 }
 
 static inline int
-ext3_xattr_list(struct inode *inode, void *buffer, size_t size)
+ext4_xattr_list(struct inode *inode, void *buffer, size_t size)
 {
 	return -EOPNOTSUPP;
 }
 
 static inline int
-ext3_xattr_set(struct inode *inode, int name_index, const char *name,
+ext4_xattr_set(struct inode *inode, int name_index, const char *name,
 	       const void *value, size_t size, int flags)
 {
 	return -EOPNOTSUPP;
 }
 
 static inline int
-ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
+ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 	       const char *name, const void *value, size_t size, int flags)
 {
 	return -EOPNOTSUPP;
 }
 
 static inline void
-ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
+ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
 {
 }
 
 static inline void
-ext3_xattr_put_super(struct super_block *sb)
+ext4_xattr_put_super(struct super_block *sb)
 {
 }
 
 static inline int
-init_ext3_xattr(void)
+init_ext4_xattr(void)
 {
 	return 0;
 }
 
 static inline void
-exit_ext3_xattr(void)
+exit_ext4_xattr(void)
 {
 }
 
-#define ext3_xattr_handlers	NULL
+#define ext4_xattr_handlers	NULL
 
-# endif  /* CONFIG_EXT3_FS_XATTR */
+# endif  /* CONFIG_EXT4DEV_FS_XATTR */
 
-#ifdef CONFIG_EXT3_FS_SECURITY
-extern int ext3_init_security(handle_t *handle, struct inode *inode,
+#ifdef CONFIG_EXT4DEV_FS_SECURITY
+extern int ext4_init_security(handle_t *handle, struct inode *inode,
 				struct inode *dir);
 #else
-static inline int ext3_init_security(handle_t *handle, struct inode *inode,
+static inline int ext4_init_security(handle_t *handle, struct inode *inode,
 				struct inode *dir)
 {
 	return 0;
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index b9c40c15647b..d84b1dabeb16 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -1,5 +1,5 @@
 /*
- * linux/fs/ext3/xattr_security.c
+ * linux/fs/ext4/xattr_security.c
  * Handler for storing security labels as extended attributes.
  */
 
@@ -7,13 +7,13 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include <linux/ext4_jbd.h>
+#include <linux/ext4_fs.h>
 #include <linux/security.h>
 #include "xattr.h"
 
 static size_t
-ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
+ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
 			 const char *name, size_t name_len)
 {
 	const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
@@ -29,27 +29,27 @@ ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext3_xattr_security_get(struct inode *inode, const char *name,
+ext4_xattr_security_get(struct inode *inode, const char *name,
 		       void *buffer, size_t size)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name,
+	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name,
 			      buffer, size);
 }
 
 static int
-ext3_xattr_security_set(struct inode *inode, const char *name,
+ext4_xattr_security_set(struct inode *inode, const char *name,
 		       const void *value, size_t size, int flags)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name,
+	return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name,
 			      value, size, flags);
 }
 
 int
-ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
+ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
 {
 	int err;
 	size_t len;
@@ -62,16 +62,16 @@ ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
 			return 0;
 		return err;
 	}
-	err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
+	err = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_SECURITY,
 				    name, value, len, 0);
 	kfree(name);
 	kfree(value);
 	return err;
 }
 
-struct xattr_handler ext3_xattr_security_handler = {
+struct xattr_handler ext4_xattr_security_handler = {
 	.prefix	= XATTR_SECURITY_PREFIX,
-	.list	= ext3_xattr_security_list,
-	.get	= ext3_xattr_security_get,
-	.set	= ext3_xattr_security_set,
+	.list	= ext4_xattr_security_list,
+	.get	= ext4_xattr_security_get,
+	.set	= ext4_xattr_security_set,
 };
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index 86d91f1186dc..11bd58c95a61 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -1,5 +1,5 @@
 /*
- * linux/fs/ext3/xattr_trusted.c
+ * linux/fs/ext4/xattr_trusted.c
  * Handler for trusted extended attributes.
  *
  * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
@@ -10,14 +10,14 @@
 #include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include <linux/ext4_jbd.h>
+#include <linux/ext4_fs.h>
 #include "xattr.h"
 
 #define XATTR_TRUSTED_PREFIX "trusted."
 
 static size_t
-ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
+ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 			const char *name, size_t name_len)
 {
 	const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
@@ -35,28 +35,28 @@ ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext3_xattr_trusted_get(struct inode *inode, const char *name,
+ext4_xattr_trusted_get(struct inode *inode, const char *name,
 		       void *buffer, size_t size)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
+	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name,
 			      buffer, size);
 }
 
 static int
-ext3_xattr_trusted_set(struct inode *inode, const char *name,
+ext4_xattr_trusted_set(struct inode *inode, const char *name,
 		       const void *value, size_t size, int flags)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name,
+	return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name,
 			      value, size, flags);
 }
 
-struct xattr_handler ext3_xattr_trusted_handler = {
+struct xattr_handler ext4_xattr_trusted_handler = {
 	.prefix	= XATTR_TRUSTED_PREFIX,
-	.list	= ext3_xattr_trusted_list,
-	.get	= ext3_xattr_trusted_get,
-	.set	= ext3_xattr_trusted_set,
+	.list	= ext4_xattr_trusted_list,
+	.get	= ext4_xattr_trusted_get,
+	.set	= ext4_xattr_trusted_set,
 };
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index a85a0a17c4fd..9c5a665e0837 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -1,5 +1,5 @@
 /*
- * linux/fs/ext3/xattr_user.c
+ * linux/fs/ext4/xattr_user.c
  * Handler for extended user attributes.
  *
  * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
@@ -9,14 +9,14 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include <linux/ext4_jbd.h>
+#include <linux/ext4_fs.h>
 #include "xattr.h"
 
 #define XATTR_USER_PREFIX "user."
 
 static size_t
-ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
+ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 		     const char *name, size_t name_len)
 {
 	const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
@@ -34,31 +34,31 @@ ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext3_xattr_user_get(struct inode *inode, const char *name,
+ext4_xattr_user_get(struct inode *inode, const char *name,
 		    void *buffer, size_t size)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 	if (!test_opt(inode->i_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size);
+	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size);
 }
 
 static int
-ext3_xattr_user_set(struct inode *inode, const char *name,
+ext4_xattr_user_set(struct inode *inode, const char *name,
 		    const void *value, size_t size, int flags)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 	if (!test_opt(inode->i_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name,
+	return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name,
 			      value, size, flags);
 }
 
-struct xattr_handler ext3_xattr_user_handler = {
+struct xattr_handler ext4_xattr_user_handler = {
 	.prefix	= XATTR_USER_PREFIX,
-	.list	= ext3_xattr_user_list,
-	.get	= ext3_xattr_user_get,
-	.set	= ext3_xattr_user_set,
+	.list	= ext4_xattr_user_list,
+	.get	= ext4_xattr_user_get,
+	.set	= ext4_xattr_user_set,
 };
-- 
cgit v1.2.3


From 02ea2104c55b625cf5b5d9ba8586a4fc17920f5c Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 11 Oct 2006 01:20:56 -0700
Subject: [PATCH] ext4: enable building of ext4

Originally part of a patch from Mingming Cao and Randy Dunlap.  Reorganized
by Shaggy.

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig  | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 fs/Makefile |  1 +
 2 files changed, 72 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 599de54451af..ac9ba1c30935 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -140,6 +140,73 @@ config EXT3_FS_SECURITY
 	  If you are not using a security module that requires using
 	  extended attributes for file security labels, say N.
 
+config EXT4DEV_FS
+	tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+        select JBD
+        help
+	  Ext4dev is a predecessor filesystem of the next generation
+	  extended fs ext4, based on ext3 filesystem code. It will be
+	  renamed ext4 fs later, once ext4dev is mature and stabilized.
+
+          Unlike the change from ext2 filesystem to ext3 filesystem,
+          the on-disk format of ext4dev is not the same as ext3 any more:
+	  it is based on extent maps and it supports 48-bit physical block
+          numbers. These combined on-disk format changes will allow
+	  ext4dev/ext4 to handle more than 16 TB filesystem volumes --
+	  a hard limit that ext3 cannot overcome without changing the
+          on-disk format.
+
+	  Other than extent maps and 48-bit block numbers, ext4dev also is
+          likely to have other new features such as persistent preallocation,
+	  high resolution time stamps, and larger file support etc.  These
+          features will be added to ext4dev gradually.
+
+	  To compile this file system support as a module, choose M here. The
+	  module will be called ext4dev.  Be aware, however, that the filesystem
+	  of your root partition (the one containing the directory /) cannot
+	  be compiled as a module, and so this could be dangerous.
+
+	  If unsure, say N.
+
+config EXT4DEV_FS_XATTR
+	bool "Ext4dev extended attributes"
+	depends on EXT4DEV_FS
+	default y
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+	  You need this for POSIX ACL support on ext4dev/ext4.
+
+config EXT4DEV_FS_POSIX_ACL
+	bool "Ext4dev POSIX Access Control Lists"
+	depends on EXT4DEV_FS_XATTR
+	select FS_POSIX_ACL
+	help
+	  POSIX Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the POSIX ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N
+
+config EXT4DEV_FS_SECURITY
+	bool "Ext4dev Security Labels"
+	depends on EXT4DEV_FS_XATTR
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the ext4dev/ext4 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
+
 config JBD
 	tristate
 	help
@@ -173,11 +240,11 @@ config JBD_DEBUG
 	  "echo 0 > /proc/sys/fs/jbd-debug".
 
 config FS_MBCACHE
-# Meta block cache for Extended Attributes (ext2/ext3)
+# Meta block cache for Extended Attributes (ext2/ext3/ext4)
 	tristate
-	depends on EXT2_FS_XATTR || EXT3_FS_XATTR
-	default y if EXT2_FS=y || EXT3_FS=y
-	default m if EXT2_FS=m || EXT3_FS=m
+	depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR
+	default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y
+	default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m
 
 config REISERFS_FS
 	tristate "Reiserfs support"
diff --git a/fs/Makefile b/fs/Makefile
index df614eacee86..64396af37b2a 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_DLM)		+= dlm/
 # Do not add any filesystems before this line
 obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
 obj-$(CONFIG_EXT3_FS)		+= ext3/ # Before ext2 so root fs can be ext3
+obj-$(CONFIG_EXT4DEV_FS)	+= ext4/ # Before ext2 so root fs can be ext4dev
 obj-$(CONFIG_JBD)		+= jbd/
 obj-$(CONFIG_EXT2_FS)		+= ext2/
 obj-$(CONFIG_CRAMFS)		+= cramfs/
-- 
cgit v1.2.3


From 470decc613ab2048b619a01028072d932d9086ee Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 11 Oct 2006 01:20:57 -0700
Subject: [PATCH] jbd2: initial copy of files from jbd

This is a simple copy of the files in fs/jbd to fs/jbd2 and
/usr/include/linux/[ext4_]jbd.h to /usr/include/[ext4_]jbd2.h

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/Makefile      |    7 +
 fs/jbd2/checkpoint.c  |  697 +++++++++++++++++
 fs/jbd2/commit.c      |  911 ++++++++++++++++++++++
 fs/jbd2/journal.c     | 2072 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/jbd2/recovery.c    |  592 ++++++++++++++
 fs/jbd2/revoke.c      |  703 +++++++++++++++++
 fs/jbd2/transaction.c | 2080 +++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 7062 insertions(+)
 create mode 100644 fs/jbd2/Makefile
 create mode 100644 fs/jbd2/checkpoint.c
 create mode 100644 fs/jbd2/commit.c
 create mode 100644 fs/jbd2/journal.c
 create mode 100644 fs/jbd2/recovery.c
 create mode 100644 fs/jbd2/revoke.c
 create mode 100644 fs/jbd2/transaction.c

(limited to 'fs')

diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile
new file mode 100644
index 000000000000..54aca4868a36
--- /dev/null
+++ b/fs/jbd2/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the linux journaling routines.
+#
+
+obj-$(CONFIG_JBD) += jbd.o
+
+jbd-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
new file mode 100644
index 000000000000..0208cc7ac5d0
--- /dev/null
+++ b/fs/jbd2/checkpoint.c
@@ -0,0 +1,697 @@
+/*
+ * linux/fs/checkpoint.c
+ *
+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
+ *
+ * Copyright 1999 Red Hat Software --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Checkpoint routines for the generic filesystem journaling code.
+ * Part of the ext2fs journaling system.
+ *
+ * Checkpointing is the process of ensuring that a section of the log is
+ * committed fully to disk, so that that portion of the log can be
+ * reused.
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+
+/*
+ * Unlink a buffer from a transaction checkpoint list.
+ *
+ * Called with j_list_lock held.
+ */
+static inline void __buffer_unlink_first(struct journal_head *jh)
+{
+	transaction_t *transaction = jh->b_cp_transaction;
+
+	jh->b_cpnext->b_cpprev = jh->b_cpprev;
+	jh->b_cpprev->b_cpnext = jh->b_cpnext;
+	if (transaction->t_checkpoint_list == jh) {
+		transaction->t_checkpoint_list = jh->b_cpnext;
+		if (transaction->t_checkpoint_list == jh)
+			transaction->t_checkpoint_list = NULL;
+	}
+}
+
+/*
+ * Unlink a buffer from a transaction checkpoint(io) list.
+ *
+ * Called with j_list_lock held.
+ */
+static inline void __buffer_unlink(struct journal_head *jh)
+{
+	transaction_t *transaction = jh->b_cp_transaction;
+
+	__buffer_unlink_first(jh);
+	if (transaction->t_checkpoint_io_list == jh) {
+		transaction->t_checkpoint_io_list = jh->b_cpnext;
+		if (transaction->t_checkpoint_io_list == jh)
+			transaction->t_checkpoint_io_list = NULL;
+	}
+}
+
+/*
+ * Move a buffer from the checkpoint list to the checkpoint io list
+ *
+ * Called with j_list_lock held
+ */
+static inline void __buffer_relink_io(struct journal_head *jh)
+{
+	transaction_t *transaction = jh->b_cp_transaction;
+
+	__buffer_unlink_first(jh);
+
+	if (!transaction->t_checkpoint_io_list) {
+		jh->b_cpnext = jh->b_cpprev = jh;
+	} else {
+		jh->b_cpnext = transaction->t_checkpoint_io_list;
+		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
+		jh->b_cpprev->b_cpnext = jh;
+		jh->b_cpnext->b_cpprev = jh;
+	}
+	transaction->t_checkpoint_io_list = jh;
+}
+
+/*
+ * Try to release a checkpointed buffer from its transaction.
+ * Returns 0 if the buffer was not released, 1 if we released it and 2 if
+ * we also released the whole transaction.
+ *
+ * Requires j_list_lock
+ * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
+ */
+static int __try_to_free_cp_buf(struct journal_head *jh)
+{
+	int ret = 0;
+	struct buffer_head *bh = jh2bh(jh);
+
+	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
+		JBUFFER_TRACE(jh, "remove from checkpoint list");
+		ret = __journal_remove_checkpoint(jh) + 1;
+		jbd_unlock_bh_state(bh);
+		journal_remove_journal_head(bh);
+		BUFFER_TRACE(bh, "release");
+		__brelse(bh);
+	} else {
+		jbd_unlock_bh_state(bh);
+	}
+	return ret;
+}
+
+/*
+ * __log_wait_for_space: wait until there is space in the journal.
+ *
+ * Called under j_state_lock *only*.  It will be unlocked if we have to wait
+ * for a checkpoint to free up some space in the log.
+ */
+void __log_wait_for_space(journal_t *journal)
+{
+	int nblocks;
+	assert_spin_locked(&journal->j_state_lock);
+
+	nblocks = jbd_space_needed(journal);
+	while (__log_space_left(journal) < nblocks) {
+		if (journal->j_flags & JFS_ABORT)
+			return;
+		spin_unlock(&journal->j_state_lock);
+		mutex_lock(&journal->j_checkpoint_mutex);
+
+		/*
+		 * Test again, another process may have checkpointed while we
+		 * were waiting for the checkpoint lock
+		 */
+		spin_lock(&journal->j_state_lock);
+		nblocks = jbd_space_needed(journal);
+		if (__log_space_left(journal) < nblocks) {
+			spin_unlock(&journal->j_state_lock);
+			log_do_checkpoint(journal);
+			spin_lock(&journal->j_state_lock);
+		}
+		mutex_unlock(&journal->j_checkpoint_mutex);
+	}
+}
+
+/*
+ * We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
+ * The caller must restart a list walk.  Wait for someone else to run
+ * jbd_unlock_bh_state().
+ */
+static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
+	__releases(journal->j_list_lock)
+{
+	get_bh(bh);
+	spin_unlock(&journal->j_list_lock);
+	jbd_lock_bh_state(bh);
+	jbd_unlock_bh_state(bh);
+	put_bh(bh);
+}
+
+/*
+ * Clean up transaction's list of buffers submitted for io.
+ * We wait for any pending IO to complete and remove any clean
+ * buffers. Note that we take the buffers in the opposite ordering
+ * from the one in which they were submitted for IO.
+ *
+ * Called with j_list_lock held.
+ */
+static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
+{
+	struct journal_head *jh;
+	struct buffer_head *bh;
+	tid_t this_tid;
+	int released = 0;
+
+	this_tid = transaction->t_tid;
+restart:
+	/* Did somebody clean up the transaction in the meanwhile? */
+	if (journal->j_checkpoint_transactions != transaction ||
+			transaction->t_tid != this_tid)
+		return;
+	while (!released && transaction->t_checkpoint_io_list) {
+		jh = transaction->t_checkpoint_io_list;
+		bh = jh2bh(jh);
+		if (!jbd_trylock_bh_state(bh)) {
+			jbd_sync_bh(journal, bh);
+			spin_lock(&journal->j_list_lock);
+			goto restart;
+		}
+		if (buffer_locked(bh)) {
+			atomic_inc(&bh->b_count);
+			spin_unlock(&journal->j_list_lock);
+			jbd_unlock_bh_state(bh);
+			wait_on_buffer(bh);
+			/* the journal_head may have gone by now */
+			BUFFER_TRACE(bh, "brelse");
+			__brelse(bh);
+			spin_lock(&journal->j_list_lock);
+			goto restart;
+		}
+		/*
+		 * Now in whatever state the buffer currently is, we know that
+		 * it has been written out and so we can drop it from the list
+		 */
+		released = __journal_remove_checkpoint(jh);
+		jbd_unlock_bh_state(bh);
+		journal_remove_journal_head(bh);
+		__brelse(bh);
+	}
+}
+
+#define NR_BATCH	64
+
+static void
+__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
+{
+	int i;
+
+	ll_rw_block(SWRITE, *batch_count, bhs);
+	for (i = 0; i < *batch_count; i++) {
+		struct buffer_head *bh = bhs[i];
+		clear_buffer_jwrite(bh);
+		BUFFER_TRACE(bh, "brelse");
+		__brelse(bh);
+	}
+	*batch_count = 0;
+}
+
+/*
+ * Try to flush one buffer from the checkpoint list to disk.
+ *
+ * Return 1 if something happened which requires us to abort the current
+ * scan of the checkpoint list.
+ *
+ * Called with j_list_lock held and drops it if 1 is returned
+ * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
+ */
+static int __process_buffer(journal_t *journal, struct journal_head *jh,
+			struct buffer_head **bhs, int *batch_count)
+{
+	struct buffer_head *bh = jh2bh(jh);
+	int ret = 0;
+
+	if (buffer_locked(bh)) {
+		atomic_inc(&bh->b_count);
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		wait_on_buffer(bh);
+		/* the journal_head may have gone by now */
+		BUFFER_TRACE(bh, "brelse");
+		__brelse(bh);
+		ret = 1;
+	} else if (jh->b_transaction != NULL) {
+		transaction_t *t = jh->b_transaction;
+		tid_t tid = t->t_tid;
+
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		log_start_commit(journal, tid);
+		log_wait_commit(journal, tid);
+		ret = 1;
+	} else if (!buffer_dirty(bh)) {
+		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
+		BUFFER_TRACE(bh, "remove from checkpoint");
+		__journal_remove_checkpoint(jh);
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		journal_remove_journal_head(bh);
+		__brelse(bh);
+		ret = 1;
+	} else {
+		/*
+		 * Important: we are about to write the buffer, and
+		 * possibly block, while still holding the journal lock.
+		 * We cannot afford to let the transaction logic start
+		 * messing around with this buffer before we write it to
+		 * disk, as that would break recoverability.
+		 */
+		BUFFER_TRACE(bh, "queue");
+		get_bh(bh);
+		J_ASSERT_BH(bh, !buffer_jwrite(bh));
+		set_buffer_jwrite(bh);
+		bhs[*batch_count] = bh;
+		__buffer_relink_io(jh);
+		jbd_unlock_bh_state(bh);
+		(*batch_count)++;
+		if (*batch_count == NR_BATCH) {
+			spin_unlock(&journal->j_list_lock);
+			__flush_batch(journal, bhs, batch_count);
+			ret = 1;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Perform an actual checkpoint. We take the first transaction on the
+ * list of transactions to be checkpointed and send all its buffers
+ * to disk. We submit larger chunks of data at once.
+ *
+ * The journal should be locked before calling this function.
+ */
+int log_do_checkpoint(journal_t *journal)
+{
+	transaction_t *transaction;
+	tid_t this_tid;
+	int result;
+
+	jbd_debug(1, "Start checkpoint\n");
+
+	/*
+	 * First thing: if there are any transactions in the log which
+	 * don't need checkpointing, just eliminate them from the
+	 * journal straight away.
+	 */
+	result = cleanup_journal_tail(journal);
+	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
+	if (result <= 0)
+		return result;
+
+	/*
+	 * OK, we need to start writing disk blocks.  Take one transaction
+	 * and write it.
+	 */
+	spin_lock(&journal->j_list_lock);
+	if (!journal->j_checkpoint_transactions)
+		goto out;
+	transaction = journal->j_checkpoint_transactions;
+	this_tid = transaction->t_tid;
+restart:
+	/*
+	 * If someone cleaned up this transaction while we slept, we're
+	 * done (maybe it's a new transaction, but it fell at the same
+	 * address).
+	 */
+	if (journal->j_checkpoint_transactions == transaction &&
+			transaction->t_tid == this_tid) {
+		int batch_count = 0;
+		struct buffer_head *bhs[NR_BATCH];
+		struct journal_head *jh;
+		int retry = 0;
+
+		while (!retry && transaction->t_checkpoint_list) {
+			struct buffer_head *bh;
+
+			jh = transaction->t_checkpoint_list;
+			bh = jh2bh(jh);
+			if (!jbd_trylock_bh_state(bh)) {
+				jbd_sync_bh(journal, bh);
+				retry = 1;
+				break;
+			}
+			retry = __process_buffer(journal, jh, bhs,&batch_count);
+			if (!retry && lock_need_resched(&journal->j_list_lock)){
+				spin_unlock(&journal->j_list_lock);
+				retry = 1;
+				break;
+			}
+		}
+
+		if (batch_count) {
+			if (!retry) {
+				spin_unlock(&journal->j_list_lock);
+				retry = 1;
+			}
+			__flush_batch(journal, bhs, &batch_count);
+		}
+
+		if (retry) {
+			spin_lock(&journal->j_list_lock);
+			goto restart;
+		}
+		/*
+		 * Now we have cleaned up the first transaction's checkpoint
+		 * list. Let's clean up the second one
+		 */
+		__wait_cp_io(journal, transaction);
+	}
+out:
+	spin_unlock(&journal->j_list_lock);
+	result = cleanup_journal_tail(journal);
+	if (result < 0)
+		return result;
+	return 0;
+}
+
+/*
+ * Check the list of checkpoint transactions for the journal to see if
+ * we have already got rid of any since the last update of the log tail
+ * in the journal superblock.  If so, we can instantly roll the
+ * superblock forward to remove those transactions from the log.
+ *
+ * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
+ *
+ * Called with the journal lock held.
+ *
+ * This is the only part of the journaling code which really needs to be
+ * aware of transaction aborts.  Checkpointing involves writing to the
+ * main filesystem area rather than to the journal, so it can proceed
+ * even in abort state, but we must not update the journal superblock if
+ * we have an abort error outstanding.
+ */
+
+int cleanup_journal_tail(journal_t *journal)
+{
+	transaction_t * transaction;
+	tid_t		first_tid;
+	unsigned long	blocknr, freed;
+
+	/* OK, work out the oldest transaction remaining in the log, and
+	 * the log block it starts at.
+	 *
+	 * If the log is now empty, we need to work out which is the
+	 * next transaction ID we will write, and where it will
+	 * start. */
+
+	spin_lock(&journal->j_state_lock);
+	spin_lock(&journal->j_list_lock);
+	transaction = journal->j_checkpoint_transactions;
+	if (transaction) {
+		first_tid = transaction->t_tid;
+		blocknr = transaction->t_log_start;
+	} else if ((transaction = journal->j_committing_transaction) != NULL) {
+		first_tid = transaction->t_tid;
+		blocknr = transaction->t_log_start;
+	} else if ((transaction = journal->j_running_transaction) != NULL) {
+		first_tid = transaction->t_tid;
+		blocknr = journal->j_head;
+	} else {
+		first_tid = journal->j_transaction_sequence;
+		blocknr = journal->j_head;
+	}
+	spin_unlock(&journal->j_list_lock);
+	J_ASSERT(blocknr != 0);
+
+	/* If the oldest pinned transaction is at the tail of the log
+           already then there's not much we can do right now. */
+	if (journal->j_tail_sequence == first_tid) {
+		spin_unlock(&journal->j_state_lock);
+		return 1;
+	}
+
+	/* OK, update the superblock to recover the freed space.
+	 * Physical blocks come first: have we wrapped beyond the end of
+	 * the log?  */
+	freed = blocknr - journal->j_tail;
+	if (blocknr < journal->j_tail)
+		freed = freed + journal->j_last - journal->j_first;
+
+	jbd_debug(1,
+		  "Cleaning journal tail from %d to %d (offset %lu), "
+		  "freeing %lu\n",
+		  journal->j_tail_sequence, first_tid, blocknr, freed);
+
+	journal->j_free += freed;
+	journal->j_tail_sequence = first_tid;
+	journal->j_tail = blocknr;
+	spin_unlock(&journal->j_state_lock);
+	if (!(journal->j_flags & JFS_ABORT))
+		journal_update_superblock(journal, 1);
+	return 0;
+}
+
+
+/* Checkpoint list management */
+
+/*
+ * journal_clean_one_cp_list
+ *
+ * Find all the written-back checkpoint buffers in the given list and release them.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ * Returns number of buffers reaped (for debug)
+ */
+
+static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
+{
+	struct journal_head *last_jh;
+	struct journal_head *next_jh = jh;
+	int ret, freed = 0;
+
+	*released = 0;
+	if (!jh)
+		return 0;
+
+	last_jh = jh->b_cpprev;
+	do {
+		jh = next_jh;
+		next_jh = jh->b_cpnext;
+		/* Use trylock because of the ranking */
+		if (jbd_trylock_bh_state(jh2bh(jh))) {
+			ret = __try_to_free_cp_buf(jh);
+			if (ret) {
+				freed++;
+				if (ret == 2) {
+					*released = 1;
+					return freed;
+				}
+			}
+		}
+		/*
+		 * This function only frees up some memory
+		 * if possible so we dont have an obligation
+		 * to finish processing. Bail out if preemption
+		 * requested:
+		 */
+		if (need_resched())
+			return freed;
+	} while (jh != last_jh);
+
+	return freed;
+}
+
+/*
+ * journal_clean_checkpoint_list
+ *
+ * Find all the written-back checkpoint buffers in the journal and release them.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ * Returns number of buffers reaped (for debug)
+ */
+
+int __journal_clean_checkpoint_list(journal_t *journal)
+{
+	transaction_t *transaction, *last_transaction, *next_transaction;
+	int ret = 0;
+	int released;
+
+	transaction = journal->j_checkpoint_transactions;
+	if (!transaction)
+		goto out;
+
+	last_transaction = transaction->t_cpprev;
+	next_transaction = transaction;
+	do {
+		transaction = next_transaction;
+		next_transaction = transaction->t_cpnext;
+		ret += journal_clean_one_cp_list(transaction->
+				t_checkpoint_list, &released);
+		/*
+		 * This function only frees up some memory if possible so we
+		 * dont have an obligation to finish processing. Bail out if
+		 * preemption requested:
+		 */
+		if (need_resched())
+			goto out;
+		if (released)
+			continue;
+		/*
+		 * It is essential that we are as careful as in the case of
+		 * t_checkpoint_list with removing the buffer from the list as
+		 * we can possibly see not yet submitted buffers on io_list
+		 */
+		ret += journal_clean_one_cp_list(transaction->
+				t_checkpoint_io_list, &released);
+		if (need_resched())
+			goto out;
+	} while (transaction != last_transaction);
+out:
+	return ret;
+}
+
+/*
+ * journal_remove_checkpoint: called after a buffer has been committed
+ * to disk (either by being write-back flushed to disk, or being
+ * committed to the log).
+ *
+ * We cannot safely clean a transaction out of the log until all of the
+ * buffer updates committed in that transaction have safely been stored
+ * elsewhere on disk.  To achieve this, all of the buffers in a
+ * transaction need to be maintained on the transaction's checkpoint
+ * lists until they have been rewritten, at which point this function is
+ * called to remove the buffer from the existing transaction's
+ * checkpoint lists.
+ *
+ * The function returns 1 if it frees the transaction, 0 otherwise.
+ *
+ * This function is called with the journal locked.
+ * This function is called with j_list_lock held.
+ * This function is called with jbd_lock_bh_state(jh2bh(jh))
+ */
+
+int __journal_remove_checkpoint(struct journal_head *jh)
+{
+	transaction_t *transaction;
+	journal_t *journal;
+	int ret = 0;
+
+	JBUFFER_TRACE(jh, "entry");
+
+	if ((transaction = jh->b_cp_transaction) == NULL) {
+		JBUFFER_TRACE(jh, "not on transaction");
+		goto out;
+	}
+	journal = transaction->t_journal;
+
+	__buffer_unlink(jh);
+	jh->b_cp_transaction = NULL;
+
+	if (transaction->t_checkpoint_list != NULL ||
+	    transaction->t_checkpoint_io_list != NULL)
+		goto out;
+	JBUFFER_TRACE(jh, "transaction has no more buffers");
+
+	/*
+	 * There is one special case to worry about: if we have just pulled the
+	 * buffer off a committing transaction's forget list, then even if the
+	 * checkpoint list is empty, the transaction obviously cannot be
+	 * dropped!
+	 *
+	 * The locking here around j_committing_transaction is a bit sleazy.
+	 * See the comment at the end of journal_commit_transaction().
+	 */
+	if (transaction == journal->j_committing_transaction) {
+		JBUFFER_TRACE(jh, "belongs to committing transaction");
+		goto out;
+	}
+
+	/* OK, that was the last buffer for the transaction: we can now
+	   safely remove this transaction from the log */
+
+	__journal_drop_transaction(journal, transaction);
+
+	/* Just in case anybody was waiting for more transactions to be
+           checkpointed... */
+	wake_up(&journal->j_wait_logspace);
+	ret = 1;
+out:
+	JBUFFER_TRACE(jh, "exit");
+	return ret;
+}
+
+/*
+ * journal_insert_checkpoint: put a committed buffer onto a checkpoint
+ * list so that we know when it is safe to clean the transaction out of
+ * the log.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ */
+void __journal_insert_checkpoint(struct journal_head *jh,
+			       transaction_t *transaction)
+{
+	JBUFFER_TRACE(jh, "entry");
+	J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
+	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+
+	jh->b_cp_transaction = transaction;
+
+	if (!transaction->t_checkpoint_list) {
+		jh->b_cpnext = jh->b_cpprev = jh;
+	} else {
+		jh->b_cpnext = transaction->t_checkpoint_list;
+		jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
+		jh->b_cpprev->b_cpnext = jh;
+		jh->b_cpnext->b_cpprev = jh;
+	}
+	transaction->t_checkpoint_list = jh;
+}
+
+/*
+ * We've finished with this transaction structure: adios...
+ *
+ * The transaction must have no links except for the checkpoint by this
+ * point.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ */
+
+void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
+{
+	assert_spin_locked(&journal->j_list_lock);
+	if (transaction->t_cpnext) {
+		transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
+		transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
+		if (journal->j_checkpoint_transactions == transaction)
+			journal->j_checkpoint_transactions =
+				transaction->t_cpnext;
+		if (journal->j_checkpoint_transactions == transaction)
+			journal->j_checkpoint_transactions = NULL;
+	}
+
+	J_ASSERT(transaction->t_state == T_FINISHED);
+	J_ASSERT(transaction->t_buffers == NULL);
+	J_ASSERT(transaction->t_sync_datalist == NULL);
+	J_ASSERT(transaction->t_forget == NULL);
+	J_ASSERT(transaction->t_iobuf_list == NULL);
+	J_ASSERT(transaction->t_shadow_list == NULL);
+	J_ASSERT(transaction->t_log_list == NULL);
+	J_ASSERT(transaction->t_checkpoint_list == NULL);
+	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
+	J_ASSERT(transaction->t_updates == 0);
+	J_ASSERT(journal->j_committing_transaction != transaction);
+	J_ASSERT(journal->j_running_transaction != transaction);
+
+	jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
+	kfree(transaction);
+}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
new file mode 100644
index 000000000000..10be51290a27
--- /dev/null
+++ b/fs/jbd2/commit.c
@@ -0,0 +1,911 @@
+/*
+ * linux/fs/jbd/commit.c
+ *
+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
+ *
+ * Copyright 1998 Red Hat corp --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Journal commit routines for the generic filesystem journaling code;
+ * part of the ext2fs journaling system.
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+
+/*
+ * Default IO end handler for temporary BJ_IO buffer_heads.
+ */
+static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
+{
+	BUFFER_TRACE(bh, "");
+	if (uptodate)
+		set_buffer_uptodate(bh);
+	else
+		clear_buffer_uptodate(bh);
+	unlock_buffer(bh);
+}
+
+/*
+ * When an ext3-ordered file is truncated, it is possible that many pages are
+ * not successfully freed, because they are attached to a committing transaction.
+ * After the transaction commits, these pages are left on the LRU, with no
+ * ->mapping, and with attached buffers.  These pages are trivially reclaimable
+ * by the VM, but their apparent absence upsets the VM accounting, and it makes
+ * the numbers in /proc/meminfo look odd.
+ *
+ * So here, we have a buffer which has just come off the forget list.  Look to
+ * see if we can strip all buffers from the backing page.
+ *
+ * Called under lock_journal(), and possibly under journal_datalist_lock.  The
+ * caller provided us with a ref against the buffer, and we drop that here.
+ */
+static void release_buffer_page(struct buffer_head *bh)
+{
+	struct page *page;
+
+	if (buffer_dirty(bh))
+		goto nope;
+	if (atomic_read(&bh->b_count) != 1)
+		goto nope;
+	page = bh->b_page;
+	if (!page)
+		goto nope;
+	if (page->mapping)
+		goto nope;
+
+	/* OK, it's a truncated page */
+	if (TestSetPageLocked(page))
+		goto nope;
+
+	page_cache_get(page);
+	__brelse(bh);
+	try_to_free_buffers(page);
+	unlock_page(page);
+	page_cache_release(page);
+	return;
+
+nope:
+	__brelse(bh);
+}
+
+/*
+ * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
+ * held.  For ranking reasons we must trylock.  If we lose, schedule away and
+ * return 0.  j_list_lock is dropped in this case.
+ */
+static int inverted_lock(journal_t *journal, struct buffer_head *bh)
+{
+	if (!jbd_trylock_bh_state(bh)) {
+		spin_unlock(&journal->j_list_lock);
+		schedule();
+		return 0;
+	}
+	return 1;
+}
+
+/* Done it all: now write the commit record.  We should have
+ * cleaned up our previous buffers by now, so if we are in abort
+ * mode we can now just skip the rest of the journal write
+ * entirely.
+ *
+ * Returns 1 if the journal needs to be aborted or 0 on success
+ */
+static int journal_write_commit_record(journal_t *journal,
+					transaction_t *commit_transaction)
+{
+	struct journal_head *descriptor;
+	struct buffer_head *bh;
+	int i, ret;
+	int barrier_done = 0;
+
+	if (is_journal_aborted(journal))
+		return 0;
+
+	descriptor = journal_get_descriptor_buffer(journal);
+	if (!descriptor)
+		return 1;
+
+	bh = jh2bh(descriptor);
+
+	/* AKPM: buglet - add `i' to tmp! */
+	for (i = 0; i < bh->b_size; i += 512) {
+		journal_header_t *tmp = (journal_header_t*)bh->b_data;
+		tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
+		tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
+		tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
+	}
+
+	JBUFFER_TRACE(descriptor, "write commit block");
+	set_buffer_dirty(bh);
+	if (journal->j_flags & JFS_BARRIER) {
+		set_buffer_ordered(bh);
+		barrier_done = 1;
+	}
+	ret = sync_dirty_buffer(bh);
+	/* is it possible for another commit to fail at roughly
+	 * the same time as this one?  If so, we don't want to
+	 * trust the barrier flag in the super, but instead want
+	 * to remember if we sent a barrier request
+	 */
+	if (ret == -EOPNOTSUPP && barrier_done) {
+		char b[BDEVNAME_SIZE];
+
+		printk(KERN_WARNING
+			"JBD: barrier-based sync failed on %s - "
+			"disabling barriers\n",
+			bdevname(journal->j_dev, b));
+		spin_lock(&journal->j_state_lock);
+		journal->j_flags &= ~JFS_BARRIER;
+		spin_unlock(&journal->j_state_lock);
+
+		/* And try again, without the barrier */
+		clear_buffer_ordered(bh);
+		set_buffer_uptodate(bh);
+		set_buffer_dirty(bh);
+		ret = sync_dirty_buffer(bh);
+	}
+	put_bh(bh);		/* One for getblk() */
+	journal_put_journal_head(descriptor);
+
+	return (ret == -EIO);
+}
+
+static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
+{
+	int i;
+
+	for (i = 0; i < bufs; i++) {
+		wbuf[i]->b_end_io = end_buffer_write_sync;
+		/* We use-up our safety reference in submit_bh() */
+		submit_bh(WRITE, wbuf[i]);
+	}
+}
+
+/*
+ *  Submit all the data buffers to disk
+ */
+static void journal_submit_data_buffers(journal_t *journal,
+				transaction_t *commit_transaction)
+{
+	struct journal_head *jh;
+	struct buffer_head *bh;
+	int locked;
+	int bufs = 0;
+	struct buffer_head **wbuf = journal->j_wbuf;
+
+	/*
+	 * Whenever we unlock the journal and sleep, things can get added
+	 * onto ->t_sync_datalist, so we have to keep looping back to
+	 * write_out_data until we *know* that the list is empty.
+	 *
+	 * Cleanup any flushed data buffers from the data list.  Even in
+	 * abort mode, we want to flush this out as soon as possible.
+	 */
+write_out_data:
+	cond_resched();
+	spin_lock(&journal->j_list_lock);
+
+	while (commit_transaction->t_sync_datalist) {
+		jh = commit_transaction->t_sync_datalist;
+		bh = jh2bh(jh);
+		locked = 0;
+
+		/* Get reference just to make sure buffer does not disappear
+		 * when we are forced to drop various locks */
+		get_bh(bh);
+		/* If the buffer is dirty, we need to submit IO and hence
+		 * we need the buffer lock. We try to lock the buffer without
+		 * blocking. If we fail, we need to drop j_list_lock and do
+		 * blocking lock_buffer().
+		 */
+		if (buffer_dirty(bh)) {
+			if (test_set_buffer_locked(bh)) {
+				BUFFER_TRACE(bh, "needs blocking lock");
+				spin_unlock(&journal->j_list_lock);
+				/* Write out all data to prevent deadlocks */
+				journal_do_submit_data(wbuf, bufs);
+				bufs = 0;
+				lock_buffer(bh);
+				spin_lock(&journal->j_list_lock);
+			}
+			locked = 1;
+		}
+		/* We have to get bh_state lock. Again out of order, sigh. */
+		if (!inverted_lock(journal, bh)) {
+			jbd_lock_bh_state(bh);
+			spin_lock(&journal->j_list_lock);
+		}
+		/* Someone already cleaned up the buffer? */
+		if (!buffer_jbd(bh)
+			|| jh->b_transaction != commit_transaction
+			|| jh->b_jlist != BJ_SyncData) {
+			jbd_unlock_bh_state(bh);
+			if (locked)
+				unlock_buffer(bh);
+			BUFFER_TRACE(bh, "already cleaned up");
+			put_bh(bh);
+			continue;
+		}
+		if (locked && test_clear_buffer_dirty(bh)) {
+			BUFFER_TRACE(bh, "needs writeout, adding to array");
+			wbuf[bufs++] = bh;
+			__journal_file_buffer(jh, commit_transaction,
+						BJ_Locked);
+			jbd_unlock_bh_state(bh);
+			if (bufs == journal->j_wbufsize) {
+				spin_unlock(&journal->j_list_lock);
+				journal_do_submit_data(wbuf, bufs);
+				bufs = 0;
+				goto write_out_data;
+			}
+		}
+		else {
+			BUFFER_TRACE(bh, "writeout complete: unfile");
+			__journal_unfile_buffer(jh);
+			jbd_unlock_bh_state(bh);
+			if (locked)
+				unlock_buffer(bh);
+			journal_remove_journal_head(bh);
+			/* Once for our safety reference, once for
+			 * journal_remove_journal_head() */
+			put_bh(bh);
+			put_bh(bh);
+		}
+
+		if (lock_need_resched(&journal->j_list_lock)) {
+			spin_unlock(&journal->j_list_lock);
+			goto write_out_data;
+		}
+	}
+	spin_unlock(&journal->j_list_lock);
+	journal_do_submit_data(wbuf, bufs);
+}
+
+/*
+ * journal_commit_transaction
+ *
+ * The primary function for committing a transaction to the log.  This
+ * function is called by the journal thread to begin a complete commit.
+ */
+void journal_commit_transaction(journal_t *journal)
+{
+	transaction_t *commit_transaction;
+	struct journal_head *jh, *new_jh, *descriptor;
+	struct buffer_head **wbuf = journal->j_wbuf;
+	int bufs;
+	int flags;
+	int err;
+	unsigned long blocknr;
+	char *tagp = NULL;
+	journal_header_t *header;
+	journal_block_tag_t *tag = NULL;
+	int space_left = 0;
+	int first_tag = 0;
+	int tag_flag;
+	int i;
+
+	/*
+	 * First job: lock down the current transaction and wait for
+	 * all outstanding updates to complete.
+	 */
+
+#ifdef COMMIT_STATS
+	spin_lock(&journal->j_list_lock);
+	summarise_journal_usage(journal);
+	spin_unlock(&journal->j_list_lock);
+#endif
+
+	/* Do we need to erase the effects of a prior journal_flush? */
+	if (journal->j_flags & JFS_FLUSHED) {
+		jbd_debug(3, "super block updated\n");
+		journal_update_superblock(journal, 1);
+	} else {
+		jbd_debug(3, "superblock not updated\n");
+	}
+
+	J_ASSERT(journal->j_running_transaction != NULL);
+	J_ASSERT(journal->j_committing_transaction == NULL);
+
+	commit_transaction = journal->j_running_transaction;
+	J_ASSERT(commit_transaction->t_state == T_RUNNING);
+
+	jbd_debug(1, "JBD: starting commit of transaction %d\n",
+			commit_transaction->t_tid);
+
+	spin_lock(&journal->j_state_lock);
+	commit_transaction->t_state = T_LOCKED;
+
+	spin_lock(&commit_transaction->t_handle_lock);
+	while (commit_transaction->t_updates) {
+		DEFINE_WAIT(wait);
+
+		prepare_to_wait(&journal->j_wait_updates, &wait,
+					TASK_UNINTERRUPTIBLE);
+		if (commit_transaction->t_updates) {
+			spin_unlock(&commit_transaction->t_handle_lock);
+			spin_unlock(&journal->j_state_lock);
+			schedule();
+			spin_lock(&journal->j_state_lock);
+			spin_lock(&commit_transaction->t_handle_lock);
+		}
+		finish_wait(&journal->j_wait_updates, &wait);
+	}
+	spin_unlock(&commit_transaction->t_handle_lock);
+
+	J_ASSERT (commit_transaction->t_outstanding_credits <=
+			journal->j_max_transaction_buffers);
+
+	/*
+	 * First thing we are allowed to do is to discard any remaining
+	 * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
+	 * that there are no such buffers: if a large filesystem
+	 * operation like a truncate needs to split itself over multiple
+	 * transactions, then it may try to do a journal_restart() while
+	 * there are still BJ_Reserved buffers outstanding.  These must
+	 * be released cleanly from the current transaction.
+	 *
+	 * In this case, the filesystem must still reserve write access
+	 * again before modifying the buffer in the new transaction, but
+	 * we do not require it to remember exactly which old buffers it
+	 * has reserved.  This is consistent with the existing behaviour
+	 * that multiple journal_get_write_access() calls to the same
+	 * buffer are perfectly permissable.
+	 */
+	while (commit_transaction->t_reserved_list) {
+		jh = commit_transaction->t_reserved_list;
+		JBUFFER_TRACE(jh, "reserved, unused: refile");
+		/*
+		 * A journal_get_undo_access()+journal_release_buffer() may
+		 * leave undo-committed data.
+		 */
+		if (jh->b_committed_data) {
+			struct buffer_head *bh = jh2bh(jh);
+
+			jbd_lock_bh_state(bh);
+			jbd_slab_free(jh->b_committed_data, bh->b_size);
+			jh->b_committed_data = NULL;
+			jbd_unlock_bh_state(bh);
+		}
+		journal_refile_buffer(journal, jh);
+	}
+
+	/*
+	 * Now try to drop any written-back buffers from the journal's
+	 * checkpoint lists.  We do this *before* commit because it potentially
+	 * frees some memory
+	 */
+	spin_lock(&journal->j_list_lock);
+	__journal_clean_checkpoint_list(journal);
+	spin_unlock(&journal->j_list_lock);
+
+	jbd_debug (3, "JBD: commit phase 1\n");
+
+	/*
+	 * Switch to a new revoke table.
+	 */
+	journal_switch_revoke_table(journal);
+
+	commit_transaction->t_state = T_FLUSH;
+	journal->j_committing_transaction = commit_transaction;
+	journal->j_running_transaction = NULL;
+	commit_transaction->t_log_start = journal->j_head;
+	wake_up(&journal->j_wait_transaction_locked);
+	spin_unlock(&journal->j_state_lock);
+
+	jbd_debug (3, "JBD: commit phase 2\n");
+
+	/*
+	 * First, drop modified flag: all accesses to the buffers
+	 * will be tracked for a new transaction only -bzzz
+	 */
+	spin_lock(&journal->j_list_lock);
+	if (commit_transaction->t_buffers) {
+		new_jh = jh = commit_transaction->t_buffers->b_tnext;
+		do {
+			J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
+					new_jh->b_modified == 0);
+			new_jh->b_modified = 0;
+			new_jh = new_jh->b_tnext;
+		} while (new_jh != jh);
+	}
+	spin_unlock(&journal->j_list_lock);
+
+	/*
+	 * Now start flushing things to disk, in the order they appear
+	 * on the transaction lists.  Data blocks go first.
+	 */
+	err = 0;
+	journal_submit_data_buffers(journal, commit_transaction);
+
+	/*
+	 * Wait for all previously submitted IO to complete.
+	 */
+	spin_lock(&journal->j_list_lock);
+	while (commit_transaction->t_locked_list) {
+		struct buffer_head *bh;
+
+		jh = commit_transaction->t_locked_list->b_tprev;
+		bh = jh2bh(jh);
+		get_bh(bh);
+		if (buffer_locked(bh)) {
+			spin_unlock(&journal->j_list_lock);
+			wait_on_buffer(bh);
+			if (unlikely(!buffer_uptodate(bh)))
+				err = -EIO;
+			spin_lock(&journal->j_list_lock);
+		}
+		if (!inverted_lock(journal, bh)) {
+			put_bh(bh);
+			spin_lock(&journal->j_list_lock);
+			continue;
+		}
+		if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
+			__journal_unfile_buffer(jh);
+			jbd_unlock_bh_state(bh);
+			journal_remove_journal_head(bh);
+			put_bh(bh);
+		} else {
+			jbd_unlock_bh_state(bh);
+		}
+		put_bh(bh);
+		cond_resched_lock(&journal->j_list_lock);
+	}
+	spin_unlock(&journal->j_list_lock);
+
+	if (err)
+		__journal_abort_hard(journal);
+
+	journal_write_revoke_records(journal, commit_transaction);
+
+	jbd_debug(3, "JBD: commit phase 2\n");
+
+	/*
+	 * If we found any dirty or locked buffers, then we should have
+	 * looped back up to the write_out_data label.  If there weren't
+	 * any then journal_clean_data_list should have wiped the list
+	 * clean by now, so check that it is in fact empty.
+	 */
+	J_ASSERT (commit_transaction->t_sync_datalist == NULL);
+
+	jbd_debug (3, "JBD: commit phase 3\n");
+
+	/*
+	 * Way to go: we have now written out all of the data for a
+	 * transaction!  Now comes the tricky part: we need to write out
+	 * metadata.  Loop over the transaction's entire buffer list:
+	 */
+	commit_transaction->t_state = T_COMMIT;
+
+	descriptor = NULL;
+	bufs = 0;
+	while (commit_transaction->t_buffers) {
+
+		/* Find the next buffer to be journaled... */
+
+		jh = commit_transaction->t_buffers;
+
+		/* If we're in abort mode, we just un-journal the buffer and
+		   release it for background writing. */
+
+		if (is_journal_aborted(journal)) {
+			JBUFFER_TRACE(jh, "journal is aborting: refile");
+			journal_refile_buffer(journal, jh);
+			/* If that was the last one, we need to clean up
+			 * any descriptor buffers which may have been
+			 * already allocated, even if we are now
+			 * aborting. */
+			if (!commit_transaction->t_buffers)
+				goto start_journal_io;
+			continue;
+		}
+
+		/* Make sure we have a descriptor block in which to
+		   record the metadata buffer. */
+
+		if (!descriptor) {
+			struct buffer_head *bh;
+
+			J_ASSERT (bufs == 0);
+
+			jbd_debug(4, "JBD: get descriptor\n");
+
+			descriptor = journal_get_descriptor_buffer(journal);
+			if (!descriptor) {
+				__journal_abort_hard(journal);
+				continue;
+			}
+
+			bh = jh2bh(descriptor);
+			jbd_debug(4, "JBD: got buffer %llu (%p)\n",
+				(unsigned long long)bh->b_blocknr, bh->b_data);
+			header = (journal_header_t *)&bh->b_data[0];
+			header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
+			header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
+			header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);
+
+			tagp = &bh->b_data[sizeof(journal_header_t)];
+			space_left = bh->b_size - sizeof(journal_header_t);
+			first_tag = 1;
+			set_buffer_jwrite(bh);
+			set_buffer_dirty(bh);
+			wbuf[bufs++] = bh;
+
+			/* Record it so that we can wait for IO
+                           completion later */
+			BUFFER_TRACE(bh, "ph3: file as descriptor");
+			journal_file_buffer(descriptor, commit_transaction,
+					BJ_LogCtl);
+		}
+
+		/* Where is the buffer to be written? */
+
+		err = journal_next_log_block(journal, &blocknr);
+		/* If the block mapping failed, just abandon the buffer
+		   and repeat this loop: we'll fall into the
+		   refile-on-abort condition above. */
+		if (err) {
+			__journal_abort_hard(journal);
+			continue;
+		}
+
+		/*
+		 * start_this_handle() uses t_outstanding_credits to determine
+		 * the free space in the log, but this counter is changed
+		 * by journal_next_log_block() also.
+		 */
+		commit_transaction->t_outstanding_credits--;
+
+		/* Bump b_count to prevent truncate from stumbling over
+                   the shadowed buffer!  @@@ This can go if we ever get
+                   rid of the BJ_IO/BJ_Shadow pairing of buffers. */
+		atomic_inc(&jh2bh(jh)->b_count);
+
+		/* Make a temporary IO buffer with which to write it out
+                   (this will requeue both the metadata buffer and the
+                   temporary IO buffer). new_bh goes on BJ_IO*/
+
+		set_bit(BH_JWrite, &jh2bh(jh)->b_state);
+		/*
+		 * akpm: journal_write_metadata_buffer() sets
+		 * new_bh->b_transaction to commit_transaction.
+		 * We need to clean this up before we release new_bh
+		 * (which is of type BJ_IO)
+		 */
+		JBUFFER_TRACE(jh, "ph3: write metadata");
+		flags = journal_write_metadata_buffer(commit_transaction,
+						      jh, &new_jh, blocknr);
+		set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
+		wbuf[bufs++] = jh2bh(new_jh);
+
+		/* Record the new block's tag in the current descriptor
+                   buffer */
+
+		tag_flag = 0;
+		if (flags & 1)
+			tag_flag |= JFS_FLAG_ESCAPE;
+		if (!first_tag)
+			tag_flag |= JFS_FLAG_SAME_UUID;
+
+		tag = (journal_block_tag_t *) tagp;
+		tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
+		tag->t_flags = cpu_to_be32(tag_flag);
+		tagp += sizeof(journal_block_tag_t);
+		space_left -= sizeof(journal_block_tag_t);
+
+		if (first_tag) {
+			memcpy (tagp, journal->j_uuid, 16);
+			tagp += 16;
+			space_left -= 16;
+			first_tag = 0;
+		}
+
+		/* If there's no more to do, or if the descriptor is full,
+		   let the IO rip! */
+
+		if (bufs == journal->j_wbufsize ||
+		    commit_transaction->t_buffers == NULL ||
+		    space_left < sizeof(journal_block_tag_t) + 16) {
+
+			jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
+
+			/* Write an end-of-descriptor marker before
+                           submitting the IOs.  "tag" still points to
+                           the last tag we set up. */
+
+			tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
+
+start_journal_io:
+			for (i = 0; i < bufs; i++) {
+				struct buffer_head *bh = wbuf[i];
+				lock_buffer(bh);
+				clear_buffer_dirty(bh);
+				set_buffer_uptodate(bh);
+				bh->b_end_io = journal_end_buffer_io_sync;
+				submit_bh(WRITE, bh);
+			}
+			cond_resched();
+
+			/* Force a new descriptor to be generated next
+                           time round the loop. */
+			descriptor = NULL;
+			bufs = 0;
+		}
+	}
+
+	/* Lo and behold: we have just managed to send a transaction to
+           the log.  Before we can commit it, wait for the IO so far to
+           complete.  Control buffers being written are on the
+           transaction's t_log_list queue, and metadata buffers are on
+           the t_iobuf_list queue.
+
+	   Wait for the buffers in reverse order.  That way we are
+	   less likely to be woken up until all IOs have completed, and
+	   so we incur less scheduling load.
+	*/
+
+	jbd_debug(3, "JBD: commit phase 4\n");
+
+	/*
+	 * akpm: these are BJ_IO, and j_list_lock is not needed.
+	 * See __journal_try_to_free_buffer.
+	 */
+wait_for_iobuf:
+	while (commit_transaction->t_iobuf_list != NULL) {
+		struct buffer_head *bh;
+
+		jh = commit_transaction->t_iobuf_list->b_tprev;
+		bh = jh2bh(jh);
+		if (buffer_locked(bh)) {
+			wait_on_buffer(bh);
+			goto wait_for_iobuf;
+		}
+		if (cond_resched())
+			goto wait_for_iobuf;
+
+		if (unlikely(!buffer_uptodate(bh)))
+			err = -EIO;
+
+		clear_buffer_jwrite(bh);
+
+		JBUFFER_TRACE(jh, "ph4: unfile after journal write");
+		journal_unfile_buffer(journal, jh);
+
+		/*
+		 * ->t_iobuf_list should contain only dummy buffer_heads
+		 * which were created by journal_write_metadata_buffer().
+		 */
+		BUFFER_TRACE(bh, "dumping temporary bh");
+		journal_put_journal_head(jh);
+		__brelse(bh);
+		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
+		free_buffer_head(bh);
+
+		/* We also have to unlock and free the corresponding
+                   shadowed buffer */
+		jh = commit_transaction->t_shadow_list->b_tprev;
+		bh = jh2bh(jh);
+		clear_bit(BH_JWrite, &bh->b_state);
+		J_ASSERT_BH(bh, buffer_jbddirty(bh));
+
+		/* The metadata is now released for reuse, but we need
+                   to remember it against this transaction so that when
+                   we finally commit, we can do any checkpointing
+                   required. */
+		JBUFFER_TRACE(jh, "file as BJ_Forget");
+		journal_file_buffer(jh, commit_transaction, BJ_Forget);
+		/* Wake up any transactions which were waiting for this
+		   IO to complete */
+		wake_up_bit(&bh->b_state, BH_Unshadow);
+		JBUFFER_TRACE(jh, "brelse shadowed buffer");
+		__brelse(bh);
+	}
+
+	J_ASSERT (commit_transaction->t_shadow_list == NULL);
+
+	jbd_debug(3, "JBD: commit phase 5\n");
+
+	/* Here we wait for the revoke record and descriptor record buffers */
+ wait_for_ctlbuf:
+	while (commit_transaction->t_log_list != NULL) {
+		struct buffer_head *bh;
+
+		jh = commit_transaction->t_log_list->b_tprev;
+		bh = jh2bh(jh);
+		if (buffer_locked(bh)) {
+			wait_on_buffer(bh);
+			goto wait_for_ctlbuf;
+		}
+		if (cond_resched())
+			goto wait_for_ctlbuf;
+
+		if (unlikely(!buffer_uptodate(bh)))
+			err = -EIO;
+
+		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
+		clear_buffer_jwrite(bh);
+		journal_unfile_buffer(journal, jh);
+		journal_put_journal_head(jh);
+		__brelse(bh);		/* One for getblk */
+		/* AKPM: bforget here */
+	}
+
+	jbd_debug(3, "JBD: commit phase 6\n");
+
+	if (journal_write_commit_record(journal, commit_transaction))
+		err = -EIO;
+
+	if (err)
+		__journal_abort_hard(journal);
+
+	/* End of a transaction!  Finally, we can do checkpoint
+           processing: any buffers committed as a result of this
+           transaction can be removed from any checkpoint list it was on
+           before. */
+
+	jbd_debug(3, "JBD: commit phase 7\n");
+
+	J_ASSERT(commit_transaction->t_sync_datalist == NULL);
+	J_ASSERT(commit_transaction->t_buffers == NULL);
+	J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
+	J_ASSERT(commit_transaction->t_iobuf_list == NULL);
+	J_ASSERT(commit_transaction->t_shadow_list == NULL);
+	J_ASSERT(commit_transaction->t_log_list == NULL);
+
+restart_loop:
+	/*
+	 * As there are other places (journal_unmap_buffer()) adding buffers
+	 * to this list we have to be careful and hold the j_list_lock.
+	 */
+	spin_lock(&journal->j_list_lock);
+	while (commit_transaction->t_forget) {
+		transaction_t *cp_transaction;
+		struct buffer_head *bh;
+
+		jh = commit_transaction->t_forget;
+		spin_unlock(&journal->j_list_lock);
+		bh = jh2bh(jh);
+		jbd_lock_bh_state(bh);
+		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction ||
+			jh->b_transaction == journal->j_running_transaction);
+
+		/*
+		 * If there is undo-protected committed data against
+		 * this buffer, then we can remove it now.  If it is a
+		 * buffer needing such protection, the old frozen_data
+		 * field now points to a committed version of the
+		 * buffer, so rotate that field to the new committed
+		 * data.
+		 *
+		 * Otherwise, we can just throw away the frozen data now.
+		 */
+		if (jh->b_committed_data) {
+			jbd_slab_free(jh->b_committed_data, bh->b_size);
+			jh->b_committed_data = NULL;
+			if (jh->b_frozen_data) {
+				jh->b_committed_data = jh->b_frozen_data;
+				jh->b_frozen_data = NULL;
+			}
+		} else if (jh->b_frozen_data) {
+			jbd_slab_free(jh->b_frozen_data, bh->b_size);
+			jh->b_frozen_data = NULL;
+		}
+
+		spin_lock(&journal->j_list_lock);
+		cp_transaction = jh->b_cp_transaction;
+		if (cp_transaction) {
+			JBUFFER_TRACE(jh, "remove from old cp transaction");
+			__journal_remove_checkpoint(jh);
+		}
+
+		/* Only re-checkpoint the buffer_head if it is marked
+		 * dirty.  If the buffer was added to the BJ_Forget list
+		 * by journal_forget, it may no longer be dirty and
+		 * there's no point in keeping a checkpoint record for
+		 * it. */
+
+		/* A buffer which has been freed while still being
+		 * journaled by a previous transaction may end up still
+		 * being dirty here, but we want to avoid writing back
+		 * that buffer in the future now that the last use has
+		 * been committed.  That's not only a performance gain,
+		 * it also stops aliasing problems if the buffer is left
+		 * behind for writeback and gets reallocated for another
+		 * use in a different page. */
+		if (buffer_freed(bh)) {
+			clear_buffer_freed(bh);
+			clear_buffer_jbddirty(bh);
+		}
+
+		if (buffer_jbddirty(bh)) {
+			JBUFFER_TRACE(jh, "add to new checkpointing trans");
+			__journal_insert_checkpoint(jh, commit_transaction);
+			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
+			__journal_refile_buffer(jh);
+			jbd_unlock_bh_state(bh);
+		} else {
+			J_ASSERT_BH(bh, !buffer_dirty(bh));
+			/* The buffer on BJ_Forget list and not jbddirty means
+			 * it has been freed by this transaction and hence it
+			 * could not have been reallocated until this
+			 * transaction has committed. *BUT* it could be
+			 * reallocated once we have written all the data to
+			 * disk and before we process the buffer on BJ_Forget
+			 * list. */
+			JBUFFER_TRACE(jh, "refile or unfile freed buffer");
+			__journal_refile_buffer(jh);
+			if (!jh->b_transaction) {
+				jbd_unlock_bh_state(bh);
+				 /* needs a brelse */
+				journal_remove_journal_head(bh);
+				release_buffer_page(bh);
+			} else
+				jbd_unlock_bh_state(bh);
+		}
+		cond_resched_lock(&journal->j_list_lock);
+	}
+	spin_unlock(&journal->j_list_lock);
+	/*
+	 * This is a bit sleazy.  We borrow j_list_lock to protect
+	 * journal->j_committing_transaction in __journal_remove_checkpoint.
+	 * Really, __journal_remove_checkpoint should be using j_state_lock but
+	 * it's a bit hassle to hold that across __journal_remove_checkpoint
+	 */
+	spin_lock(&journal->j_state_lock);
+	spin_lock(&journal->j_list_lock);
+	/*
+	 * Now recheck if some buffers did not get attached to the transaction
+	 * while the lock was dropped...
+	 */
+	if (commit_transaction->t_forget) {
+		spin_unlock(&journal->j_list_lock);
+		spin_unlock(&journal->j_state_lock);
+		goto restart_loop;
+	}
+
+	/* Done with this transaction! */
+
+	jbd_debug(3, "JBD: commit phase 8\n");
+
+	J_ASSERT(commit_transaction->t_state == T_COMMIT);
+
+	commit_transaction->t_state = T_FINISHED;
+	J_ASSERT(commit_transaction == journal->j_committing_transaction);
+	journal->j_commit_sequence = commit_transaction->t_tid;
+	journal->j_committing_transaction = NULL;
+	spin_unlock(&journal->j_state_lock);
+
+	if (commit_transaction->t_checkpoint_list == NULL) {
+		__journal_drop_transaction(journal, commit_transaction);
+	} else {
+		if (journal->j_checkpoint_transactions == NULL) {
+			journal->j_checkpoint_transactions = commit_transaction;
+			commit_transaction->t_cpnext = commit_transaction;
+			commit_transaction->t_cpprev = commit_transaction;
+		} else {
+			commit_transaction->t_cpnext =
+				journal->j_checkpoint_transactions;
+			commit_transaction->t_cpprev =
+				commit_transaction->t_cpnext->t_cpprev;
+			commit_transaction->t_cpnext->t_cpprev =
+				commit_transaction;
+			commit_transaction->t_cpprev->t_cpnext =
+				commit_transaction;
+		}
+	}
+	spin_unlock(&journal->j_list_lock);
+
+	jbd_debug(1, "JBD: commit %d complete, head %d\n",
+		  journal->j_commit_sequence, journal->j_tail_sequence);
+
+	wake_up(&journal->j_wait_done_commit);
+}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
new file mode 100644
index 000000000000..c518dd8fe60a
--- /dev/null
+++ b/fs/jbd2/journal.c
@@ -0,0 +1,2072 @@
+/*
+ * linux/fs/jbd/journal.c
+ *
+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
+ *
+ * Copyright 1998 Red Hat corp --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Generic filesystem journal-writing code; part of the ext2fs
+ * journaling system.
+ *
+ * This file manages journals: areas of disk reserved for logging
+ * transactional updates.  This includes the kernel journaling thread
+ * which is responsible for scheduling updates to the log.
+ *
+ * We do not actually manage the physical storage of the journal in this
+ * file: that is left to a per-journal policy function, which allows us
+ * to store the journal within a filesystem-specified area for ext2
+ * journaling (ext2 can use a reserved inode for storing the log).
+ */
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <linux/pagemap.h>
+#include <linux/kthread.h>
+#include <linux/poison.h>
+#include <linux/proc_fs.h>
+
+#include <asm/uaccess.h>
+#include <asm/page.h>
+
+EXPORT_SYMBOL(journal_start);
+EXPORT_SYMBOL(journal_restart);
+EXPORT_SYMBOL(journal_extend);
+EXPORT_SYMBOL(journal_stop);
+EXPORT_SYMBOL(journal_lock_updates);
+EXPORT_SYMBOL(journal_unlock_updates);
+EXPORT_SYMBOL(journal_get_write_access);
+EXPORT_SYMBOL(journal_get_create_access);
+EXPORT_SYMBOL(journal_get_undo_access);
+EXPORT_SYMBOL(journal_dirty_data);
+EXPORT_SYMBOL(journal_dirty_metadata);
+EXPORT_SYMBOL(journal_release_buffer);
+EXPORT_SYMBOL(journal_forget);
+#if 0
+EXPORT_SYMBOL(journal_sync_buffer);
+#endif
+EXPORT_SYMBOL(journal_flush);
+EXPORT_SYMBOL(journal_revoke);
+
+EXPORT_SYMBOL(journal_init_dev);
+EXPORT_SYMBOL(journal_init_inode);
+EXPORT_SYMBOL(journal_update_format);
+EXPORT_SYMBOL(journal_check_used_features);
+EXPORT_SYMBOL(journal_check_available_features);
+EXPORT_SYMBOL(journal_set_features);
+EXPORT_SYMBOL(journal_create);
+EXPORT_SYMBOL(journal_load);
+EXPORT_SYMBOL(journal_destroy);
+EXPORT_SYMBOL(journal_update_superblock);
+EXPORT_SYMBOL(journal_abort);
+EXPORT_SYMBOL(journal_errno);
+EXPORT_SYMBOL(journal_ack_err);
+EXPORT_SYMBOL(journal_clear_err);
+EXPORT_SYMBOL(log_wait_commit);
+EXPORT_SYMBOL(journal_start_commit);
+EXPORT_SYMBOL(journal_force_commit_nested);
+EXPORT_SYMBOL(journal_wipe);
+EXPORT_SYMBOL(journal_blocks_per_page);
+EXPORT_SYMBOL(journal_invalidatepage);
+EXPORT_SYMBOL(journal_try_to_free_buffers);
+EXPORT_SYMBOL(journal_force_commit);
+
+static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
+static void __journal_abort_soft (journal_t *journal, int errno);
+static int journal_create_jbd_slab(size_t slab_size);
+
+/*
+ * Commit timer callback (installed by kjournald): __data is a task_struct
+ * pointer cast to unsigned long, and that task is woken.
+ */
+static void commit_timeout(unsigned long __data)
+{
+	struct task_struct *sleeper = (struct task_struct *)__data;
+
+	wake_up_process(sleeper);
+}
+
+/*
+ * kjournald: The main thread function used to manage a logging device
+ * journal.
+ *
+ * This kernel thread is responsible for two things:
+ *
+ * 1) COMMIT:  Every so often we need to commit the current state of the
+ *    filesystem to disk.  The journal thread is responsible for writing
+ *    all of the metadata buffers to disk.
+ *
+ * 2) CHECKPOINT: We cannot reuse a used section of the log file until all
+ *    of the data in that part of the log has been rewritten elsewhere on
+ *    the disk.  Flushing these old buffers to reclaim space in the log is
+ *    known as checkpointing, and this thread is responsible for that job.
+ */
+
+static int kjournald(void *arg)
+{
+	journal_t *journal = arg;
+	transaction_t *transaction;
+
+	/*
+	 * Set up the commit interval timer: when it fires, commit_timeout()
+	 * wakes this thread, which is passed in as the timer data.
+	 */
+	setup_timer(&journal->j_commit_timer, commit_timeout,
+			(unsigned long)current);
+
+	/* Publish the thread in j_task and wake waiters on j_wait_done_commit */
+	journal->j_task = current;
+	wake_up(&journal->j_wait_done_commit);
+
+	printk(KERN_INFO "kjournald starting.  Commit interval %ld seconds\n",
+			journal->j_commit_interval / HZ);
+
+	/*
+	 * Wait forever for commit wakeups; j_state_lock is held at "loop".
+	 */
+	spin_lock(&journal->j_state_lock);
+
+loop:
+	if (journal->j_flags & JFS_UNMOUNT)
+		goto end_loop;
+
+	jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
+		journal->j_commit_sequence, journal->j_commit_request);
+	/* A commit is outstanding: run it with j_state_lock dropped. */
+	if (journal->j_commit_sequence != journal->j_commit_request) {
+		jbd_debug(1, "OK, requests differ\n");
+		spin_unlock(&journal->j_state_lock);
+		del_timer_sync(&journal->j_commit_timer);
+		journal_commit_transaction(journal);
+		spin_lock(&journal->j_state_lock);
+		goto loop;
+	}
+
+	wake_up(&journal->j_wait_done_commit);
+	if (freezing(current)) {
+		/*
+		 * The simpler the better. Flushing journal isn't a
+		 * good idea, because that depends on threads that may
+		 * be already stopped.
+		 */
+		jbd_debug(1, "Now suspending kjournald\n");
+		spin_unlock(&journal->j_state_lock);
+		refrigerator();
+		spin_lock(&journal->j_state_lock);
+	} else {
+		/*
+		 * We assume on resume that commits are already there,
+		 * so we don't sleep
+		 */
+		DEFINE_WAIT(wait);
+		int should_sleep = 1;
+		/* Queue on j_wait_commit, then decide whether to sleep. */
+		prepare_to_wait(&journal->j_wait_commit, &wait,
+				TASK_INTERRUPTIBLE);
+		if (journal->j_commit_sequence != journal->j_commit_request)
+			should_sleep = 0;
+		transaction = journal->j_running_transaction;
+		if (transaction && time_after_eq(jiffies,
+						transaction->t_expires))
+			should_sleep = 0;
+		if (journal->j_flags & JFS_UNMOUNT)
+			should_sleep = 0;
+		if (should_sleep) {
+			spin_unlock(&journal->j_state_lock);
+			schedule();
+			spin_lock(&journal->j_state_lock);
+		}
+		finish_wait(&journal->j_wait_commit, &wait);
+	}
+
+	jbd_debug(1, "kjournald wakes\n");
+
+	/*
+	 * Running transaction past t_expires?  Then request its commit.
+	 */
+	transaction = journal->j_running_transaction;
+	if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
+		journal->j_commit_request = transaction->t_tid;
+		jbd_debug(1, "woke because of timeout\n");
+	}
+	goto loop;
+
+end_loop:
+	spin_unlock(&journal->j_state_lock);
+	del_timer_sync(&journal->j_commit_timer);
+	journal->j_task = NULL;		/* journal_kill_thread() waits for this */
+	wake_up(&journal->j_wait_done_commit);
+	jbd_debug(1, "Journal thread exiting.\n");
+	return 0;
+}
+/* Spawn kjournald; block until it has published itself in j_task. */
+static void journal_start_thread(journal_t *journal)
+{
+	kthread_run(kjournald, journal, "kjournald");	/* result unchecked */
+	wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
+}
+
+static void journal_kill_thread(journal_t *journal)
+{
+	spin_lock(&journal->j_state_lock);
+	journal->j_flags |= JFS_UNMOUNT;
+	/* Keep prodding kjournald until it clears j_task on its way out. */
+	while (journal->j_task) {
+		wake_up(&journal->j_wait_commit);
+		spin_unlock(&journal->j_state_lock);
+		wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
+		spin_lock(&journal->j_state_lock);
+	}
+	spin_unlock(&journal->j_state_lock);
+}
+
+/*
+ * journal_write_metadata_buffer: write a metadata buffer to the journal.
+ *
+ * Writes a metadata buffer to a given disk block.  The actual IO is not
+ * performed but a new buffer_head is constructed which labels the data
+ * to be written with the correct destination disk block.
+ *
+ * Any magic-number escaping which needs to be done will cause a
+ * copy-out here.  If the buffer happens to start with the
+ * JFS_MAGIC_NUMBER, then we can't write it to the log directly: the
+ * magic number is only written to the log for descriptor blocks.  In
+ * this case, we copy the data and replace the first word with 0, and we
+ * return a result code which indicates that this buffer needs to be
+ * marked as an escaped buffer in the corresponding log descriptor
+ * block.  The missing word can then be restored when the block is read
+ * during recovery.
+ *
+ * If the source buffer has already been modified by a new transaction
+ * since we took the last commit snapshot, we use the frozen copy of
+ * that data for IO.  If we end up using the existing buffer_head's data
+ * for the write, then we *have* to lock the buffer to prevent anyone
+ * else from using and possibly modifying it while the IO is in
+ * progress.
+ *
+ * The function returns a pointer to the buffer_heads to be used for IO.
+ *
+ * We assume that the journal has already been locked in this function.
+ *
+ * Return value:
+ *  <0: Error
+ * >=0: Finished OK
+ *
+ * On success:
+ * Bit 0 set == escape performed on the data
+ * Bit 1 set == buffer copy-out performed (kfree the data after IO)
+ */
+
+int journal_write_metadata_buffer(transaction_t *transaction,
+				  struct journal_head  *jh_in,
+				  struct journal_head **jh_out,
+				  unsigned long blocknr)
+{
+	int need_copy_out = 0;
+	int done_copy_out = 0;
+	int do_escape = 0;
+	char *mapped_data;
+	struct buffer_head *new_bh;
+	struct journal_head *new_jh;
+	struct page *new_page;
+	unsigned int new_offset;
+	struct buffer_head *bh_in = jh2bh(jh_in);
+
+	/*
+	 * The buffer really shouldn't be locked: only the current committing
+	 * transaction is allowed to write it, so nobody else is allowed
+	 * to do any IO.
+	 *
+	 * akpm: except if we're journalling data, and write() output is
+	 * also part of a shared mapping, and another thread has
+	 * decided to launch a writepage() against this buffer.
+	 */
+	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
+
+	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+
+	/*
+	 * If a new transaction has already done a buffer copy-out, then
+	 * we use that version of the data for the commit.
+	 */
+	jbd_lock_bh_state(bh_in);
+repeat:
+	if (jh_in->b_frozen_data) {
+		done_copy_out = 1;
+		new_page = virt_to_page(jh_in->b_frozen_data);
+		new_offset = offset_in_page(jh_in->b_frozen_data);
+	} else {
+		new_page = jh2bh(jh_in)->b_page;
+		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
+	}
+
+	mapped_data = kmap_atomic(new_page, KM_USER0);
+	/*
+	 * Check for escaping: does the data start with the journal magic?
+	 */
+	if (*((__be32 *)(mapped_data + new_offset)) ==
+				cpu_to_be32(JFS_MAGIC_NUMBER)) {
+		need_copy_out = 1;
+		do_escape = 1;
+	}
+	kunmap_atomic(mapped_data, KM_USER0);
+
+	/*
+	 * Do we need to do a data copy?
+	 */
+	if (need_copy_out && !done_copy_out) {
+		char *tmp;
+		/* Allocate with the bh state lock dropped, then recheck. */
+		jbd_unlock_bh_state(bh_in);
+		tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
+		jbd_lock_bh_state(bh_in);
+		if (jh_in->b_frozen_data) {
+			jbd_slab_free(tmp, bh_in->b_size);
+			goto repeat;
+		}
+		/* Freeze a private copy and point the IO buffer at it. */
+		jh_in->b_frozen_data = tmp;
+		mapped_data = kmap_atomic(new_page, KM_USER0);
+		memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
+		kunmap_atomic(mapped_data, KM_USER0);
+
+		new_page = virt_to_page(tmp);
+		new_offset = offset_in_page(tmp);
+		done_copy_out = 1;
+	}
+
+	/*
+	 * Did we need to do an escaping?  Now we've done all the
+	 * copying, we can finally do so.
+	 */
+	if (do_escape) {
+		mapped_data = kmap_atomic(new_page, KM_USER0);
+		*((unsigned int *)(mapped_data + new_offset)) = 0;
+		kunmap_atomic(mapped_data, KM_USER0);
+	}
+
+	/* keep subsequent assertions sane */
+	new_bh->b_state = 0;
+	init_buffer(new_bh, NULL, NULL);
+	atomic_set(&new_bh->b_count, 1);
+	jbd_unlock_bh_state(bh_in);
+
+	new_jh = journal_add_journal_head(new_bh);	/* This sleeps */
+
+	set_bh_page(new_bh, new_page, new_offset);
+	new_jh->b_transaction = NULL;
+	new_bh->b_size = jh2bh(jh_in)->b_size;
+	new_bh->b_bdev = transaction->t_journal->j_dev;
+	new_bh->b_blocknr = blocknr;
+	set_buffer_mapped(new_bh);
+	set_buffer_dirty(new_bh);
+
+	*jh_out = new_jh;
+
+	/*
+	 * The to-be-written buffer needs to get moved to the io queue,
+	 * and the original buffer whose contents we are shadowing or
+	 * copying is moved to the transaction's shadow queue.
+	 */
+	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
+	journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	JBUFFER_TRACE(new_jh, "file as BJ_IO");
+	journal_file_buffer(new_jh, transaction, BJ_IO);
+	/* Bit 0: escape done; bit 1: IO uses a frozen copy of the data. */
+	return do_escape | (done_copy_out << 1);
+}
+
+/*
+ * Allocation code for the journal file.  Manage the space left in the
+ * journal, so that we can begin checkpointing when appropriate.
+ */
+
+/*
+ * __log_space_left: Return a pessimistic count of the free blocks left
+ * in the journal: j_free less a fixed reserve, less one eighth of the
+ * remainder, and never below zero.
+ *
+ * Called under j_state_lock (asserted on entry).
+ */
+
+int __log_space_left(journal_t *journal)
+{
+	int avail = journal->j_free;
+
+	assert_spin_locked(&journal->j_state_lock);
+
+	/*
+	 * Set aside blocks that log descriptor control blocks might
+	 * need before reporting anything as free.
+	 */
+
+#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */
+
+	avail -= MIN_LOG_RESERVED_BLOCKS;
+	if (avail <= 0)
+		return 0;
+
+	/* Hold back a further eighth of what is left. */
+	return avail - (avail >> 3);
+}
+
+/*
+ * Called under j_state_lock.  Returns true if a transaction was started.
+ */
+int __log_start_commit(journal_t *journal, tid_t target)
+{
+	/*
+	 * Nothing to do when the commit already requested is at least
+	 * as recent as the one being asked for.
+	 */
+	if (tid_geq(journal->j_commit_request, target))
+		return 0;
+
+	/*
+	 * Otherwise record the new request and wake the commit thread;
+	 * the commit itself is never performed from here.
+	 */
+	journal->j_commit_request = target;
+	jbd_debug(1, "JBD: requesting commit %d/%d\n",
+		  journal->j_commit_request,
+		  journal->j_commit_sequence);
+	wake_up(&journal->j_wait_commit);
+	return 1;
+}
+
+int log_start_commit(journal_t *journal, tid_t tid)
+{
+	int started;
+	/* Locked wrapper: __log_start_commit() is called under j_state_lock. */
+	spin_lock(&journal->j_state_lock);
+	started = __log_start_commit(journal, tid);
+	spin_unlock(&journal->j_state_lock);
+	return started;
+}
+
+/*
+ * Force and wait upon a commit if the calling process is not within
+ * transaction.  This is used for forcing out undo-protected data which contains
+ * bitmaps, when the fs is running out of space.
+ *
+ * We can only force the running transaction if we don't have an active handle;
+ * otherwise, we will deadlock.
+ *
+ * Returns true if a transaction was started.
+ */
+int journal_force_commit_nested(journal_t *journal)
+{
+	transaction_t *target;
+	tid_t wait_tid = 0;
+
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_running_transaction && !current->journal_info) {
+		/* No handle held by us: safe to push the running one. */
+		target = journal->j_running_transaction;
+		__log_start_commit(journal, target->t_tid);
+	} else {
+		/* Otherwise only an in-flight commit can be waited on. */
+		target = journal->j_committing_transaction;
+	}
+	if (target)
+		wait_tid = target->t_tid;
+	spin_unlock(&journal->j_state_lock);
+	if (!target)
+		return 0;	/* Nothing to retry */
+	log_wait_commit(journal, wait_tid);
+	return 1;
+}
+
+/*
+ * Start a commit of the current running transaction (if any).  Returns true
+ * if a commit is queued or in progress; its tid is stored in *ptid if set.
+ */
+int journal_start_commit(journal_t *journal, tid_t *ptid)
+{
+	int ret = 0;
+
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_running_transaction) {
+		tid_t tid = journal->j_running_transaction->t_tid;
+
+		/* Report success even if someone else queued it first. */
+		__log_start_commit(journal, tid);
+		if (ptid)
+			*ptid = tid;
+		ret = 1;
+	} else if (journal->j_committing_transaction) {
+		/* A commit is already in flight: report its tid instead. */
+		if (ptid)
+			*ptid = journal->j_committing_transaction->t_tid;
+		ret = 1;
+	}
+	spin_unlock(&journal->j_state_lock);
+	return ret;
+}
+
+/*
+ * Block until the commit of transaction @tid has completed, i.e. until
+ * j_commit_sequence has caught up with @tid.
+ * The caller may not hold the journal lock.
+ *
+ * Returns 0 normally, or -EIO if the journal is found aborted once the
+ * wait is over.
+ */
+int log_wait_commit(journal_t *journal, tid_t tid)
+{
+#ifdef CONFIG_JBD_DEBUG
+	/* Debug aid: complain if nobody has requested this commit yet. */
+	spin_lock(&journal->j_state_lock);
+	if (!tid_geq(journal->j_commit_request, tid)) {
+		printk(KERN_EMERG
+		       "%s: error: j_commit_request=%d, tid=%d\n",
+		       __FUNCTION__, journal->j_commit_request, tid);
+	}
+	spin_unlock(&journal->j_state_lock);
+#endif
+	spin_lock(&journal->j_state_lock);
+	for (;;) {
+		if (!tid_gt(tid, journal->j_commit_sequence))
+			break;
+
+		jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
+				  tid, journal->j_commit_sequence);
+		wake_up(&journal->j_wait_commit);
+
+		/* Sleep without the state lock, then re-check under it. */
+		spin_unlock(&journal->j_state_lock);
+		wait_event(journal->j_wait_done_commit,
+				!tid_gt(tid, journal->j_commit_sequence));
+		spin_lock(&journal->j_state_lock);
+	}
+	spin_unlock(&journal->j_state_lock);
+
+	if (likely(!is_journal_aborted(journal)))
+		return 0;
+
+	printk(KERN_EMERG "journal commit I/O error\n");
+	return -EIO;
+}
+
+/*
+ * Log buffer allocation routines:
+ */
+
+/*
+ * Hand out the next free block of the log.  Under j_state_lock the head
+ * is advanced by one (wrapping from j_last back to j_first) and the free
+ * count is decremented; the logical block taken is then translated by
+ * journal_bmap(), whose result is stored in *retp and whose return value
+ * is passed back to the caller.
+ */
+int journal_next_log_block(journal_t *journal, unsigned long *retp)
+{
+	unsigned long log_block;
+
+	spin_lock(&journal->j_state_lock);
+	J_ASSERT(journal->j_free > 1);
+
+	log_block = journal->j_head++;
+	journal->j_free--;
+	if (journal->j_head == journal->j_last)
+		journal->j_head = journal->j_first;
+	spin_unlock(&journal->j_state_lock);
+
+	return journal_bmap(journal, log_block, retp);
+}
+
+/*
+ * Translate a logical journal block number into a physical one.
+ *
+ * Journals without an inode (external journals) are identity-mapped, so
+ * the block number is passed straight through.  If needed, we can use
+ * j_blk_offset - everything is ready.
+ *
+ * Inode-backed journals go through bmap(); a zero result is reported,
+ * the journal is soft-aborted with -EIO, and -EIO is returned with *retp
+ * left untouched.  Returns 0 on success.
+ */
+int journal_bmap(journal_t *journal, unsigned long blocknr,
+		 unsigned long *retp)
+{
+	char devname[BDEVNAME_SIZE];
+	unsigned long phys;
+
+	if (!journal->j_inode) {
+		*retp = blocknr; /* +journal->j_blk_offset */
+		return 0;
+	}
+
+	phys = bmap(journal->j_inode, blocknr);
+	if (phys) {
+		*retp = phys;
+		return 0;
+	}
+
+	printk(KERN_ALERT "%s: journal block not found "
+			"at offset %lu on %s\n",
+		__FUNCTION__,
+		blocknr,
+		bdevname(journal->j_dev, devname));
+	__journal_abort_soft(journal, -EIO);
+	return -EIO;
+}
+
+/*
+ * We play buffer_head aliasing tricks to write data/metadata blocks to
+ * the journal without copying their contents, but for journal
+ * descriptor blocks we do need to generate bona fide buffers.
+ *
+ * After the caller of journal_get_descriptor_buffer() has finished modifying
+ * the buffer's contents they really should run flush_dcache_page(bh->b_page).
+ * But we don't bother doing that, so there will be coherency problems with
+ * mmaps of blockdevs which hold live JBD-controlled filesystems.
+ *
+ * Takes the next log block, zero-fills a buffer for it, marks the buffer
+ * uptodate and returns it wrapped in a journal_head.  Returns NULL if no
+ * log block could be mapped or no buffer could be obtained for it.
+ */
+struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
+{
+	struct buffer_head *bh;
+	unsigned long blocknr;
+	int err;
+
+	err = journal_next_log_block(journal, &blocknr);
+
+	if (err)
+		return NULL;
+
+	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+	/* Don't hand a NULL buffer to lock_buffer(); fail like the path above */
+	if (!bh)
+		return NULL;
+	lock_buffer(bh);
+	memset(bh->b_data, 0, journal->j_blocksize);
+	set_buffer_uptodate(bh);
+	unlock_buffer(bh);
+	BUFFER_TRACE(bh, "return this buffer");
+	return journal_add_journal_head(bh);
+}
+
+/*
+ * Management for journal control blocks: functions to create and
+ * destroy journal_t structures, and to initialise and read existing
+ * journal blocks from disk.  */
+
+/* First: allocate and set up a journal_t object in memory.  Only a few
+ * fields are initialised at this point (wait queues, mutexes, spinlocks,
+ * the commit interval and the revoke table); the rest has to wait until
+ * the journal structures have been created from scratch or loaded from
+ * disk.  Returns NULL if either allocation fails. */
+
+static journal_t * journal_init_common (void)
+{
+	journal_t *journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
+
+	if (journal == NULL)
+		return NULL;
+	memset(journal, 0, sizeof(*journal));
+
+	/* Wait queues */
+	init_waitqueue_head(&journal->j_wait_transaction_locked);
+	init_waitqueue_head(&journal->j_wait_logspace);
+	init_waitqueue_head(&journal->j_wait_done_commit);
+	init_waitqueue_head(&journal->j_wait_checkpoint);
+	init_waitqueue_head(&journal->j_wait_commit);
+	init_waitqueue_head(&journal->j_wait_updates);
+
+	/* Sleeping and spinning locks */
+	mutex_init(&journal->j_barrier);
+	mutex_init(&journal->j_checkpoint_mutex);
+	spin_lock_init(&journal->j_revoke_lock);
+	spin_lock_init(&journal->j_list_lock);
+	spin_lock_init(&journal->j_state_lock);
+
+	journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE);
+
+	/* The journal is marked for error until we succeed with recovery! */
+	journal->j_flags = JFS_ABORT;
+
+	/* Set up a default-sized revoke table for the new mount. */
+	if (journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH)) {
+		kfree(journal);
+		return NULL;
+	}
+
+	return journal;
+}
+
+/* journal_init_dev and journal_init_inode:
+ *
+ * Create a journal structure assigned some fixed set of disk blocks to
+ * the journal.  We don't actually touch those disk blocks yet, but we
+ * need to set up all of the mapping information to tell the journaling
+ * system where the journal blocks are.
+ *
+ */
+
+/**
+ *  journal_t * journal_init_dev() - creates and initialises a journal structure
+ *  @bdev: Block device on which to create the journal
+ *  @fs_dev: Device which hold journalled filesystem for this journal.
+ *  @start: Block nr Start of journal.
+ *  @len:  Length of the journal in blocks.
+ *  @blocksize: blocksize of journalling device
+ *  @returns: a newly created journal_t *, or NULL if the journal
+ *            structure or its write-buffer array cannot be allocated
+ *
+ *  journal_init_dev creates a journal which maps a fixed contiguous
+ *  range of blocks on an arbitrary block device.
+ *
+ */
+journal_t * journal_init_dev(struct block_device *bdev,
+			struct block_device *fs_dev,
+			int start, int len, int blocksize)
+{
+	journal_t *journal = journal_init_common();
+	struct buffer_head *bh;
+	int n;
+
+	if (!journal)
+		return NULL;
+
+	/* journal descriptor can store up to n blocks -bzzz */
+	journal->j_blocksize = blocksize;
+	n = journal->j_blocksize / sizeof(journal_block_tag_t);
+	journal->j_wbufsize = n;
+	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
+	if (!journal->j_wbuf) {
+		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+			__FUNCTION__);
+		/*
+		 * Bail out right here: the journal has just been freed, so
+		 * none of the field setup below may touch it.
+		 */
+		kfree(journal);
+		return NULL;
+	}
+	journal->j_dev = bdev;
+	journal->j_fs_dev = fs_dev;
+	journal->j_blk_offset = start;
+	journal->j_maxlen = len;
+
+	/* The journal superblock lives in block @start of the device. */
+	bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+	J_ASSERT(bh != NULL);
+	journal->j_sb_buffer = bh;
+	journal->j_superblock = (journal_superblock_t *)bh->b_data;
+
+	return journal;
+}
+
+/**
+ *  journal_t * journal_init_inode () - creates a journal which maps to a inode.
+ *  @inode: An inode to create the journal in
+ *
+ * journal_init_inode creates a journal which maps an on-disk inode as
+ * the journal.  The inode must exist already, must support bmap() and
+ * must have all data blocks preallocated.
+ *
+ * Returns the new journal_t, or NULL if the journal structure or its
+ * write-buffer array cannot be allocated, or if block 0 of the inode
+ * cannot be mapped.
+ */
+journal_t * journal_init_inode (struct inode *inode)
+{
+	struct buffer_head *bh;
+	journal_t *journal = journal_init_common();
+	int err;
+	int n;
+	unsigned long blocknr;
+
+	if (!journal)
+		return NULL;
+
+	journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
+	journal->j_inode = inode;
+	jbd_debug(1,
+		  "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
+		  journal, inode->i_sb->s_id, inode->i_ino,
+		  (long long) inode->i_size,
+		  inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
+
+	/* Journal length in blocks is derived from the inode size. */
+	journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+	journal->j_blocksize = inode->i_sb->s_blocksize;
+
+	/* journal descriptor can store up to n blocks -bzzz */
+	n = journal->j_blocksize / sizeof(journal_block_tag_t);
+	journal->j_wbufsize = n;
+	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
+	if (!journal->j_wbuf) {
+		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+			__FUNCTION__);
+		kfree(journal);
+		return NULL;
+	}
+
+	/* The journal superblock is logical block 0 of the inode. */
+	err = journal_bmap(journal, 0, &blocknr);
+	/* If that failed, give up */
+	if (err) {
+		printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
+		       __FUNCTION__);
+		/* Release the write-buffer array too, not just the journal */
+		kfree(journal->j_wbuf);
+		kfree(journal);
+		return NULL;
+	}
+
+	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+	J_ASSERT(bh != NULL);
+	journal->j_sb_buffer = bh;
+	journal->j_superblock = (journal_superblock_t *)bh->b_data;
+
+	return journal;
+}
+
+/*
+ * Called when journal init or create bails out: drop our reference on
+ * the superblock buffer and clear j_sb_buffer.  journal_destroy() only
+ * writes the superblock back when j_sb_buffer is non-NULL, so this keeps
+ * it from writing a bogus superblock.
+ */
+static void journal_fail_superblock (journal_t *journal)
+{
+	brelse(journal->j_sb_buffer);
+	journal->j_sb_buffer = NULL;
+}
+
+/*
+ * Prepare a journal_t for a fresh journaling session.  Used both by
+ * journal_create() for a brand-new journal and by journal_load() once
+ * recovery of an old journal is done.
+ *
+ * The log bounds come from the superblock (s_first .. s_maxlen), the log
+ * is made empty (head == tail == first), the sequence counters are
+ * restarted from j_transaction_sequence, then the superblock is written
+ * out and the commit thread is started.  Always returns 0.
+ */
+
+static int journal_reset(journal_t *journal)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+	unsigned long log_start = be32_to_cpu(sb->s_first);
+	unsigned long log_end = be32_to_cpu(sb->s_maxlen);
+
+	/* Log geometry: an empty log spanning [log_start, log_end). */
+	journal->j_first = log_start;
+	journal->j_last = log_end;
+	journal->j_head = log_start;
+	journal->j_tail = log_start;
+	journal->j_free = log_end - log_start;
+
+	/* Nothing committed or requested yet in this session. */
+	journal->j_tail_sequence = journal->j_transaction_sequence;
+	journal->j_commit_sequence = journal->j_transaction_sequence - 1;
+	journal->j_commit_request = journal->j_commit_sequence;
+
+	journal->j_max_transaction_buffers = journal->j_maxlen / 4;
+
+	/* Add the dynamic fields and write it to disk. */
+	journal_update_superblock(journal, 1);
+	journal_start_thread(journal);
+
+	return 0;
+}
+
+/**
+ * int journal_create() - Initialise the new journal file
+ * @journal: Journal to create. This structure must have been initialised
+ *
+ * Given a journal_t structure which tells us which disk blocks we can
+ * use, zero every journal block on disk, build a new version 2 journal
+ * superblock in memory and hand over to journal_reset() to fill in the
+ * dynamic fields and write it out.
+ *
+ * Returns -EINVAL if the journal is shorter than JFS_MIN_JOURNAL_BLOCKS,
+ * the journal_bmap() error if a block cannot be mapped, otherwise the
+ * result of journal_reset().
+ **/
+int journal_create(journal_t *journal)
+{
+	journal_superblock_t *sb;
+	struct buffer_head *bh;
+	unsigned long phys;
+	int blk, err;
+
+	if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) {
+		printk (KERN_ERR "Journal length (%d blocks) too short.\n",
+			journal->j_maxlen);
+		journal_fail_superblock(journal);
+		return -EINVAL;
+	}
+
+	if (!journal->j_inode) {
+		/*
+		 * We don't know what block to start at!
+		 */
+		printk(KERN_EMERG
+		       "%s: creation of journal on external device!\n",
+		       __FUNCTION__);
+		BUG();
+	}
+
+	/* Zero out the entire journal on disk.  We cannot afford to
+	   have any blocks on disk beginning with JFS_MAGIC_NUMBER. */
+	jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
+	for (blk = 0; blk < journal->j_maxlen; blk++) {
+		err = journal_bmap(journal, blk, &phys);
+		if (err)
+			return err;
+
+		bh = __getblk(journal->j_dev, phys, journal->j_blocksize);
+		lock_buffer(bh);
+		memset (bh->b_data, 0, journal->j_blocksize);
+		BUFFER_TRACE(bh, "marking dirty");
+		mark_buffer_dirty(bh);
+		BUFFER_TRACE(bh, "marking uptodate");
+		set_buffer_uptodate(bh);
+		unlock_buffer(bh);
+		__brelse(bh);
+	}
+
+	sync_blockdev(journal->j_dev);
+	jbd_debug(1, "JBD: journal cleared.\n");
+
+	/* OK, fill in the initial static fields in the new superblock */
+	sb = journal->j_superblock;
+
+	sb->s_header.h_magic	 = cpu_to_be32(JFS_MAGIC_NUMBER);
+	sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
+
+	sb->s_blocksize	= cpu_to_be32(journal->j_blocksize);
+	sb->s_maxlen	= cpu_to_be32(journal->j_maxlen);
+	sb->s_first	= cpu_to_be32(1);
+
+	/* In-memory state: first transaction, version 2, not aborted. */
+	journal->j_transaction_sequence = 1;
+	journal->j_format_version = 2;
+	journal->j_flags &= ~JFS_ABORT;
+
+	return journal_reset(journal);
+}
+
+/**
+ * void journal_update_superblock() - Update journal sb on disk.
+ * @journal: The journal to update.
+ * @wait: Set to '0' if you don't want to wait for IO completion.
+ *
+ * Update a journal's dynamic superblock fields (s_sequence, s_start and
+ * s_errno) from the in-memory journal and write the superblock buffer to
+ * disk, optionally waiting for the IO to complete.  On every path,
+ * including the skipped-write one, JFS_FLUSHED is set or cleared to
+ * match whether s_start is zero.
+ */
+void journal_update_superblock(journal_t *journal, int wait)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+	struct buffer_head *bh = journal->j_sb_buffer;
+
+	/*
+	 * As a special case, if the on-disk copy is already marked as needing
+	 * no recovery (s_start == 0) and there are no outstanding transactions
+	 * in the filesystem, then we can safely defer the superblock update
+	 * until the next commit by setting JFS_FLUSHED.  This avoids
+	 * attempting a write to a potential-readonly device.
+	 *
+	 * Note that this test is made without holding j_state_lock.
+	 */
+	if (sb->s_start == 0 && journal->j_tail_sequence ==
+				journal->j_transaction_sequence) {
+		jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
+			"(start %ld, seq %d, errno %d)\n",
+			journal->j_tail, journal->j_tail_sequence,
+			journal->j_errno);
+		goto out;
+	}
+
+	/* Copy the dynamic fields into the superblock under j_state_lock. */
+	spin_lock(&journal->j_state_lock);
+	jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
+		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+
+	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
+	sb->s_start    = cpu_to_be32(journal->j_tail);
+	sb->s_errno    = cpu_to_be32(journal->j_errno);
+	spin_unlock(&journal->j_state_lock);
+
+	/* The lock is dropped before the buffer is submitted for IO. */
+	BUFFER_TRACE(bh, "marking dirty");
+	mark_buffer_dirty(bh);
+	if (wait)
+		sync_dirty_buffer(bh);
+	else
+		ll_rw_block(SWRITE, 1, &bh);
+
+out:
+	/* If we have just flushed the log (by marking s_start==0), then
+	 * any future commit will have to be careful to update the
+	 * superblock again to re-record the true start of the log. */
+
+	spin_lock(&journal->j_state_lock);
+	if (sb->s_start)
+		journal->j_flags &= ~JFS_FLUSHED;
+	else
+		journal->j_flags |= JFS_FLUSHED;
+	spin_unlock(&journal->j_state_lock);
+}
+
+/*
+ * Read the superblock for a given journal (if its buffer is not already
+ * uptodate) and perform initial validation of the format: magic number,
+ * block size, superblock version and journal length.
+ *
+ * j_format_version is set from the block type, and j_maxlen is shrunk
+ * to the on-disk s_maxlen when that is smaller.  Returns 0 on success.
+ * On failure the superblock buffer is dropped via
+ * journal_fail_superblock() and -EIO (read error) or -EINVAL (bad
+ * format) is returned.
+ */
+
+static int journal_get_superblock(journal_t *journal)
+{
+	struct buffer_head *bh = journal->j_sb_buffer;
+	journal_superblock_t *sb;
+	int err;
+
+	J_ASSERT(bh != NULL);
+	if (!buffer_uptodate(bh)) {
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh)) {
+			printk (KERN_ERR
+				"JBD: IO error reading journal superblock\n");
+			err = -EIO;
+			goto fail;
+		}
+	}
+
+	/* From here on, every failure is a format problem. */
+	err = -EINVAL;
+	sb = journal->j_superblock;
+
+	if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) ||
+	    sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
+		printk(KERN_WARNING "JBD: no valid journal superblock found\n");
+		goto fail;
+	}
+
+	switch(be32_to_cpu(sb->s_header.h_blocktype)) {
+	case JFS_SUPERBLOCK_V1:
+		journal->j_format_version = 1;
+		break;
+	case JFS_SUPERBLOCK_V2:
+		journal->j_format_version = 2;
+		break;
+	default:
+		printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
+		goto fail;
+	}
+
+	/* The superblock may not claim more blocks than we were given. */
+	if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
+		printk (KERN_WARNING "JBD: journal file too short\n");
+		goto fail;
+	}
+	if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
+		journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
+
+	return 0;
+
+fail:
+	journal_fail_superblock(journal);
+	return err;
+}
+
+/*
+ * Fetch and validate the on-disk journal superblock, then copy its key
+ * fields (sequence, start, first, maxlen, errno) into the journal_t,
+ * converting from big-endian.  Returns 0, or the error from
+ * journal_get_superblock().
+ */
+
+static int load_superblock(journal_t *journal)
+{
+	journal_superblock_t *sb;
+	int err = journal_get_superblock(journal);
+
+	if (err != 0)
+		return err;
+
+	sb = journal->j_superblock;
+
+	journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
+	journal->j_tail          = be32_to_cpu(sb->s_start);
+	journal->j_first         = be32_to_cpu(sb->s_first);
+	journal->j_last          = be32_to_cpu(sb->s_maxlen);
+	journal->j_errno         = be32_to_cpu(sb->s_errno);
+
+	return 0;
+}
+
+
+/**
+ * int journal_load() - Read journal from disk.
+ * @journal: Journal to act on.
+ *
+ * Given a journal_t structure which tells us which disk blocks contain
+ * a journal, read the journal from disk to initialise the in-memory
+ * structures: load the superblock, reject unknown feature bits on a V2
+ * superblock, make sure a slab exists for the journal block size, run
+ * recovery and finally reset the journal for use.
+ *
+ * Returns 0 on success (JFS_ABORT cleared, JFS_LOADED set), -EINVAL for
+ * unrecognised features, -EIO if recovery or reset fails, or the error
+ * from the superblock load / slab creation.
+ */
+int journal_load(journal_t *journal)
+{
+	journal_superblock_t *sb;
+	int err = load_superblock(journal);
+
+	if (err)
+		return err;
+
+	sb = journal->j_superblock;
+
+	/* If this is a V2 superblock, then we have to check the
+	 * features flags on it. */
+	if (journal->j_format_version >= 2 &&
+	    ((sb->s_feature_ro_compat &
+	      ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
+	     (sb->s_feature_incompat &
+	      ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES)))) {
+		printk (KERN_WARNING
+			"JBD: Unrecognised features on journal\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Create a slab for this blocksize
+	 */
+	err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
+	if (err)
+		return err;
+
+	/* Let the recovery code check whether it needs to recover any
+	 * data from the journal, then reinitialise the dynamic contents
+	 * of the superblock in memory and reset them on disk.  Reset is
+	 * only attempted when recovery succeeded. */
+	if (journal_recover(journal) || journal_reset(journal)) {
+		printk (KERN_WARNING "JBD: recovery failed\n");
+		return -EIO;
+	}
+
+	journal->j_flags &= ~JFS_ABORT;
+	journal->j_flags |= JFS_LOADED;
+	return 0;
+}
+
+/**
+ * void journal_destroy() - Release a journal_t structure.
+ * @journal: Journal to act on.
+ *
+ * Release a journal_t structure once it is no longer in use by the
+ * journaled object.  The commit thread is stopped, any running
+ * transaction is committed, all checkpoint transactions are flushed,
+ * the superblock is rewritten to mark the journal empty (when a
+ * superblock buffer is still attached) and finally the inode reference,
+ * revoke table, write-buffer array and the journal_t itself are released.
+ */
+void journal_destroy(journal_t *journal)
+{
+	/* Wait for the commit thread to wake up and die. */
+	journal_kill_thread(journal);
+
+	/* Force a final log commit */
+	if (journal->j_running_transaction)
+		journal_commit_transaction(journal);
+
+	/* Force any old transactions to disk */
+
+	/* Totally anal locking here... */
+	/* j_list_lock is dropped around each log_do_checkpoint() call and
+	 * retaken before the list head is examined again. */
+	spin_lock(&journal->j_list_lock);
+	while (journal->j_checkpoint_transactions != NULL) {
+		spin_unlock(&journal->j_list_lock);
+		log_do_checkpoint(journal);
+		spin_lock(&journal->j_list_lock);
+	}
+
+	J_ASSERT(journal->j_running_transaction == NULL);
+	J_ASSERT(journal->j_committing_transaction == NULL);
+	J_ASSERT(journal->j_checkpoint_transactions == NULL);
+	spin_unlock(&journal->j_list_lock);
+
+	/* We can now mark the journal as empty. */
+	journal->j_tail = 0;
+	journal->j_tail_sequence = ++journal->j_transaction_sequence;
+	/* j_sb_buffer is NULL if journal_fail_superblock() has been called;
+	 * in that case nothing is written back. */
+	if (journal->j_sb_buffer) {
+		journal_update_superblock(journal, 1);
+		brelse(journal->j_sb_buffer);
+	}
+
+	if (journal->j_inode)
+		iput(journal->j_inode);
+	if (journal->j_revoke)
+		journal_destroy_revoke(journal);
+	kfree(journal->j_wbuf);
+	kfree(journal);
+}
+
+
+/**
+ *int journal_check_used_features () - Check if features specified are used.
+ * @journal: Journal to check.
+ * @compat: bitmask of compatible features
+ * @ro: bitmask of features that force read-only mount
+ * @incompat: bitmask of incompatible features
+ *
+ * Check whether the journal uses all of a given set of
+ * features.  Return true (non-zero) if it does.  An empty request is
+ * trivially satisfied; a version 1 journal never has any features set.
+ **/
+
+int journal_check_used_features (journal_t *journal, unsigned long compat,
+				 unsigned long ro, unsigned long incompat)
+{
+	journal_superblock_t *sb;
+
+	if (!(compat || ro || incompat))
+		return 1;
+	if (journal->j_format_version == 1)
+		return 0;
+
+	sb = journal->j_superblock;
+
+	/* Every requested bit must be present in the matching sb field. */
+	if ((be32_to_cpu(sb->s_feature_compat) & compat) != compat)
+		return 0;
+	if ((be32_to_cpu(sb->s_feature_ro_compat) & ro) != ro)
+		return 0;
+	if ((be32_to_cpu(sb->s_feature_incompat) & incompat) != incompat)
+		return 0;
+
+	return 1;
+}
+
+/**
+ * int journal_check_available_features() - Check feature set in journalling layer
+ * @journal: Journal to check.
+ * @compat: bitmask of compatible features
+ * @ro: bitmask of features that force read-only mount
+ * @incompat: bitmask of incompatible features
+ *
+ * Check whether the journaling code supports the use of
+ * all of a given set of features on this journal.  Return true
+ * (non-zero) if it can.  Only the journal's format version and the
+ * JFS_KNOWN_*_FEATURES masks are consulted; the on-disk superblock is
+ * not read. */
+
+int journal_check_available_features (journal_t *journal, unsigned long compat,
+				      unsigned long ro, unsigned long incompat)
+{
+	if (!compat && !ro && !incompat)
+		return 1;
+
+	/* We can support any known requested features iff the
+	 * superblock is in version 2.  Otherwise we fail to support any
+	 * extended sb features. */
+
+	if (journal->j_format_version != 2)
+		return 0;
+
+	if ((compat   & JFS_KNOWN_COMPAT_FEATURES) == compat &&
+	    (ro       & JFS_KNOWN_ROCOMPAT_FEATURES) == ro &&
+	    (incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * int journal_set_features () - Mark a given journal feature in the superblock
+ * @journal: Journal to act on.
+ * @compat: bitmask of compatible features
+ * @ro: bitmask of features that force read-only mount
+ * @incompat: bitmask of incompatible features
+ *
+ * Mark a given journal feature as present on the
+ * superblock.  Returns true if the requested features could be set
+ * (including the case where they are all set already), and false when
+ * the journalling code does not support them on this journal.  Only the
+ * in-memory superblock is modified here.
+ *
+ */
+
+int journal_set_features (journal_t *journal, unsigned long compat,
+			  unsigned long ro, unsigned long incompat)
+{
+	journal_superblock_t *sb;
+	int already_set, supported;
+
+	already_set = journal_check_used_features(journal, compat, ro,
+						  incompat);
+	if (already_set)
+		return 1;
+
+	supported = journal_check_available_features(journal, compat, ro,
+						     incompat);
+	if (!supported)
+		return 0;
+
+	jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
+		  compat, ro, incompat);
+
+	sb = journal->j_superblock;
+	sb->s_feature_compat    |= cpu_to_be32(compat);
+	sb->s_feature_ro_compat |= cpu_to_be32(ro);
+	sb->s_feature_incompat  |= cpu_to_be32(incompat);
+
+	return 1;
+}
+
+
+/**
+ * int journal_update_format () - Update on-disk journal structure.
+ * @journal: Journal to act on.
+ *
+ * Given an initialised but unloaded journal struct, poke about in the
+ * on-disk structure to update it to the most recent supported version.
+ * A version 2 superblock needs nothing done; a version 1 superblock is
+ * converted.  Returns 0 on success, the superblock read/validation
+ * error, or -EINVAL for any other block type.
+ */
+int journal_update_format (journal_t *journal)
+{
+	journal_superblock_t *sb;
+	int err = journal_get_superblock(journal);
+
+	if (err)
+		return err;
+
+	sb = journal->j_superblock;
+
+	switch (be32_to_cpu(sb->s_header.h_blocktype)) {
+	case JFS_SUPERBLOCK_V1:
+		return journal_convert_superblock_v1(journal, sb);
+	case JFS_SUPERBLOCK_V2:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Upgrade a version 1 superblock to version 2 in place: zero everything
+ * from s_feature_compat to the end of the block, set s_nr_users to 1,
+ * stamp the V2 block type, and write the superblock buffer out
+ * synchronously.  Always returns 0.
+ */
+static int journal_convert_superblock_v1(journal_t *journal,
+					 journal_superblock_t *sb)
+{
+	struct buffer_head *bh;
+	int v2_start, blocksize, tail_len;
+
+	printk(KERN_WARNING
+		"JBD: Converting superblock from version 1 to 2.\n");
+
+	/* Pre-initialise new fields to zero */
+	v2_start = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
+	blocksize = be32_to_cpu(sb->s_blocksize);
+	tail_len = blocksize - v2_start;
+	memset(&sb->s_feature_compat, 0, tail_len);
+
+	sb->s_nr_users = cpu_to_be32(1);
+	sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
+	journal->j_format_version = 2;
+
+	bh = journal->j_sb_buffer;
+	BUFFER_TRACE(bh, "marking dirty");
+	mark_buffer_dirty(bh);
+	sync_dirty_buffer(bh);
+
+	return 0;
+}
+
+
+/**
+ * int journal_flush () - Flush journal
+ * @journal: Journal to act on.
+ *
+ * Flush all data for a given journal to disk and empty the journal.
+ * Filesystems can use this when remounting readonly to ensure that
+ * recovery does not need to happen on remount.
+ *
+ * Returns the result of the last log_do_checkpoint() call, or 0 if no
+ * checkpointing was needed.  The return value of log_wait_commit() is
+ * not propagated.
+ */
+
+int journal_flush(journal_t *journal)
+{
+	int err = 0;
+	transaction_t *transaction = NULL;
+	unsigned long old_tail;
+
+	spin_lock(&journal->j_state_lock);
+
+	/* Force everything buffered to the log... */
+	if (journal->j_running_transaction) {
+		transaction = journal->j_running_transaction;
+		__log_start_commit(journal, transaction->t_tid);
+	} else if (journal->j_committing_transaction)
+		transaction = journal->j_committing_transaction;
+
+	/* Wait for the log commit to complete... */
+	if (transaction) {
+		/* Sample the tid under the lock; wait with the lock dropped. */
+		tid_t tid = transaction->t_tid;
+
+		spin_unlock(&journal->j_state_lock);
+		log_wait_commit(journal, tid);
+	} else {
+		spin_unlock(&journal->j_state_lock);
+	}
+
+	/* ...and flush everything in the log out to disk. */
+	/* The loop stops at the first checkpoint error; j_list_lock is
+	 * released around each log_do_checkpoint() call. */
+	spin_lock(&journal->j_list_lock);
+	while (!err && journal->j_checkpoint_transactions != NULL) {
+		spin_unlock(&journal->j_list_lock);
+		err = log_do_checkpoint(journal);
+		spin_lock(&journal->j_list_lock);
+	}
+	spin_unlock(&journal->j_list_lock);
+	cleanup_journal_tail(journal);
+
+	/* Finally, mark the journal as really needing no recovery.
+	 * This sets s_start==0 in the underlying superblock, which is
+	 * the magic code for a fully-recovered superblock.  Any future
+	 * commits of data to the journal will restore the current
+	 * s_start value. */
+	/* j_tail is zeroed only for the duration of the superblock write
+	 * and restored afterwards; the write itself happens unlocked. */
+	spin_lock(&journal->j_state_lock);
+	old_tail = journal->j_tail;
+	journal->j_tail = 0;
+	spin_unlock(&journal->j_state_lock);
+	journal_update_superblock(journal, 1);
+	spin_lock(&journal->j_state_lock);
+	journal->j_tail = old_tail;
+
+	/* These assertions run regardless of the value of err. */
+	J_ASSERT(!journal->j_running_transaction);
+	J_ASSERT(!journal->j_committing_transaction);
+	J_ASSERT(!journal->j_checkpoint_transactions);
+	J_ASSERT(journal->j_head == journal->j_tail);
+	J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
+	spin_unlock(&journal->j_state_lock);
+	return err;
+}
+
+/**
+ * int journal_wipe() - Wipe journal contents
+ * @journal: Journal to act on.
+ * @write: flag (see below)
+ *
+ * Wipe out all of the contents of a journal, safely.  This will produce
+ * a warning if the journal contains any valid recovery information.
+ * Must be called between journal_init_*() and journal_load().
+ *
+ * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
+ * we merely suppress recovery.
+ *
+ * Returns 0 if the journal held no recovery information, the error from
+ * loading the superblock, or the result of journal_skip_recovery().
+ */
+
+int journal_wipe(journal_t *journal, int write)
+{
+	int err;
+
+	J_ASSERT (!(journal->j_flags & JFS_LOADED));
+
+	err = load_superblock(journal);
+	if (err)
+		return err;
+
+	/* j_tail was just loaded from s_start; zero means nothing to recover */
+	if (!journal->j_tail)
+		return 0;
+
+	printk (KERN_WARNING "JBD: %s recovery information on journal\n",
+		write ? "Clearing" : "Ignoring");
+
+	err = journal_skip_recovery(journal);
+	if (write)
+		journal_update_superblock(journal, 1);
+
+	return err;
+}
+
+/*
+ * journal_dev_name: format into @buffer a string naming the block device
+ * this journal is described by: the device of the journal inode's
+ * superblock when the journal has an inode, otherwise j_dev.  Returns
+ * what bdevname() returns.
+ */
+
+static const char *journal_dev_name(journal_t *journal, char *buffer)
+{
+	struct block_device *bdev = journal->j_inode ?
+					journal->j_inode->i_sb->s_bdev :
+					journal->j_dev;
+
+	return bdevname(bdev, buffer);
+}
+
+/*
+ * Journal abort has very specific semantics, which we describe
+ * in the comment for journal_abort().
+ *
+ * Two internal functions, which provide abort to the jbd layer
+ * itself, are here.
+ */
+
+/*
+ * Quick version for internal journal use.
+ * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
+ * and don't attempt to make any other journal updates.
+ *
+ * Does nothing if JFS_ABORT is already set.  Otherwise a message naming
+ * the device is logged, JFS_ABORT is set under j_state_lock and, if a
+ * transaction is running, a commit of it is requested.
+ */
+void __journal_abort_hard(journal_t *journal)
+{
+	char devname[BDEVNAME_SIZE];
+	transaction_t *running;
+
+	if (journal->j_flags & JFS_ABORT)
+		return;
+
+	printk(KERN_ERR "Aborting journal on device %s.\n",
+		journal_dev_name(journal, devname));
+
+	spin_lock(&journal->j_state_lock);
+	journal->j_flags |= JFS_ABORT;
+	running = journal->j_running_transaction;
+	if (running != NULL)
+		__log_start_commit(journal, running->t_tid);
+	spin_unlock(&journal->j_state_lock);
+}
+
+/* Soft abort: record the abort error status in the journal superblock,
+ * but don't do any other IO.  A no-op if the journal is already aborted.
+ * An earlier non-zero j_errno is preserved; the superblock is written
+ * out only when @errno is non-zero. */
+static void __journal_abort_soft (journal_t *journal, int errno)
+{
+	if (!(journal->j_flags & JFS_ABORT)) {
+		if (journal->j_errno == 0)
+			journal->j_errno = errno;
+
+		__journal_abort_hard(journal);
+
+		if (errno != 0)
+			journal_update_superblock(journal, 1);
+	}
+}
+
+/**
+ * void journal_abort () - Shutdown the journal immediately.
+ * @journal: the journal to shutdown.
+ * @errno:   an error number to record in the journal indicating
+ *           the reason for the shutdown.
+ *
+ * Perform a complete, immediate shutdown of the ENTIRE
+ * journal (not of a single transaction).  This operation cannot be
+ * undone without closing and reopening the journal.
+ *
+ * The journal_abort function is intended to support higher level error
+ * recovery mechanisms such as the ext2/ext3 remount-readonly error
+ * mode.
+ *
+ * Journal abort has very specific semantics.  Any existing dirty,
+ * unjournaled buffers in the main filesystem will still be written to
+ * disk by bdflush, but the journaling mechanism will be suspended
+ * immediately and no further transaction commits will be honoured.
+ *
+ * Any dirty, journaled buffers will be written back to disk without
+ * hitting the journal.  Atomicity cannot be guaranteed on an aborted
+ * filesystem, but we _do_ attempt to leave as much data as possible
+ * behind for fsck to use for cleanup.
+ *
+ * Any attempt to get a new transaction handle on a journal which is in
+ * ABORT state will just result in an -EROFS error return.  A
+ * journal_stop on an existing handle will return -EIO if we have
+ * entered abort state during the update.
+ *
+ * Recursive transactions are not disturbed by journal abort until the
+ * final journal_stop, which will receive the -EIO error.
+ *
+ * Finally, the journal_abort call allows the caller to supply an errno
+ * which will be recorded (if possible) in the journal superblock.  This
+ * allows a client to record failure conditions in the middle of a
+ * transaction without having to complete the transaction to record the
+ * failure to disk.  ext3_error, for example, now uses this
+ * functionality.
+ *
+ * Errors which originate from within the journaling layer will NOT
+ * supply an errno; a null errno implies that absolutely no further
+ * writes are done to the journal (unless there are any already in
+ * progress).
+ *
+ * Calling this on a journal that is already aborted has no effect.
+ */
+
+void journal_abort(journal_t *journal, int errno)
+{
+	/* All of the work is done by the soft-abort helper. */
+	__journal_abort_soft(journal, errno);
+}
+
+/**
+ * int journal_errno () - returns the journal's error state.
+ * @journal: journal to examine.
+ *
+ * This is the errno number set with journal_abort(), the last
+ * time the journal was mounted - if the journal was stopped
+ * without calling abort this will be 0.
+ *
+ * If the journal has been aborted on this mount time -EROFS will
+ * be returned.
+ */
+int journal_errno(journal_t *journal)
+{
+	int err;
+
+	spin_lock(&journal->j_state_lock);
+	err = (journal->j_flags & JFS_ABORT) ? -EROFS : journal->j_errno;
+	spin_unlock(&journal->j_state_lock);
+
+	return err;
+}
+
+/**
+ * int journal_clear_err () - clears the journal's error state
+ * @journal: journal to act on.
+ *
+ * An error must be cleared or Acked to take a FS out of readonly
+ * mode.
+ *
+ * Returns 0 after resetting j_errno, or -EROFS (leaving j_errno alone)
+ * if the journal is currently aborted.
+ */
+int journal_clear_err(journal_t *journal)
+{
+	int err = -EROFS;
+
+	spin_lock(&journal->j_state_lock);
+	if (!(journal->j_flags & JFS_ABORT)) {
+		journal->j_errno = 0;
+		err = 0;
+	}
+	spin_unlock(&journal->j_state_lock);
+
+	return err;
+}
+
+/**
+ * void journal_ack_err() - Ack journal err.
+ * @journal: journal to act on.
+ *
+ * An error must be cleared or Acked to take a FS out of readonly
+ * mode.  Sets JFS_ACK_ERR, but only when an error is actually recorded
+ * in j_errno.
+ */
+void journal_ack_err(journal_t *journal)
+{
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_errno != 0)
+		journal->j_flags |= JFS_ACK_ERR;
+	spin_unlock(&journal->j_state_lock);
+}
+
+/*
+ * Number of filesystem blocks that fit in one page-cache page, derived
+ * from the block size of the superblock @inode belongs to.
+ */
+int journal_blocks_per_page(struct inode *inode)
+{
+	int shift = PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits;
+
+	return 1 << shift;
+}
+
+/*
+ * Simple support for retrying memory allocations.  Introduced to help to
+ * debug different VM deadlock avoidance strategies.
+ *
+ * A non-zero @retry adds __GFP_NOFAIL to @flags before calling kmalloc().
+ * @where is not used by this implementation.
+ */
+void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
+{
+	if (retry)
+		flags |= __GFP_NOFAIL;
+
+	return kmalloc(size, flags);
+}
+
+/*
+ * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
+ * and allocate frozen and commit buffers from these slabs.
+ *
+ * The reason for doing this is to avoid SLAB_DEBUG, since it could
+ * cause a bh to cross a page boundary.
+ */
+
+#define JBD_MAX_SLABS 5
+/*
+ * Map a buffer size to its slot in jbd_slab[]: 1k -> 0, 2k -> 1,
+ * 4k -> 2, 8k -> 4.  None of those sizes maps to slot 3, which is why
+ * its name below is NULL.  The argument is parenthesised so that an
+ * expression argument is shifted as a whole.
+ */
+#define JBD_SLAB_INDEX(size)  ((size) >> 11)
+
+static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static const char *jbd_slab_names[JBD_MAX_SLABS] = {
+	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
+};
+
+static void journal_destroy_jbd_slabs(void)
+{
+	kmem_cache_t **slot;	/* walks every entry of jbd_slab[] */
+
+	for (slot = jbd_slab; slot < jbd_slab + JBD_MAX_SLABS; slot++) {
+		if (*slot != NULL)
+			kmem_cache_destroy(*slot);
+		*slot = NULL;	/* cleared whether or not it was in use */
+	}
+}
+
+static int journal_create_jbd_slab(size_t slab_size)
+{
+	int idx = JBD_SLAB_INDEX(slab_size);
+	kmem_cache_t *cache;
+
+	BUG_ON(idx >= JBD_MAX_SLABS);
+
+	/* Nothing to do when a slab for this size already exists. */
+	if (jbd_slab[idx] != NULL)
+		return 0;
+
+	/*
+	 * The alignment argument equals the object size, so that
+	 * allocations from this slab won't cross a page boundary.
+	 */
+	cache = kmem_cache_create(jbd_slab_names[idx],
+				slab_size, slab_size, 0, NULL, NULL);
+	jbd_slab[idx] = cache;
+	if (cache == NULL) {
+		printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void * jbd_slab_alloc(size_t size, gfp_t flags)
+{
+	kmem_cache_t *cache = jbd_slab[JBD_SLAB_INDEX(size)];
+
+	/* The slab for this size must already have been created. */
+	BUG_ON(cache == NULL);
+	return kmem_cache_alloc(cache, flags | __GFP_NOFAIL);
+}
+
+void jbd_slab_free(void *ptr,  size_t size)
+{
+	kmem_cache_t *cache = jbd_slab[JBD_SLAB_INDEX(size)];
+
+	/* @ptr goes back to the slab selected by @size. */
+	BUG_ON(cache == NULL);
+	kmem_cache_free(cache, ptr);
+}
+
+/*
+ * Journal_head storage management
+ */
+static kmem_cache_t *journal_head_cache;
+#ifdef CONFIG_JBD_DEBUG
+static atomic_t nr_journal_heads = ATOMIC_INIT(0);
+#endif
+
+/* Create the "journal_head" slab cache; returns 0 or -ENOMEM. */
+static int journal_init_journal_head_cache(void)
+{
+	J_ASSERT(journal_head_cache == 0);
+
+	journal_head_cache = kmem_cache_create("journal_head",
+				sizeof(struct journal_head),
+				0,		/* offset */
+				0,		/* flags */
+				NULL,		/* ctor */
+				NULL);		/* dtor */
+	if (journal_head_cache != NULL)
+		return 0;
+
+	/* Creation failed: log it and report -ENOMEM. */
+	printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
+	return -ENOMEM;
+}
+
+static void journal_destroy_journal_head_cache(void)
+{
+	if (journal_head_cache == NULL)	/* init may fail before it is created */
+		return;
+	kmem_cache_destroy(journal_head_cache);
+	journal_head_cache = NULL;
+}
+
+/*
+ * journal_head splicing and dicing.  Allocation loops until it succeeds.
+ */
+static struct journal_head *journal_alloc_journal_head(void)
+{
+	static unsigned long last_warning;
+	struct journal_head *jh;
+
+#ifdef CONFIG_JBD_DEBUG
+	atomic_inc(&nr_journal_heads);
+#endif
+	jh = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
+	if (jh != NULL)
+		return jh;
+
+	jbd_debug(1, "out of memory for journal_head\n");
+	if (time_after(jiffies, last_warning + 5*HZ)) {
+		printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", __FUNCTION__);
+		last_warning = jiffies;
+	}
+	do {
+		yield();
+		jh = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
+	} while (jh == NULL);
+	return jh;
+}
+
+static void journal_free_journal_head(struct journal_head *jh)
+{
+#ifdef CONFIG_JBD_DEBUG
+	atomic_dec(&nr_journal_heads);	/* balances journal_alloc_journal_head() */
+	memset(jh, JBD_POISON_FREE, sizeof(*jh));	/* poison before freeing */
+#endif
+	kmem_cache_free(journal_head_cache, jh);
+}
+
+/*
+ * A journal_head is attached to a buffer_head whenever JBD has an
+ * interest in the buffer.
+ *
+ * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
+ * is set.  This bit is tested in core kernel code where we need to take
+ * JBD-specific actions.  Testing the zeroness of ->b_private is not reliable
+ * there.
+ *
+ * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
+ *
+ * When a buffer has its BH_JBD bit set it is immune from being released by
+ * core kernel code, mainly via ->b_count.
+ *
+ * A journal_head may be detached from its buffer_head when the journal_head's
+ * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
+ * Various places in JBD call journal_remove_journal_head() to indicate that the
+ * journal_head can be dropped if needed.
+ *
+ * Various places in the kernel want to attach a journal_head to a buffer_head
+ * _before_ attaching the journal_head to a transaction.  To protect the
+ * journal_head in this situation, journal_add_journal_head elevates the
+ * journal_head's b_jcount refcount by one.  The caller must call
+ * journal_put_journal_head() to undo this.
+ *
+ * So the typical usage would be:
+ *
+ *	(Attach a journal_head if needed.  Increments b_jcount)
+ *	struct journal_head *jh = journal_add_journal_head(bh);
+ *	...
+ *	jh->b_transaction = xxx;
+ *	journal_put_journal_head(jh);
+ *
+ * Now, the journal_head's b_jcount is zero, but it is safe from being released
+ * because it has a non-zero b_transaction.
+ */
+
+/*
+ * Give a buffer_head a journal_head (reusing the one already attached, if
+ * any) and take a b_jcount reference on it.
+ *
+ * Doesn't need the journal lock.  May sleep.
+ */
+struct journal_head *journal_add_journal_head(struct buffer_head *bh)
+{
+	struct journal_head *jh;
+	struct journal_head *new_jh = NULL;
+
+repeat:
+	if (!buffer_jbd(bh)) {
+		new_jh = journal_alloc_journal_head();	/* done before locking */
+		memset(new_jh, 0, sizeof(*new_jh));
+	}
+
+	jbd_lock_bh_journal_head(bh);
+	if (buffer_jbd(bh)) {
+		jh = bh2jh(bh);
+	} else {
+		J_ASSERT_BH(bh,
+			(atomic_read(&bh->b_count) > 0) ||
+			(bh->b_page && bh->b_page->mapping));
+
+		if (!new_jh) {	/* none was allocated above: unlock and retry */
+			jbd_unlock_bh_journal_head(bh);
+			goto repeat;
+		}
+
+		jh = new_jh;
+		new_jh = NULL;		/* We consumed it */
+		set_buffer_jbd(bh);
+		bh->b_private = jh;
+		jh->b_bh = bh;
+		get_bh(bh);	/* bh reference held while the journal_head is attached */
+		BUFFER_TRACE(bh, "added journal_head");
+	}
+	jh->b_jcount++;
+	jbd_unlock_bh_journal_head(bh);
+	if (new_jh)	/* allocated above but not attached */
+		journal_free_journal_head(new_jh);
+	return bh->b_private;
+}
+
+/*
+ * Grab a b_jcount ref on @bh's journal_head; return NULL if it has none.
+ */
+struct journal_head *journal_grab_journal_head(struct buffer_head *bh)
+{
+	struct journal_head *grabbed = NULL;
+
+	jbd_lock_bh_journal_head(bh);
+	if (!buffer_jbd(bh))
+		goto out;
+	grabbed = bh2jh(bh);
+	grabbed->b_jcount += 1;
+out:
+	jbd_unlock_bh_journal_head(bh);
+	return grabbed;
+}
+
+static void __journal_remove_journal_head(struct buffer_head *bh)
+{
+	struct journal_head *jh = bh2jh(bh);
+
+	J_ASSERT_JH(jh, jh->b_jcount >= 0);
+
+	get_bh(bh);	/* taken on every call; the caller must __brelse() it */
+	if (jh->b_jcount == 0) {	/* only unreferenced journal_heads can go */
+		if (jh->b_transaction == NULL &&
+				jh->b_next_transaction == NULL &&
+				jh->b_cp_transaction == NULL) {
+			J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
+			J_ASSERT_BH(bh, buffer_jbd(bh));
+			J_ASSERT_BH(bh, jh2bh(jh) == bh);
+			BUFFER_TRACE(bh, "remove journal_head");
+			if (jh->b_frozen_data) {	/* still set at detach time: warn, free */
+				printk(KERN_WARNING "%s: freeing "
+						"b_frozen_data\n",
+						__FUNCTION__);
+				jbd_slab_free(jh->b_frozen_data, bh->b_size);
+			}
+			if (jh->b_committed_data) {	/* same for the committed copy */
+				printk(KERN_WARNING "%s: freeing "
+						"b_committed_data\n",
+						__FUNCTION__);
+				jbd_slab_free(jh->b_committed_data, bh->b_size);
+			}
+			bh->b_private = NULL;
+			jh->b_bh = NULL;	/* debug, really */
+			clear_buffer_jbd(bh);
+			__brelse(bh);	/* the "logical" ->b_count decrement */
+			journal_free_journal_head(jh);
+		} else {
+			BUFFER_TRACE(bh, "journal_head was locked");
+		}
+	}
+}
+
+/*
+ * journal_remove_journal_head(): if the buffer isn't attached to a transaction
+ * and has a zero b_jcount then remove and release its journal_head.   If we did
+ * see that the buffer is not used by any transaction we also "logically"
+ * decrement ->b_count.
+ *
+ * We in fact take an additional increment on ->b_count as a convenience,
+ * because the caller usually wants to do additional things with the bh
+ * after calling here.
+ * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
+ * time.  Once the caller has run __brelse(), the buffer is eligible for
+ * reaping by try_to_free_buffers().
+ */
+void journal_remove_journal_head(struct buffer_head *bh)
+{
+	jbd_lock_bh_journal_head(bh);	/* held across the removal attempt */
+	__journal_remove_journal_head(bh);
+	jbd_unlock_bh_journal_head(bh);
+}
+
+/*
+ * Drop one b_jcount reference on @jh.  When the count reaches zero and no
+ * b_transaction is set, try to detach the journal_head from its buffer_head.
+ */
+void journal_put_journal_head(struct journal_head *jh)
+{
+	struct buffer_head *bh = jh2bh(jh);
+
+	jbd_lock_bh_journal_head(bh);
+	J_ASSERT_JH(jh, jh->b_jcount > 0);
+	jh->b_jcount--;
+	if (jh->b_jcount == 0 && jh->b_transaction == NULL) {
+		__journal_remove_journal_head(bh);
+		__brelse(bh);	/* undo the get_bh() done inside the helper */
+	}
+	jbd_unlock_bh_journal_head(bh);
+}
+
+/*
+ * /proc tunables
+ */
+#if defined(CONFIG_JBD_DEBUG)
+int journal_enable_debug;	/* debug level; exported, and set via the proc write handler */
+EXPORT_SYMBOL(journal_enable_debug);
+#endif
+
+#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
+
+static struct proc_dir_entry *proc_jbd_debug;
+
+/* read_proc handler: formats journal_enable_debug as "%d\n" at page + off. */
+static int read_jbd_debug(char *page, char **start, off_t off,
+			  int count, int *eof, void *data)
+{
+	char *out = page + off;
+
+	*eof = 1;
+	return sprintf(out, "%d\n", journal_enable_debug);
+}
+
+static int write_jbd_debug(struct file *file, const char __user *buffer,
+			   unsigned long count, void *data)
+{
+	char buf[32];	/* holds the copied text plus a terminating NUL */
+
+	if (count > ARRAY_SIZE(buf) - 1)
+		count = ARRAY_SIZE(buf) - 1;	/* input beyond this is not parsed */
+	if (copy_from_user(buf, buffer, count))
+		return -EFAULT;
+	buf[count] = '\0';	/* terminate right after the bytes actually copied */
+	journal_enable_debug = simple_strtoul(buf, NULL, 10);
+	return count;
+}
+
+#define JBD_PROC_NAME "sys/fs/jbd-debug"
+
+static void __init create_jbd_proc_entry(void)
+{
+	proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
+	if (proc_jbd_debug == NULL)
+		return;
+	/* The entry exists: hook up the read and write handlers. */
+	proc_jbd_debug->read_proc = read_jbd_debug;
+	proc_jbd_debug->write_proc = write_jbd_debug;
+}
+
+static void __exit remove_jbd_proc_entry(void)
+{
+	if (proc_jbd_debug)	/* set only if create_proc_entry() returned an entry */
+		remove_proc_entry(JBD_PROC_NAME, NULL);
+}
+
+#else	/* !(CONFIG_JBD_DEBUG && CONFIG_PROC_FS): both hooks are no-ops */
+
+#define create_jbd_proc_entry() do {} while (0)
+#define remove_jbd_proc_entry() do {} while (0)
+
+#endif	/* CONFIG_JBD_DEBUG && CONFIG_PROC_FS */
+
+kmem_cache_t *jbd_handle_cache;
+
+/* Create the "journal_handle" slab cache; returns 0 or -ENOMEM. */
+static int __init journal_init_handle_cache(void)
+{
+	jbd_handle_cache = kmem_cache_create("journal_handle",
+				sizeof(handle_t),
+				0,		/* offset */
+				0,		/* flags */
+				NULL,		/* ctor */
+				NULL);		/* dtor */
+	if (jbd_handle_cache)
+		return 0;
+	printk(KERN_EMERG "JBD: failed to create handle cache\n");
+	return -ENOMEM;
+}
+
+static void journal_destroy_handle_cache(void)
+{
+	if (jbd_handle_cache)	/* NULL if the cache was never created */
+		kmem_cache_destroy(jbd_handle_cache);
+}
+
+/*
+ * Module startup and shutdown
+ */
+
+static int __init journal_init_caches(void)
+{
+	int err = journal_init_revoke_caches();	/* first of three steps */
+
+	if (err)
+		return err;
+	err = journal_init_journal_head_cache();
+	if (err)
+		return err;
+	return journal_init_handle_cache();
+}
+
+static void journal_destroy_caches(void)
+{
+	journal_destroy_revoke_caches();
+	journal_destroy_journal_head_cache();
+	journal_destroy_handle_cache();
+	journal_destroy_jbd_slabs();	/* slabs are not created by journal_init_caches() */
+}
+
+static int __init journal_init(void)
+{
+	int ret;
+
+	BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
+	ret = journal_init_caches();
+	if (ret == 0)
+		create_jbd_proc_entry();	/* only once the caches exist */
+	else
+		journal_destroy_caches();	/* undo any partial setup */
+	return ret;
+}
+
+static void __exit journal_exit(void)
+{
+#ifdef CONFIG_JBD_DEBUG
+	int n = atomic_read(&nr_journal_heads);	/* allocations minus frees */
+	if (n)
+		printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
+#endif
+	remove_jbd_proc_entry();
+	journal_destroy_caches();
+}
+
+MODULE_LICENSE("GPL");
+module_init(journal_init);
+module_exit(journal_exit);
+
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
new file mode 100644
index 000000000000..11563fe2a52b
--- /dev/null
+++ b/fs/jbd2/recovery.c
@@ -0,0 +1,592 @@
+/*
+ * linux/fs/jbd2/recovery.c
+ *
+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
+ *
+ * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Journal recovery routines for the generic filesystem journaling code;
+ * part of the ext2fs journaling system.
+ */
+
+#ifndef __KERNEL__
+#include "jfs_user.h"
+#else
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#endif
+
+/*
+ * Maintain information about the progress of the recovery job, so that
+ * the different passes can carry information between them.
+ */
+struct recovery_info
+{
+	tid_t		start_transaction;	/* first ID expected; set by PASS_SCAN */
+	tid_t		end_transaction;	/* ID at which PASS_SCAN stopped */
+
+	int		nr_replays;	/* blocks copied back during PASS_REPLAY */
+	int		nr_revokes;	/* revoke entries recorded */
+	int		nr_revoke_hits;	/* blocks skipped because they were revoked */
+};
+
+enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};	/* journal_recover() runs them in this order */
+static int do_one_pass(journal_t *journal,
+				struct recovery_info *info, enum passtype pass);
+static int scan_revoke_records(journal_t *, struct buffer_head *,
+				tid_t, struct recovery_info *);
+
+#ifdef __KERNEL__
+
+/* Release the first @n readahead buffers in @b after use, last one first */
+static void journal_brelse_array(struct buffer_head *b[], int n)
+{
+	for (n--; n >= 0; n--)
+		brelse (b[n]);
+}
+
+
+/*
+ * When reading from the journal, we are going through the block device
+ * layer directly and so there is no readahead being done for us.  We
+ * need to implement any readahead ourselves if we want it to happen at
+ * all.  Recovery is basically one long sequential read, so make sure we
+ * do the IO in reasonably large chunks.
+ *
+ * This is not so critical that we need to be enormously clever about
+ * the readahead size, though.  128K is a purely arbitrary, good-enough
+ * fixed value.
+ */
+
+#define MAXBUF 8
+static int do_readahead(journal_t *journal, unsigned int start)
+{
+	struct buffer_head *batch[MAXBUF];	/* buffers waiting to be submitted */
+	struct buffer_head *bh;
+	unsigned long blocknr;
+	unsigned int limit, pending = 0, off;
+	int err;
+
+	/* Read ahead at most 128K worth of blocks, clamped to j_maxlen. */
+	limit = start + (128 * 1024 / journal->j_blocksize);
+	if (limit > journal->j_maxlen)
+		limit = journal->j_maxlen;
+
+	/* Reads are handed to the block layer MAXBUF buffer_heads at a time. */
+	for (off = start; off < limit; off++) {
+		err = journal_bmap(journal, off, &blocknr);
+		if (err) {
+			printk (KERN_ERR "JBD: bad block at offset %u\n",
+				off);
+			goto failed;
+		}
+
+		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+		if (!bh) {
+			err = -ENOMEM;
+			goto failed;
+		}
+
+		/* Already up to date, or locked: nothing to submit. */
+		if (buffer_uptodate(bh) || buffer_locked(bh)) {
+			brelse(bh);
+			continue;
+		}
+
+		batch[pending++] = bh;
+		if (pending < MAXBUF)
+			continue;
+
+		/* The batch is full: submit it and start a new one. */
+		ll_rw_block(READ, pending, batch);
+		journal_brelse_array(batch, pending);
+		pending = 0;
+	}
+
+	/* Submit whatever is left in the final, partial batch. */
+	if (pending)
+		ll_rw_block(READ, pending, batch);
+	err = 0;
+
+failed:
+	if (pending)
+		journal_brelse_array(batch, pending);
+	return err;
+}
+
+#endif /* __KERNEL__ */
+
+
+/*
+ * Read a block from the journal
+ */
+
+static int jread(struct buffer_head **bhp, journal_t *journal,
+		 unsigned int offset)
+{
+	struct buffer_head *blk;
+	unsigned long blocknr;
+	int err;
+
+	*bhp = NULL;	/* the caller sees NULL on every failure path */
+
+	if (offset >= journal->j_maxlen) {
+		printk(KERN_ERR "JBD: corrupted journal superblock\n");
+		return -EIO;
+	}
+
+	/* Map the log offset to the block number used on j_dev. */
+	err = journal_bmap(journal, offset, &blocknr);
+	if (err) {
+		printk (KERN_ERR "JBD: bad block at offset %u\n",
+			offset);
+		return err;
+	}
+
+	blk = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+	if (blk == NULL)
+		return -ENOMEM;
+
+	if (!buffer_uptodate(blk)) {
+		/* If this is a brand new buffer, start readahead.
+		 * Otherwise, we assume we are already reading it. */
+		if (!buffer_req(blk))
+			do_readahead(journal, offset);
+		wait_on_buffer(blk);
+	}
+
+	if (!buffer_uptodate(blk)) {
+		printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
+			offset);
+		brelse(blk);
+		return -EIO;
+	}
+
+	*bhp = blk;
+	return 0;
+}
+
+
+/*
+ * Count the number of in-use tags in a journal descriptor block.
+ */
+
+static int count_tags(struct buffer_head *bh, int size)
+{
+	char *cursor = &bh->b_data[sizeof(journal_header_t)];
+	journal_block_tag_t *tag;
+	int found = 0;
+
+	/* Tags follow the block header; walk them while one still fits. */
+	while ((cursor - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
+		tag = (journal_block_tag_t *) cursor;
+		found++;
+
+		/* Step over the tag, plus 16 more bytes unless SAME_UUID. */
+		cursor += sizeof(journal_block_tag_t);
+		if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID)))
+			cursor += 16;
+
+		if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG))
+			break;
+	}
+
+	return found;
+}
+
+
+/* Make sure we wrap around the log correctly: j_last maps back to j_first. */
+#define wrap(journal, var)						\
+do {									\
+	if (var >= (journal)->j_last)					\
+		var -= ((journal)->j_last - (journal)->j_first);	\
+} while (0)
+
+/**
+ * journal_recover - recovers an on-disk journal
+ * @journal: the journal to recover
+ *
+ * The primary function for recovering the log contents when mounting a
+ * journaled device.
+ *
+ * Recovery is done in three passes.  In the first pass, we look for the
+ * end of the log.  In the second, we assemble the list of revoke
+ * blocks.  In the third and final pass, we replay any un-revoked blocks
+ * in the log.
+ */
+int journal_recover(journal_t *journal)
+{
+	journal_superblock_t *jsb = journal->j_superblock;
+	struct recovery_info info;
+	int rc;
+
+	memset(&info, 0, sizeof(info));
+
+	/*
+	 * The journal superblock's s_start field (the current log head)
+	 * is always zero if, and only if, the journal was cleanly
+	 * unmounted.
+	 */
+	if (!jsb->s_start) {
+		jbd_debug(1, "No recovery required, last transaction %d\n",
+			  be32_to_cpu(jsb->s_sequence));
+		journal->j_transaction_sequence = be32_to_cpu(jsb->s_sequence) + 1;
+		return 0;
+	}
+
+	/* Run the three passes in order, stopping at the first failure. */
+	rc = do_one_pass(journal, &info, PASS_SCAN);
+	if (rc == 0)
+		rc = do_one_pass(journal, &info, PASS_REVOKE);
+	if (rc == 0)
+		rc = do_one_pass(journal, &info, PASS_REPLAY);
+
+	jbd_debug(0, "JBD: recovery, exit status %d, "
+		  "recovered transactions %u to %u\n",
+		  rc, info.start_transaction, info.end_transaction);
+	jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
+		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
+
+	/* Restart the log at the next transaction ID, thus invalidating
+	 * any existing commit records in the log. */
+	info.end_transaction++;
+	journal->j_transaction_sequence = info.end_transaction;
+
+	/* The steps below run on success and failure alike. */
+	journal_clear_revoke(journal);
+	sync_blockdev(journal->j_fs_dev);
+	return rc;
+}
+
+/**
+ * journal_skip_recovery - Start journal and wipe existing records
+ * @journal: journal to startup
+ *
+ * Locate any valid recovery information from the journal and set up the
+ * journal structures in memory to ignore it (presumably because the
+ * caller has evidence that it is out of date).
+ * This function doesn't appear to be exported.
+ *
+ * We perform one pass over the journal to allow us to tell the user how
+ * much recovery information is being erased, and to let us initialise
+ * the journal transaction sequence numbers to the next unused ID.
+ */
+int journal_skip_recovery(journal_t *journal)
+{
+	journal_superblock_t *jsb;
+	struct recovery_info info;
+	int rc;
+
+	memset (&info, 0, sizeof(info));
+	jsb = journal->j_superblock;
+
+	rc = do_one_pass(journal, &info, PASS_SCAN);
+	if (rc != 0) {
+		printk(KERN_ERR "JBD: error %d scanning journal\n", rc);
+		journal->j_transaction_sequence++;
+	} else {
+#ifdef CONFIG_JBD_DEBUG
+		int dropped = info.end_transaction - be32_to_cpu(jsb->s_sequence);
+#endif
+		jbd_debug(0,
+			  "JBD: ignoring %d transaction%s from the journal.\n",
+			  dropped, (dropped == 1) ? "" : "s");
+		info.end_transaction++;
+		journal->j_transaction_sequence = info.end_transaction;
+	}
+
+	/* j_tail is reset on both the success and the failure path. */
+	journal->j_tail = 0;
+	return rc;
+}
+
+/*
+ * Walk the log once, transaction by transaction, from the position recorded
+ * in the journal superblock.  @pass selects what is done with each block:
+ * find the end of the log, collect revokes, or replay.  Returns 0 or -errno.
+ */
+static int do_one_pass(journal_t *journal,
+			struct recovery_info *info, enum passtype pass)
+{
+	unsigned int		first_commit_ID, next_commit_ID;
+	unsigned long		next_log_block;
+	int			err, success = 0;
+	journal_superblock_t *	sb;
+	journal_header_t *	tmp;
+	struct buffer_head *	bh;
+	unsigned int		sequence;
+	int			blocktype;
+
+	/*
+	 * First thing is to establish what we expect to find in the log
+	 * (in terms of transaction IDs), and where (in terms of log
+	 * block offsets): query the superblock.
+	 */
+
+	sb = journal->j_superblock;
+	next_commit_ID = be32_to_cpu(sb->s_sequence);
+	next_log_block = be32_to_cpu(sb->s_start);
+
+	first_commit_ID = next_commit_ID;
+	if (pass == PASS_SCAN)
+		info->start_transaction = first_commit_ID;
+
+	jbd_debug(1, "Starting recovery pass %d\n", pass);
+
+	/*
+	 * Now we walk through the log, transaction by transaction,
+	 * making sure that each transaction has a commit block in the
+	 * expected place.  Each complete transaction gets replayed back
+	 * into the main filesystem.
+	 */
+
+	while (1) {
+		int			flags;
+		char *			tagp;
+		journal_block_tag_t *	tag;
+		struct buffer_head *	obh;
+		struct buffer_head *	nbh;
+
+		cond_resched();		/* We're under lock_kernel() */
+
+		/* If we already know where to stop the log traversal,
+		 * check right now that we haven't gone past the end of
+		 * the log. */
+
+		if (pass != PASS_SCAN)
+			if (tid_geq(next_commit_ID, info->end_transaction))
+				break;
+
+		jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
+			  next_commit_ID, next_log_block, journal->j_last);
+
+		/* Skip over each chunk of the transaction looking for
+		 * either the next descriptor block or the final commit
+		 * record. */
+
+		jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
+		err = jread(&bh, journal, next_log_block);
+		if (err)
+			goto failed;
+
+		next_log_block++;
+		wrap(journal, next_log_block);
+
+		/* What kind of buffer is it?
+		 *
+		 * If it is a descriptor block, check that it has the
+		 * expected sequence number.  Otherwise, we're all done
+		 * here. */
+
+		tmp = (journal_header_t *)bh->b_data;
+
+		if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) {
+			brelse(bh);
+			break;
+		}
+
+		blocktype = be32_to_cpu(tmp->h_blocktype);
+		sequence = be32_to_cpu(tmp->h_sequence);
+		jbd_debug(3, "Found magic %d, sequence %d\n",
+			  blocktype, sequence);
+
+		if (sequence != next_commit_ID) {
+			brelse(bh);
+			break;
+		}
+
+		/* OK, we have a valid descriptor block which matches
+		 * all of the sequence number checks.  What are we going
+		 * to do with it?  That depends on the pass... */
+
+		switch(blocktype) {
+		case JFS_DESCRIPTOR_BLOCK:
+			/* If it is a valid descriptor block, replay it
+			 * in pass REPLAY; otherwise, just skip over the
+			 * blocks it describes. */
+			if (pass != PASS_REPLAY) {
+				next_log_block +=
+					count_tags(bh, journal->j_blocksize);
+				wrap(journal, next_log_block);
+				brelse(bh);
+				continue;
+			}
+
+			/* A descriptor block: we can now write all of
+			 * the data blocks.  Yay, useful work is finally
+			 * getting done here! */
+
+			tagp = &bh->b_data[sizeof(journal_header_t)];
+			while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
+			       <= journal->j_blocksize) {
+				unsigned long io_block;
+
+				tag = (journal_block_tag_t *) tagp;
+				flags = be32_to_cpu(tag->t_flags);
+
+				io_block = next_log_block++;
+				wrap(journal, next_log_block);
+				err = jread(&obh, journal, io_block);
+				if (err) {
+					/* Recover what we can, but
+					 * report failure at the end. */
+					success = err;
+					printk (KERN_ERR
+						"JBD: IO error %d recovering "
+						"block %ld in log\n",
+						err, io_block);
+				} else {
+					unsigned long blocknr;
+
+					J_ASSERT(obh != NULL);
+					blocknr = be32_to_cpu(tag->t_blocknr);
+
+					/* If the block has been
+					 * revoked, then we're all done
+					 * here. */
+					if (journal_test_revoke
+					    (journal, blocknr,
+					     next_commit_ID)) {
+						brelse(obh);
+						++info->nr_revoke_hits;
+						goto skip_write;
+					}
+
+					/* Find a buffer for the new
+					 * data being restored */
+					nbh = __getblk(journal->j_fs_dev,
+							blocknr,
+							journal->j_blocksize);
+					if (nbh == NULL) {
+						printk(KERN_ERR
+						       "JBD: Out of memory "
+						       "during recovery.\n");
+						err = -ENOMEM;
+						brelse(bh);
+						brelse(obh);
+						goto failed;
+					}
+
+					lock_buffer(nbh);
+					memcpy(nbh->b_data, obh->b_data,
+							journal->j_blocksize);
+					/* Unescape the restored copy (nbh), not the descriptor. */
+					if (flags & JFS_FLAG_ESCAPE)
+						*((__be32 *)nbh->b_data) =
+						cpu_to_be32(JFS_MAGIC_NUMBER);
+
+					BUFFER_TRACE(nbh, "marking dirty");
+					set_buffer_uptodate(nbh);
+					mark_buffer_dirty(nbh);
+					BUFFER_TRACE(nbh, "marking uptodate");
+					++info->nr_replays;
+					/* ll_rw_block(WRITE, 1, &nbh); */
+					unlock_buffer(nbh);
+					brelse(obh);
+					brelse(nbh);
+				}
+
+			skip_write:
+				tagp += sizeof(journal_block_tag_t);
+				if (!(flags & JFS_FLAG_SAME_UUID))
+					tagp += 16;
+
+				if (flags & JFS_FLAG_LAST_TAG)
+					break;
+			}
+
+			brelse(bh);
+			continue;
+
+		case JFS_COMMIT_BLOCK:
+			/* Found an expected commit block: not much to
+			 * do other than move on to the next sequence
+			 * number. */
+			brelse(bh);
+			next_commit_ID++;
+			continue;
+
+		case JFS_REVOKE_BLOCK:
+			/* If we aren't in the REVOKE pass, then we can
+			 * just skip over this block. */
+			if (pass != PASS_REVOKE) {
+				brelse(bh);
+				continue;
+			}
+
+			err = scan_revoke_records(journal, bh,
+						  next_commit_ID, info);
+			brelse(bh);
+			if (err)
+				goto failed;
+			continue;
+
+		default:
+			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
+				  blocktype);
+			brelse(bh);
+			goto done;
+		}
+	}
+
+ done:
+	/*
+	 * We broke out of the log scan loop: either we came to the
+	 * known end of the log or we found an unexpected block in the
+	 * log.  If the latter happened, then we know that the "current"
+	 * transaction marks the end of the valid log.
+	 */
+
+	if (pass == PASS_SCAN)
+		info->end_transaction = next_commit_ID;
+	else {
+		/* It's really bad news if different passes end up at
+		 * different places (but possible due to IO errors). */
+		if (info->end_transaction != next_commit_ID) {
+			printk (KERN_ERR "JBD: recovery pass %d ended at "
+				"transaction %u, expected %u\n",
+				pass, next_commit_ID, info->end_transaction);
+			if (!success)
+				success = -EIO;
+		}
+	}
+
+	return success;
+
+ failed:
+	return err;
+}
+
+
+/* Scan a revoke record, marking all blocks mentioned as revoked. */
+
+static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
+			       tid_t sequence, struct recovery_info *info)
+{
+	journal_revoke_header_t *header = (journal_revoke_header_t *) bh->b_data;
+	int offset = sizeof(journal_revoke_header_t);
+	int max = be32_to_cpu(header->r_count);	/* byte count read from disk */
+
+	if (max < 0 || max > journal->j_blocksize)	/* would run past the block */
+		return -EINVAL;
+
+	while (offset < max) {
+		unsigned long blocknr;
+		int err;
+
+		blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
+		offset += 4;	/* each entry is one 32-bit block number */
+		err = journal_set_revoke(journal, blocknr, sequence);
+		if (err)
+			return err;
+		++info->nr_revokes;
+	}
+	return 0;
+}
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
new file mode 100644
index 000000000000..c532429d8d9b
--- /dev/null
+++ b/fs/jbd2/revoke.c
@@ -0,0 +1,703 @@
+/*
+ * linux/fs/revoke.c
+ *
+ * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
+ *
+ * Copyright 2000 Red Hat corp --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Journal revoke routines for the generic filesystem journaling code;
+ * part of the ext2fs journaling system.
+ *
+ * Revoke is the mechanism used to prevent old log records for deleted
+ * metadata from being replayed on top of newer data using the same
+ * blocks.  The revoke mechanism is used in two separate places:
+ *
+ * + Commit: during commit we write the entire list of the current
+ *   transaction's revoked blocks to the journal
+ *
+ * + Recovery: during recovery we record the transaction ID of all
+ *   revoked blocks.  If there are multiple revoke records in the log
+ *   for a single block, only the last one counts, and if there is a log
+ *   entry for a block beyond the last revoke, then that log entry still
+ *   gets replayed.
+ *
+ * We can get interactions between revokes and new log data within a
+ * single transaction:
+ *
+ * Block is revoked and then journaled:
+ *   The desired end result is the journaling of the new block, so we
+ *   cancel the revoke before the transaction commits.
+ *
+ * Block is journaled and then revoked:
+ *   The revoke must take precedence over the write of the block, so we
+ *   need either to cancel the journal entry or to write the revoke
+ *   later in the log than the log block.  In this case, we choose the
+ *   latter: journaling a block cancels any revoke record for that block
+ *   in the current transaction, so any revoke for that block in the
+ *   transaction must have happened after the block was journaled and so
+ *   the revoke must take precedence.
+ *
+ * Block is revoked and then written as data:
+ *   The data write is allowed to succeed, but the revoke is _not_
+ *   cancelled.  We still need to prevent old log records from
+ *   overwriting the new data.  We don't even need to clear the revoke
+ *   bit here.
+ *
+ * Revoke information on buffers is a tri-state value:
+ *
+ * RevokeValid clear:	no cached revoke status, need to look it up
+ * RevokeValid set, Revoked clear:
+ *			buffer has not been revoked, and cancel_revoke
+ *			need do nothing.
+ * RevokeValid set, Revoked set:
+ *			buffer has been revoked.
+ */
+
+#ifndef __KERNEL__
+#include "jfs_user.h"
+#else
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#endif
+
+static kmem_cache_t *revoke_record_cache;
+static kmem_cache_t *revoke_table_cache;
+
+/* Each revoke record represents one single revoked block.  During
+   journal replay, this involves recording the transaction ID of the
+   last transaction to revoke this block. */
+
+struct jbd_revoke_record_s
+{
+	struct list_head  hash;
+	tid_t		  sequence;	/* Used for recovery only */
+	unsigned long	  blocknr;
+};
+
+
+/* The revoke table is just a simple hash table of revoke records. */
+struct jbd_revoke_table_s
+{
+	/* It is conceivable that we might want a larger hash table
+	 * for recovery.  Must be a power of two. */
+	int		  hash_size;
+	int		  hash_shift;
+	struct list_head *hash_table;
+};
+
+
+#ifdef __KERNEL__
+static void write_one_revoke_record(journal_t *, transaction_t *,
+				    struct journal_head **, int *,
+				    struct jbd_revoke_record_s *);
+static void flush_descriptor(journal_t *, struct journal_head *, int);
+#endif
+
+/* Utility functions to maintain the revoke table */
+
+/* Borrowed from buffer.c: this is a tried and tested block hash function */
+static inline int hash(journal_t *journal, unsigned long block)
+{
+	struct jbd_revoke_table_s *table = journal->j_revoke;
+	int hash_shift = table->hash_shift;
+
+	return ((block << (hash_shift - 6)) ^
+		(block >> 13) ^
+		(block << (hash_shift - 12))) & (table->hash_size - 1);
+}
+
+static int insert_revoke_hash(journal_t *journal, unsigned long blocknr,
+			      tid_t seq)
+{
+	struct list_head *hash_list;
+	struct jbd_revoke_record_s *record;
+
+repeat:
+	record = kmem_cache_alloc(revoke_record_cache, GFP_NOFS);
+	if (!record)
+		goto oom;
+
+	record->sequence = seq;
+	record->blocknr = blocknr;
+	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
+	spin_lock(&journal->j_revoke_lock);
+	list_add(&record->hash, hash_list);
+	spin_unlock(&journal->j_revoke_lock);
+	return 0;
+
+oom:
+	if (!journal_oom_retry)
+		return -ENOMEM;
+	jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__);
+	yield();
+	goto repeat;
+}
+
+/* Find a revoke record in the journal's hash table. */
+
+static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
+						      unsigned long blocknr)
+{
+	struct list_head *hash_list;
+	struct jbd_revoke_record_s *record;
+
+	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
+
+	spin_lock(&journal->j_revoke_lock);
+	record = (struct jbd_revoke_record_s *) hash_list->next;
+	while (&(record->hash) != hash_list) {
+		if (record->blocknr == blocknr) {
+			spin_unlock(&journal->j_revoke_lock);
+			return record;
+		}
+		record = (struct jbd_revoke_record_s *) record->hash.next;
+	}
+	spin_unlock(&journal->j_revoke_lock);
+	return NULL;
+}
+
+int __init journal_init_revoke_caches(void)
+{
+	revoke_record_cache = kmem_cache_create("revoke_record",
+					   sizeof(struct jbd_revoke_record_s),
+					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (revoke_record_cache == 0)
+		return -ENOMEM;
+
+	revoke_table_cache = kmem_cache_create("revoke_table",
+					   sizeof(struct jbd_revoke_table_s),
+					   0, 0, NULL, NULL);
+	if (revoke_table_cache == 0) {
+		kmem_cache_destroy(revoke_record_cache);
+		revoke_record_cache = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void journal_destroy_revoke_caches(void)
+{
+	kmem_cache_destroy(revoke_record_cache);
+	revoke_record_cache = NULL;
+	kmem_cache_destroy(revoke_table_cache);
+	revoke_table_cache = NULL;
+}
+
+/* Allocate a revoke table with hash_size empty buckets; NULL on failure. */
+static struct jbd_revoke_table_s *alloc_revoke_table(int hash_size, int shift)
+{
+	struct jbd_revoke_table_s *table;
+	int i;
+
+	table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	table->hash_size = hash_size;
+	table->hash_shift = shift;
+	table->hash_table =
+		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
+	if (!table->hash_table) {
+		kmem_cache_free(revoke_table_cache, table);
+		return NULL;
+	}
+
+	for (i = 0; i < hash_size; i++)
+		INIT_LIST_HEAD(&table->hash_table[i]);
+	return table;
+}
+
+/*
+ * Initialise the revoke tables for a given journal to a given size.
+ *
+ * Two tables of hash_size buckets each are allocated (hash_size must be a
+ * power of two); j_revoke is left pointing at j_revoke_table[1] and
+ * j_revoke_lock is initialised.
+ *
+ * Returns 0 on success or -ENOMEM.  On failure everything allocated here is
+ * freed again and j_revoke / j_revoke_table[] are reset to NULL, so no
+ * pointers to freed tables are left in the journal.
+ */
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+	int shift, tmp;
+
+	J_ASSERT (journal->j_revoke_table[0] == NULL);
+	/* Check that the hash_size is a power of two */
+	J_ASSERT ((hash_size & (hash_size-1)) == 0);
+
+	/* shift = log2(hash_size), as used by hash() */
+	shift = 0;
+	tmp = hash_size;
+	while((tmp >>= 1UL) != 0UL)
+		shift++;
+
+	journal->j_revoke_table[0] = alloc_revoke_table(hash_size, shift);
+	if (!journal->j_revoke_table[0])
+		goto fail;
+
+	journal->j_revoke_table[1] = alloc_revoke_table(hash_size, shift);
+	if (!journal->j_revoke_table[1]) {
+		kfree(journal->j_revoke_table[0]->hash_table);
+		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
+		journal->j_revoke_table[0] = NULL;
+		goto fail;
+	}
+
+	journal->j_revoke = journal->j_revoke_table[1];
+	spin_lock_init(&journal->j_revoke_lock);
+	return 0;
+
+fail:
+	journal->j_revoke = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Destroy a journal's revoke tables.  The tables must already be empty!
+ *
+ * The two tables are torn down in order; a NULL j_revoke_table[] slot
+ * ends the teardown early.  Each freed slot is reset to NULL, as is
+ * j_revoke, so that no pointer to a freed table is left in the journal
+ * and the "table[0] == NULL" assertion in journal_init_revoke() holds
+ * again afterwards.
+ */
+void journal_destroy_revoke(journal_t *journal)
+{
+	struct jbd_revoke_table_s *table;
+	struct list_head *hash_list;
+	int t, i;
+
+	/* j_revoke_table[] holds exactly two tables: [0] and [1]. */
+	for (t = 0; t < 2; t++) {
+		table = journal->j_revoke_table[t];
+		if (!table)
+			return;
+
+		/* Every hash chain must have been emptied by now. */
+		for (i = 0; i < table->hash_size; i++) {
+			hash_list = &table->hash_table[i];
+			J_ASSERT (list_empty(hash_list));
+		}
+
+		kfree(table->hash_table);
+		kmem_cache_free(revoke_table_cache, table);
+		/* Don't leave stale pointers to the freed table behind. */
+		journal->j_revoke_table[t] = NULL;
+		journal->j_revoke = NULL;
+	}
+}
+
+
+#ifdef __KERNEL__
+
+/*
+ * journal_revoke: revoke a given buffer_head from the journal.  This
+ * prevents the block from being replayed during recovery if we take a
+ * crash after this current transaction commits.  Any subsequent
+ * metadata writes of the buffer in this transaction cancel the
+ * revoke.
+ *
+ * Note that this call may block --- it is up to the caller to make
+ * sure that there are no further calls to journal_write_metadata
+ * before the revoke is complete.  In ext3, this implies calling the
+ * revoke before clearing the block bitmap when we are deleting
+ * metadata.
+ *
+ * Revoke performs a journal_forget on any buffer_head passed in as a
+ * parameter, but does _not_ forget the buffer_head if the bh was only
+ * found implicitly.
+ *
+ * bh_in may not be a journalled buffer - it may have come off
+ * the hash tables without an attached journal_head.
+ *
+ * If bh_in is non-zero, journal_revoke() will decrement its b_count
+ * by one.
+ */
+
+int journal_revoke(handle_t *handle, unsigned long blocknr,
+		   struct buffer_head *bh_in)
+{
+	struct buffer_head *bh = NULL;
+	journal_t *journal;
+	struct block_device *bdev;
+	int err;
+
+	might_sleep();
+	if (bh_in)
+		BUFFER_TRACE(bh_in, "enter");
+
+	journal = handle->h_transaction->t_journal;
+	if (!journal_set_features(journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)){
+		J_ASSERT (!"Cannot set revoke feature!");
+		return -EINVAL;
+	}
+
+	bdev = journal->j_fs_dev;
+	bh = bh_in;
+
+	if (!bh) {
+		bh = __find_get_block(bdev, blocknr, journal->j_blocksize);
+		if (bh)
+			BUFFER_TRACE(bh, "found on hash");
+	}
+#ifdef JBD_EXPENSIVE_CHECKING
+	else {
+		struct buffer_head *bh2;
+
+		/* If there is a different buffer_head lying around in
+		 * memory anywhere... */
+		bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize);
+		if (bh2) {
+			/* ... and it has RevokeValid status... */
+			if (bh2 != bh && buffer_revokevalid(bh2))
+				/* ...then it better be revoked too,
+				 * since it's illegal to create a revoke
+				 * record against a buffer_head which is
+				 * not marked revoked --- that would
+				 * risk missing a subsequent revoke
+				 * cancel. */
+				J_ASSERT_BH(bh2, buffer_revoked(bh2));
+			put_bh(bh2);
+		}
+	}
+#endif
+
+	/* We really ought not ever to revoke twice in a row without
+           first having the revoke cancelled: it's illegal to free a
+           block twice without allocating it in between! */
+	if (bh) {
+		if (!J_EXPECT_BH(bh, !buffer_revoked(bh),
+				 "inconsistent data on disk")) {
+			if (!bh_in)
+				brelse(bh);
+			return -EIO;
+		}
+		set_buffer_revoked(bh);
+		set_buffer_revokevalid(bh);
+		if (bh_in) {
+			BUFFER_TRACE(bh_in, "call journal_forget");
+			journal_forget(handle, bh_in);
+		} else {
+			BUFFER_TRACE(bh, "call brelse");
+			__brelse(bh);
+		}
+	}
+
+	jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in);
+	err = insert_revoke_hash(journal, blocknr,
+				handle->h_transaction->t_tid);
+	BUFFER_TRACE(bh_in, "exit");
+	return err;
+}
+
+/*
+ * Cancel an outstanding revoke.  For use only internally by the
+ * journaling code (called from journal_get_write_access).
+ *
+ * We trust buffer_revoked() on the buffer if the buffer is already
+ * being journaled: if there is no revoke pending on the buffer, then we
+ * don't do anything here.
+ *
+ * This would break if it were possible for a buffer to be revoked and
+ * discarded, and then reallocated within the same transaction.  In such
+ * a case we would have lost the revoked bit, but when we arrived here
+ * the second time we would still have a pending revoke to cancel.  So,
+ * do not trust the Revoked bit on buffers unless RevokeValid is also
+ * set.
+ *
+ * The caller must have the journal locked.
+ */
+int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
+{
+	struct jbd_revoke_record_s *record;
+	journal_t *journal = handle->h_transaction->t_journal;
+	int need_cancel;
+	int did_revoke = 0;	/* akpm: debug */
+	struct buffer_head *bh = jh2bh(jh);
+
+	jbd_debug(4, "journal_head %p, cancelling revoke\n", jh);
+
+	/* Is the existing Revoke bit valid?  If so, we trust it, and
+	 * only perform the full cancel if the revoke bit is set.  If
+	 * not, we can't trust the revoke bit, and we need to do the
+	 * full search for a revoke record. */
+	if (test_set_buffer_revokevalid(bh)) {
+		need_cancel = test_clear_buffer_revoked(bh);
+	} else {
+		need_cancel = 1;
+		clear_buffer_revoked(bh);
+	}
+
+	if (need_cancel) {
+		record = find_revoke_record(journal, bh->b_blocknr);
+		if (record) {
+			jbd_debug(4, "cancelled existing revoke on "
+				  "blocknr %llu\n", (unsigned long long)bh->b_blocknr);
+			spin_lock(&journal->j_revoke_lock);
+			list_del(&record->hash);
+			spin_unlock(&journal->j_revoke_lock);
+			kmem_cache_free(revoke_record_cache, record);
+			did_revoke = 1;
+		}
+	}
+
+#ifdef JBD_EXPENSIVE_CHECKING
+	/* There better not be one left behind by now! */
+	record = find_revoke_record(journal, bh->b_blocknr);
+	J_ASSERT_JH(jh, record == NULL);
+#endif
+
+	/* Finally, have we just cleared revoke on an unhashed
+	 * buffer_head?  If so, we'd better make sure we clear the
+	 * revoked status on any hashed alias too, otherwise the revoke
+	 * state machine will get very upset later on. */
+	if (need_cancel) {
+		struct buffer_head *bh2;
+		bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size);
+		if (bh2) {
+			if (bh2 != bh)
+				clear_buffer_revoked(bh2);
+			__brelse(bh2);
+		}
+	}
+	return did_revoke;
+}
+
+/* journal_switch_revoke table select j_revoke for next transaction
+ * we do not want to suspend any processing until all revokes are
+ * written -bzzz
+ */
+void journal_switch_revoke_table(journal_t *journal)
+{
+	int i;
+
+	if (journal->j_revoke == journal->j_revoke_table[0])
+		journal->j_revoke = journal->j_revoke_table[1];
+	else
+		journal->j_revoke = journal->j_revoke_table[0];
+
+	for (i = 0; i < journal->j_revoke->hash_size; i++)
+		INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]);
+}
+
+/*
+ * Write revoke records to the journal for all entries in the current
+ * revoke hash, deleting the entries as we go.
+ *
+ * Called with the journal lock held.
+ */
+
+void journal_write_revoke_records(journal_t *journal,
+				  transaction_t *transaction)
+{
+	struct journal_head *descriptor;
+	struct jbd_revoke_record_s *record;
+	struct jbd_revoke_table_s *revoke;
+	struct list_head *hash_list;
+	int i, offset, count;
+
+	descriptor = NULL;
+	offset = 0;
+	count = 0;
+
+	/* select revoke table for committing transaction */
+	revoke = journal->j_revoke == journal->j_revoke_table[0] ?
+		journal->j_revoke_table[1] : journal->j_revoke_table[0];
+
+	for (i = 0; i < revoke->hash_size; i++) {
+		hash_list = &revoke->hash_table[i];
+
+		while (!list_empty(hash_list)) {
+			record = (struct jbd_revoke_record_s *)
+				hash_list->next;
+			write_one_revoke_record(journal, transaction,
+						&descriptor, &offset,
+						record);
+			count++;
+			list_del(&record->hash);
+			kmem_cache_free(revoke_record_cache, record);
+		}
+	}
+	if (descriptor)
+		flush_descriptor(journal, descriptor, offset);
+	jbd_debug(1, "Wrote %d revoke records\n", count);
+}
+
+/*
+ * Write out one revoke record.  We need to create a new descriptor
+ * block if the old one is full or if we have not already created one.
+ */
+
+static void write_one_revoke_record(journal_t *journal,
+				    transaction_t *transaction,
+				    struct journal_head **descriptorp,
+				    int *offsetp,
+				    struct jbd_revoke_record_s *record)
+{
+	struct journal_head *descriptor;
+	int offset;
+	journal_header_t *header;
+
+	/* If we are already aborting, this all becomes a noop.  We
+           still need to go round the loop in
+           journal_write_revoke_records in order to free all of the
+           revoke records: only the IO to the journal is omitted. */
+	if (is_journal_aborted(journal))
+		return;
+
+	descriptor = *descriptorp;
+	offset = *offsetp;
+
+	/* Make sure we have a descriptor with space left for the record */
+	if (descriptor) {
+		if (offset == journal->j_blocksize) {
+			flush_descriptor(journal, descriptor, offset);
+			descriptor = NULL;
+		}
+	}
+
+	if (!descriptor) {
+		descriptor = journal_get_descriptor_buffer(journal);
+		if (!descriptor)
+			return;
+		header = (journal_header_t *) &jh2bh(descriptor)->b_data[0];
+		header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
+		header->h_blocktype = cpu_to_be32(JFS_REVOKE_BLOCK);
+		header->h_sequence  = cpu_to_be32(transaction->t_tid);
+
+		/* Record it so that we can wait for IO completion later */
+		JBUFFER_TRACE(descriptor, "file as BJ_LogCtl");
+		journal_file_buffer(descriptor, transaction, BJ_LogCtl);
+
+		offset = sizeof(journal_revoke_header_t);
+		*descriptorp = descriptor;
+	}
+
+	* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
+		cpu_to_be32(record->blocknr);
+	offset += 4;
+	*offsetp = offset;
+}
+
+/*
+ * Flush a revoke descriptor out to the journal.  If we are aborting,
+ * this is a noop; otherwise we are generating a buffer which needs to
+ * be waited for during commit, so it has to go onto the appropriate
+ * journal buffer list.
+ */
+
+static void flush_descriptor(journal_t *journal,
+			     struct journal_head *descriptor,
+			     int offset)
+{
+	journal_revoke_header_t *header;
+	struct buffer_head *bh = jh2bh(descriptor);
+
+	if (is_journal_aborted(journal)) {
+		put_bh(bh);
+		return;
+	}
+
+	header = (journal_revoke_header_t *) jh2bh(descriptor)->b_data;
+	header->r_count = cpu_to_be32(offset);
+	set_buffer_jwrite(bh);
+	BUFFER_TRACE(bh, "write");
+	set_buffer_dirty(bh);
+	ll_rw_block(SWRITE, 1, &bh);
+}
+#endif
+
+/*
+ * Revoke support for recovery.
+ *
+ * Recovery needs to be able to:
+ *
+ *  record all revoke records, including the tid of the latest instance
+ *  of each revoke in the journal
+ *
+ *  check whether a given block in a given transaction should be replayed
+ *  (ie. has not been revoked by a revoke record in that or a subsequent
+ *  transaction)
+ *
+ *  empty the revoke table after recovery.
+ */
+
+/*
+ * First, setting revoke records.  We create a new revoke record for
+ * every block ever revoked in the log as we scan it for recovery, and
+ * we update the existing records if we find multiple revokes for a
+ * single block.
+ */
+
+int journal_set_revoke(journal_t *journal,
+		       unsigned long blocknr,
+		       tid_t sequence)
+{
+	struct jbd_revoke_record_s *record;
+
+	record = find_revoke_record(journal, blocknr);
+	if (record) {
+		/* If we have multiple occurrences, only record the
+		 * latest sequence number in the hashed record */
+		if (tid_gt(sequence, record->sequence))
+			record->sequence = sequence;
+		return 0;
+	}
+	return insert_revoke_hash(journal, blocknr, sequence);
+}
+
+/*
+ * Test revoke records.  For a given block referenced in the log, has
+ * that block been revoked?  A revoke record with a given transaction
+ * sequence number revokes all blocks in that transaction and earlier
+ * ones, but later transactions still need to be replayed.
+ */
+
+int journal_test_revoke(journal_t *journal,
+			unsigned long blocknr,
+			tid_t sequence)
+{
+	struct jbd_revoke_record_s *record;
+
+	record = find_revoke_record(journal, blocknr);
+	if (!record)
+		return 0;
+	if (tid_gt(sequence, record->sequence))
+		return 0;
+	return 1;
+}
+
+/*
+ * Finally, once recovery is over, we need to clear the revoke table so
+ * that it can be reused by the running filesystem.
+ */
+
+void journal_clear_revoke(journal_t *journal)
+{
+	int i;
+	struct list_head *hash_list;
+	struct jbd_revoke_record_s *record;
+	struct jbd_revoke_table_s *revoke;
+
+	revoke = journal->j_revoke;
+
+	for (i = 0; i < revoke->hash_size; i++) {
+		hash_list = &revoke->hash_table[i];
+		while (!list_empty(hash_list)) {
+			record = (struct jbd_revoke_record_s*) hash_list->next;
+			list_del(&record->hash);
+			kmem_cache_free(revoke_record_cache, record);
+		}
+	}
+}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
new file mode 100644
index 000000000000..e1b3c8af4d17
--- /dev/null
+++ b/fs/jbd2/transaction.c
@@ -0,0 +1,2080 @@
+/*
+ * linux/fs/transaction.c
+ *
+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
+ *
+ * Copyright 1998 Red Hat corp --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Generic filesystem transaction handling code; part of the ext2fs
+ * journaling system.
+ *
+ * This file manages transactions (compound commits managed by the
+ * journaling code) and handles (individual atomic operations by the
+ * filesystem).
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/smp_lock.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+
+/*
+ * get_transaction: obtain a new transaction_t object.
+ *
+ * Simply allocate and initialise a new transaction.  Create it in
+ * RUNNING state and add it to the current journal (which should not
+ * have an existing running transaction: we only make a new transaction
+ * once we have started to commit the old one).
+ *
+ * Preconditions:
+ *	The journal MUST be locked.  We don't perform atomic mallocs on the
+ *	new transaction	and we can't block without protecting against other
+ *	processes trying to touch the journal while it is in transition.
+ *
+ * Called under j_state_lock
+ */
+
+static transaction_t *
+get_transaction(journal_t *journal, transaction_t *transaction)
+{
+	transaction->t_journal = journal;
+	transaction->t_state = T_RUNNING;
+	transaction->t_tid = journal->j_transaction_sequence++;
+	transaction->t_expires = jiffies + journal->j_commit_interval;
+	spin_lock_init(&transaction->t_handle_lock);
+
+	/* Set up the commit timer for the new transaction. */
+	journal->j_commit_timer.expires = transaction->t_expires;
+	add_timer(&journal->j_commit_timer);
+
+	J_ASSERT(journal->j_running_transaction == NULL);
+	journal->j_running_transaction = transaction;
+
+	return transaction;
+}
+
+/*
+ * Handle management.
+ *
+ * A handle_t is an object which represents a single atomic update to a
+ * filesystem, and which tracks all of the modifications which form part
+ * of that one update.
+ */
+
+/*
+ * start_this_handle: Given a handle, deal with any locking or stalling
+ * needed to make sure that there is enough journal space for the handle
+ * to begin.  Attach the handle to a transaction and set up the
+ * transaction's buffer credits.
+ */
+
+static int start_this_handle(journal_t *journal, handle_t *handle)
+{
+	transaction_t *transaction;
+	int needed;
+	int nblocks = handle->h_buffer_credits;
+	transaction_t *new_transaction = NULL;
+	int ret = 0;
+
+	if (nblocks > journal->j_max_transaction_buffers) {
+		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+		       current->comm, nblocks,
+		       journal->j_max_transaction_buffers);
+		ret = -ENOSPC;
+		goto out;
+	}
+
+alloc_transaction:
+	if (!journal->j_running_transaction) {
+		new_transaction = jbd_kmalloc(sizeof(*new_transaction),
+						GFP_NOFS);
+		if (!new_transaction) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		memset(new_transaction, 0, sizeof(*new_transaction));
+	}
+
+	jbd_debug(3, "New handle %p going live.\n", handle);
+
+repeat:
+
+	/*
+	 * We need to hold j_state_lock until t_updates has been incremented,
+	 * for proper journal barrier handling
+	 */
+	spin_lock(&journal->j_state_lock);
+repeat_locked:
+	if (is_journal_aborted(journal) ||
+	    (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
+		spin_unlock(&journal->j_state_lock);
+		ret = -EROFS;
+		goto out;
+	}
+
+	/* Wait on the journal's transaction barrier if necessary */
+	if (journal->j_barrier_count) {
+		spin_unlock(&journal->j_state_lock);
+		wait_event(journal->j_wait_transaction_locked,
+				journal->j_barrier_count == 0);
+		goto repeat;
+	}
+
+	if (!journal->j_running_transaction) {
+		if (!new_transaction) {
+			spin_unlock(&journal->j_state_lock);
+			goto alloc_transaction;
+		}
+		get_transaction(journal, new_transaction);
+		new_transaction = NULL;
+	}
+
+	transaction = journal->j_running_transaction;
+
+	/*
+	 * If the current transaction is locked down for commit, wait for the
+	 * lock to be released.
+	 */
+	if (transaction->t_state == T_LOCKED) {
+		DEFINE_WAIT(wait);
+
+		prepare_to_wait(&journal->j_wait_transaction_locked,
+					&wait, TASK_UNINTERRUPTIBLE);
+		spin_unlock(&journal->j_state_lock);
+		schedule();
+		finish_wait(&journal->j_wait_transaction_locked, &wait);
+		goto repeat;
+	}
+
+	/*
+	 * If there is not enough space left in the log to write all potential
+	 * buffers requested by this operation, we need to stall pending a log
+	 * checkpoint to free some more log space.
+	 */
+	spin_lock(&transaction->t_handle_lock);
+	needed = transaction->t_outstanding_credits + nblocks;
+
+	if (needed > journal->j_max_transaction_buffers) {
+		/*
+		 * If the current transaction is already too large, then start
+		 * to commit it: we can then go back and attach this handle to
+		 * a new transaction.
+		 */
+		DEFINE_WAIT(wait);
+
+		jbd_debug(2, "Handle %p starting new commit...\n", handle);
+		spin_unlock(&transaction->t_handle_lock);
+		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
+				TASK_UNINTERRUPTIBLE);
+		__log_start_commit(journal, transaction->t_tid);
+		spin_unlock(&journal->j_state_lock);
+		schedule();
+		finish_wait(&journal->j_wait_transaction_locked, &wait);
+		goto repeat;
+	}
+
+	/*
+	 * The commit code assumes that it can get enough log space
+	 * without forcing a checkpoint.  This is *critical* for
+	 * correctness: a checkpoint of a buffer which is also
+	 * associated with a committing transaction creates a deadlock,
+	 * so commit simply cannot force through checkpoints.
+	 *
+	 * We must therefore ensure the necessary space in the journal
+	 * *before* starting to dirty potentially checkpointed buffers
+	 * in the new transaction.
+	 *
+	 * The worst part is, any transaction currently committing can
+	 * reduce the free space arbitrarily.  Be careful to account for
+	 * those buffers when checkpointing.
+	 */
+
+	/*
+	 * @@@ AKPM: This seems rather over-defensive.  We're giving commit
+	 * a _lot_ of headroom: 1/4 of the journal plus the size of
+	 * the committing transaction.  Really, we only need to give it
+	 * committing_transaction->t_outstanding_credits plus "enough" for
+	 * the log control blocks.
+	 * Also, this test is inconsistent with the matching one in
+	 * journal_extend().
+	 */
+	if (__log_space_left(journal) < jbd_space_needed(journal)) {
+		jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
+		spin_unlock(&transaction->t_handle_lock);
+		__log_wait_for_space(journal);
+		goto repeat_locked;
+	}
+
+	/* OK, account for the buffers that this operation expects to
+	 * use and add the handle to the running transaction. */
+
+	handle->h_transaction = transaction;
+	transaction->t_outstanding_credits += nblocks;
+	transaction->t_updates++;
+	transaction->t_handle_count++;
+	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
+		  handle, nblocks, transaction->t_outstanding_credits,
+		  __log_space_left(journal));
+	spin_unlock(&transaction->t_handle_lock);
+	spin_unlock(&journal->j_state_lock);
+out:
+	if (unlikely(new_transaction))		/* It's usually NULL */
+		kfree(new_transaction);
+	return ret;
+}
+
+/* Allocate a new handle.  This should probably be in a slab... */
+static handle_t *new_handle(int nblocks)
+{
+	handle_t *handle = jbd_alloc_handle(GFP_NOFS);
+	if (!handle)
+		return NULL;
+	memset(handle, 0, sizeof(*handle));
+	handle->h_buffer_credits = nblocks;
+	handle->h_ref = 1;
+
+	return handle;
+}
+
+/**
+ * handle_t *journal_start() - Obtain a new handle.
+ * @journal: Journal to start transaction on.
+ * @nblocks: number of block buffers we might modify
+ *
+ * We make sure that the transaction can guarantee at least nblocks of
+ * modified buffers in the log.  We block until the log can guarantee
+ * that much space.
+ *
+ * This function is visible to journal users (like ext3fs), so is not
+ * called with the journal already locked.
+ *
+ * Return a pointer to a handle (new or nested), or ERR_PTR(-errno) on failure
+ */
+handle_t *journal_start(journal_t *journal, int nblocks)
+{
+	handle_t *handle = journal_current_handle();
+	int err;
+
+	if (!journal)
+		return ERR_PTR(-EROFS);
+
+	if (handle) {
+		J_ASSERT(handle->h_transaction->t_journal == journal);
+		handle->h_ref++;
+		return handle;
+	}
+
+	handle = new_handle(nblocks);
+	if (!handle)
+		return ERR_PTR(-ENOMEM);
+
+	current->journal_info = handle;
+
+	err = start_this_handle(journal, handle);
+	if (err < 0) {
+		jbd_free_handle(handle);
+		current->journal_info = NULL;
+		handle = ERR_PTR(err);
+	}
+	return handle;
+}
+
+/**
+ * int journal_extend() - extend buffer credits.
+ * @handle:  handle to 'extend'
+ * @nblocks: nr blocks to try to extend by.
+ *
+ * Some transactions, such as large extends and truncates, can be done
+ * atomically all at once or in several stages.  The operation requests
+ * a credit for a number of buffer modifications in advance, but can
+ * extend its credit if it needs more.
+ *
+ * journal_extend tries to give the running handle more buffer credits.
+ * It does not guarantee that allocation - this is a best-effort only.
+ * The calling process MUST be able to deal cleanly with a failure to
+ * extend here.
+ *
+ * Return 0 on success, non-zero on failure.
+ *
+ * return code < 0 implies an error
+ * return code > 0 implies normal transaction-full status.
+ */
+int journal_extend(handle_t *handle, int nblocks)
+{
+	transaction_t *transaction = handle->h_transaction;
+	journal_t *journal = transaction->t_journal;
+	int result;
+	int wanted;
+
+	result = -EIO;
+	if (is_handle_aborted(handle))
+		goto out;
+
+	result = 1;
+
+	spin_lock(&journal->j_state_lock);
+
+	/* Don't extend a locked-down transaction! */
+	if (handle->h_transaction->t_state != T_RUNNING) {
+		jbd_debug(3, "denied handle %p %d blocks: "
+			  "transaction not running\n", handle, nblocks);
+		goto error_out;
+	}
+
+	spin_lock(&transaction->t_handle_lock);
+	wanted = transaction->t_outstanding_credits + nblocks;
+
+	if (wanted > journal->j_max_transaction_buffers) {
+		jbd_debug(3, "denied handle %p %d blocks: "
+			  "transaction too large\n", handle, nblocks);
+		goto unlock;
+	}
+
+	if (wanted > __log_space_left(journal)) {
+		jbd_debug(3, "denied handle %p %d blocks: "
+			  "insufficient log space\n", handle, nblocks);
+		goto unlock;
+	}
+
+	handle->h_buffer_credits += nblocks;
+	transaction->t_outstanding_credits += nblocks;
+	result = 0;
+
+	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
+unlock:
+	spin_unlock(&transaction->t_handle_lock);
+error_out:
+	spin_unlock(&journal->j_state_lock);
+out:
+	return result;
+}
+
+
+/**
+ * int journal_restart() - restart a handle.
+ * @handle:  handle to restart
+ * @nblocks: nr credits requested
+ *
+ * Restart a handle for a multi-transaction filesystem operation.
+ *
+ * If the journal_extend() call above fails to grant new buffer credits
+ * to a running handle, a call to journal_restart will commit the
+ * handle's transaction so far and reattach the handle to a new
+ * transaction capable of guaranteeing the requested number of credits.
+ *
+ * Returns 0 without doing anything if the handle has been aborted,
+ * otherwise the result of start_this_handle().
+ */
+int journal_restart(handle_t *handle, int nblocks)
+{
+	transaction_t *transaction = handle->h_transaction;
+	journal_t *journal = transaction->t_journal;
+	int ret;
+
+	/* If we've had an abort of any type, don't even think about
+	 * actually doing the restart! */
+	if (is_handle_aborted(handle))
+		return 0;
+
+	/*
+	 * First unlink the handle from its current transaction, and start the
+	 * commit on that.
+	 */
+	J_ASSERT(transaction->t_updates > 0);
+	J_ASSERT(journal_current_handle() == handle);
+
+	spin_lock(&journal->j_state_lock);
+	spin_lock(&transaction->t_handle_lock);
+	transaction->t_outstanding_credits -= handle->h_buffer_credits;
+	transaction->t_updates--;
+
+	if (!transaction->t_updates)
+		wake_up(&journal->j_wait_updates);
+	spin_unlock(&transaction->t_handle_lock);
+
+	jbd_debug(2, "restarting handle %p\n", handle);
+	__log_start_commit(journal, transaction->t_tid);
+	spin_unlock(&journal->j_state_lock);
+
+	handle->h_buffer_credits = nblocks;
+	ret = start_this_handle(journal, handle);
+	return ret;
+}
+
+
+/**
+ * void journal_lock_updates () - establish a transaction barrier.
+ * @journal:  Journal to establish a barrier on.
+ *
+ * This locks out any further updates from being started, and blocks
+ * until all existing updates have completed, returning only once the
+ * journal is in a quiescent state with no updates running.
+ *
+ * j_state_lock is taken here, so it must not be held on entry.
+ */
+void journal_lock_updates(journal_t *journal)
+{
+	DEFINE_WAIT(wait);
+
+	spin_lock(&journal->j_state_lock);
+	++journal->j_barrier_count;
+
+	/* Wait until there are no running updates */
+	while (1) {
+		transaction_t *transaction = journal->j_running_transaction;
+
+		if (!transaction)
+			break;
+
+		spin_lock(&transaction->t_handle_lock);
+		if (!transaction->t_updates) {
+			spin_unlock(&transaction->t_handle_lock);
+			break;
+		}
+		prepare_to_wait(&journal->j_wait_updates, &wait,
+				TASK_UNINTERRUPTIBLE);
+		spin_unlock(&transaction->t_handle_lock);
+		spin_unlock(&journal->j_state_lock);
+		schedule();
+		finish_wait(&journal->j_wait_updates, &wait);
+		spin_lock(&journal->j_state_lock);
+	}
+	spin_unlock(&journal->j_state_lock);
+
+	/*
+	 * We have now established a barrier against other normal updates, but
+	 * we also need to barrier against other journal_lock_updates() calls
+	 * to make sure that we serialise special journal-locked operations
+	 * too.  The mutex stays held until journal_unlock_updates().
+	 */
+	mutex_lock(&journal->j_barrier);
+}
+
+/**
+ * void journal_unlock_updates() - release barrier
+ * @journal:  Journal to release the barrier on.
+ *
+ * Release a transaction barrier obtained with journal_lock_updates().
+ *
+ * j_state_lock is taken here, so it must not be held by the caller.
+ */
+void journal_unlock_updates (journal_t *journal)
+{
+	J_ASSERT(journal->j_barrier_count != 0);
+
+	mutex_unlock(&journal->j_barrier);
+	spin_lock(&journal->j_state_lock);
+	--journal->j_barrier_count;
+	spin_unlock(&journal->j_state_lock);
+	wake_up(&journal->j_wait_transaction_locked);
+}
+
+/*
+ * Report any unexpected dirty buffers which turn up.  Normally those
+ * indicate an error, but they can occur if the user is running (say)
+ * tune2fs to modify the live filesystem, so we need the option of
+ * continuing as gracefully as possible.
+ *
+ * The caller should already hold the journal lock and
+ * j_list_lock spinlock: most callers will need those anyway
+ * in order to probe the buffer's journaling state safely.
+ */
+static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
+{
+	int jlist;
+
+	/* If this buffer is one which might reasonably be dirty
+	 * --- ie. data, or not part of this journal --- then
+	 * we're OK to leave it alone, but otherwise we need to
+	 * move the dirty bit to the journal's own internal
+	 * JBDDirty bit. */
+	jlist = jh->b_jlist;
+
+	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
+	    jlist == BJ_Shadow || jlist == BJ_Forget) {
+		struct buffer_head *bh = jh2bh(jh);
+
+		if (test_clear_buffer_dirty(bh))
+			set_buffer_jbddirty(bh);
+	}
+}
+
+/*
+ * If the buffer is already part of the current transaction, then there
+ * is nothing we need to do.  If it is already part of a prior
+ * transaction which we are still committing to disk, then we need to
+ * make sure that we do not overwrite the old copy: we do copy-out to
+ * preserve the copy going to disk.  A buffer that is on no transaction
+ * is filed on the handle's transaction as BJ_Reserved.
+ *
+ * Returns 0, -EROFS (handle aborted) or -ENOMEM (no frozen copy buffer).
+ */
+static int
+do_get_write_access(handle_t *handle, struct journal_head *jh,
+			int force_copy)
+{
+	struct buffer_head *bh;
+	transaction_t *transaction;
+	journal_t *journal;
+	int error;
+	char *frozen_buffer = NULL;
+	int need_copy = 0;
+
+	if (is_handle_aborted(handle))
+		return -EROFS;
+
+	transaction = handle->h_transaction;
+	journal = transaction->t_journal;
+
+	jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
+
+	JBUFFER_TRACE(jh, "entry");
+repeat:
+	bh = jh2bh(jh);
+
+	/* @@@ Need to check for errors here at some point. */
+
+	lock_buffer(bh);
+	jbd_lock_bh_state(bh);
+
+	/* We now hold the buffer lock so it is safe to query the buffer
+	 * state.  Is the buffer dirty?
+	 *
+	 * If so, there are two possibilities.  The buffer may be
+	 * non-journaled, and undergoing a quite legitimate writeback.
+	 * Otherwise, it is journaled, and we don't expect dirty buffers
+	 * in that state (the buffers should be marked JBD_Dirty
+	 * instead.)  So either the IO is being done under our own
+	 * control and this is a bug, or it's a third party IO such as
+	 * dump(8) (which may leave the buffer scheduled for read ---
+	 * ie. locked but not dirty) or tune2fs (which may actually have
+	 * the buffer dirtied, ugh.)  */
+
+	if (buffer_dirty(bh)) {
+		/*
+		 * First question: is this buffer already part of the current
+		 * transaction or the existing committing transaction?
+		 */
+		if (jh->b_transaction) {
+			J_ASSERT_JH(jh,
+				jh->b_transaction == transaction ||
+				jh->b_transaction ==
+					journal->j_committing_transaction);
+			if (jh->b_next_transaction)
+				J_ASSERT_JH(jh, jh->b_next_transaction ==
+							transaction);
+		}
+		/*
+		 * In any case we need to clean the dirty flag and we must
+		 * do it under the buffer lock to be sure we don't race
+		 * with running write-out.
+		 */
+		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
+		jbd_unexpected_dirty_buffer(jh);
+	}
+
+	unlock_buffer(bh);
+
+	error = -EROFS;
+	if (is_handle_aborted(handle)) {
+		jbd_unlock_bh_state(bh);
+		goto out;
+	}
+	error = 0;
+
+	/*
+	 * The buffer is already part of this transaction if b_transaction or
+	 * b_next_transaction points to it
+	 */
+	if (jh->b_transaction == transaction ||
+	    jh->b_next_transaction == transaction)
+		goto done;
+
+	/*
+	 * If there is already a copy-out version of this buffer, then we don't
+	 * need to make another one
+	 */
+	if (jh->b_frozen_data) {
+		JBUFFER_TRACE(jh, "has frozen data");
+		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+		jh->b_next_transaction = transaction;
+		goto done;
+	}
+
+	/* Is there data here we need to preserve? */
+
+	if (jh->b_transaction && jh->b_transaction != transaction) {
+		JBUFFER_TRACE(jh, "owned by older transaction");
+		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+		J_ASSERT_JH(jh, jh->b_transaction ==
+					journal->j_committing_transaction);
+
+		/* There is one case we have to be very careful about.
+		 * If the committing transaction is currently writing
+		 * this buffer out to disk and has NOT made a copy-out,
+		 * then we cannot modify the buffer contents at all
+		 * right now.  The essence of copy-out is that it is the
+		 * extra copy, not the primary copy, which gets
+		 * journaled.  If the primary copy is already going to
+		 * disk then we cannot do copy-out here. */
+
+		if (jh->b_jlist == BJ_Shadow) {
+			DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
+			wait_queue_head_t *wqh;
+
+			wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);
+
+			JBUFFER_TRACE(jh, "on shadow: sleep");
+			jbd_unlock_bh_state(bh);
+			/* commit wakes up all shadow buffers after IO */
+			for ( ; ; ) {
+				prepare_to_wait(wqh, &wait.wait,
+						TASK_UNINTERRUPTIBLE);
+				if (jh->b_jlist != BJ_Shadow)
+					break;
+				schedule();
+			}
+			finish_wait(wqh, &wait.wait);
+			goto repeat;
+		}
+
+		/* Only do the copy if the currently-owning transaction
+		 * still needs it.  If it is on the Forget list, the
+		 * committing transaction is past that stage.  The
+		 * bh_state lock is dropped around the allocation
+		 * below, so once the memory is in hand we jump back to
+		 * "repeat" and re-evaluate the buffer's state from
+		 * scratch.
+		 *
+		 * Subtle point, though: if this is a get_undo_access,
+		 * then we will be relying on the frozen_data to contain
+		 * the new value of the committed_data record after the
+		 * transaction, so we HAVE to force the frozen_data copy
+		 * in that case. */
+
+		if (jh->b_jlist != BJ_Forget || force_copy) {
+			JBUFFER_TRACE(jh, "generate frozen data");
+			if (!frozen_buffer) {
+				JBUFFER_TRACE(jh, "allocate memory for buffer");
+				jbd_unlock_bh_state(bh);
+				frozen_buffer =
+					jbd_slab_alloc(jh2bh(jh)->b_size,
+							 GFP_NOFS);
+				if (!frozen_buffer) {
+					printk(KERN_EMERG
+					       "%s: OOM for frozen_buffer\n",
+					       __FUNCTION__);
+					JBUFFER_TRACE(jh, "oom!");
+					error = -ENOMEM;
+					jbd_lock_bh_state(bh);
+					goto done;
+				}
+				goto repeat;
+			}
+			jh->b_frozen_data = frozen_buffer;
+			frozen_buffer = NULL;
+			need_copy = 1;
+		}
+		jh->b_next_transaction = transaction;
+	}
+
+
+	/*
+	 * Finally, if the buffer is not journaled right now, we need to make
+	 * sure it doesn't get written to disk before the caller actually
+	 * commits the new data
+	 */
+	if (!jh->b_transaction) {
+		JBUFFER_TRACE(jh, "no transaction");
+		J_ASSERT_JH(jh, !jh->b_next_transaction);
+		jh->b_transaction = transaction;
+		JBUFFER_TRACE(jh, "file as BJ_Reserved");
+		spin_lock(&journal->j_list_lock);
+		__journal_file_buffer(jh, transaction, BJ_Reserved);
+		spin_unlock(&journal->j_list_lock);
+	}
+
+done:
+	if (need_copy) {
+		struct page *page;
+		int offset;
+		char *source;
+
+		J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
+			    "Possible IO failure.\n");
+		page = jh2bh(jh)->b_page;
+		offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
+		source = kmap_atomic(page, KM_USER0);
+		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
+		kunmap_atomic(source, KM_USER0);
+	}
+	jbd_unlock_bh_state(bh);
+
+	/*
+	 * If we are about to journal a buffer, then any revoke pending on it is
+	 * no longer valid
+	 */
+	journal_cancel_revoke(handle, jh);
+
+out:
+	if (unlikely(frozen_buffer))	/* It's usually NULL */
+		jbd_slab_free(frozen_buffer, bh->b_size);
+
+	JBUFFER_TRACE(jh, "exit");
+	return error;
+}
+
+/**
+ * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
+ * @handle: transaction to add buffer modifications to
+ * @bh:     bh to be used for metadata writes
+ *
+ * Returns an error code or 0 on success, as reported by
+ * do_get_write_access().
+ *
+ * In full data journalling mode the buffer may be of type BJ_AsyncData,
+ * because we're write()ing a buffer which is also part of a shared mapping.
+ */
+
+int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
+{
+	struct journal_head *jh = journal_add_journal_head(bh);
+	int rc;
+
+	/* We do not want to get caught playing with fields which the
+	 * log thread also manipulates.  Make sure that the buffer
+	 * completes any outstanding IO before proceeding. */
+	rc = do_get_write_access(handle, jh, 0);
+	journal_put_journal_head(jh);
+	return rc;
+}
+
+
+/*
+ * When the user wants to journal a newly created buffer_head
+ * (ie. getblk() returned a new buffer and we are going to populate it
+ * manually rather than reading off disk), then we need to keep the
+ * buffer_head locked until it has been completely filled with new
+ * data.  In this case, we should be able to make the assertion that
+ * the bh is not already part of an existing transaction.
+ *
+ * The buffer should already be locked by the caller by this point.
+ * There is no lock ranking violation: it was a newly created,
+ * unlocked buffer beforehand. */
+
+/**
+ * int journal_get_create_access () - notify intent to use newly created bh
+ * @handle: transaction to add the new buffer to
+ * @bh: new buffer.
+ *
+ * Call this if you create a new bh.  Returns 0 or -EROFS (handle aborted).
+ */
+int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
+{
+	transaction_t *transaction = handle->h_transaction;
+	journal_t *journal = transaction->t_journal;
+	struct journal_head *jh = journal_add_journal_head(bh);
+	int err;
+
+	jbd_debug(5, "journal_head %p\n", jh);
+	err = -EROFS;
+	if (is_handle_aborted(handle))
+		goto out;
+	err = 0;
+
+	JBUFFER_TRACE(jh, "entry");
+	/*
+	 * The buffer may already belong to this transaction due to pre-zeroing
+	 * in the filesystem's new_block code.  It may also be on the previous,
+	 * committing transaction's lists, but it HAS to be in Forget state in
+	 * that case: the transaction must have deleted the buffer for it to be
+	 * reused here.
+	 */
+	jbd_lock_bh_state(bh);
+	spin_lock(&journal->j_list_lock);
+	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
+		jh->b_transaction == NULL ||
+		(jh->b_transaction == journal->j_committing_transaction &&
+			  jh->b_jlist == BJ_Forget)));
+
+	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
+
+	if (jh->b_transaction == NULL) {
+		jh->b_transaction = transaction;
+		JBUFFER_TRACE(jh, "file as BJ_Reserved");
+		__journal_file_buffer(jh, transaction, BJ_Reserved);
+	} else if (jh->b_transaction == journal->j_committing_transaction) {
+		JBUFFER_TRACE(jh, "set next transaction");
+		jh->b_next_transaction = transaction;
+	}
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(bh);
+
+	/*
+	 * akpm: I added this.  ext3_alloc_branch can pick up new indirect
+	 * blocks which contain freed but then revoked metadata.  We need
+	 * to cancel the revoke in case we end up freeing it yet again
+	 * and the reallocating as data - this would cause a second revoke,
+	 * which hits an assertion error.
+	 */
+	JBUFFER_TRACE(jh, "cancelling revoke");
+	journal_cancel_revoke(handle, jh);
+out:
+	journal_put_journal_head(jh);	/* pairs with the add above */
+	return err;
+}
+
+/**
+ * int journal_get_undo_access() -  Notify intent to modify metadata with
+ *     non-rewindable consequences
+ * @handle: transaction
+ * @bh: buffer to undo
+ *
+ * Sometimes there is a need to distinguish between metadata which has
+ * been committed to disk and that which has not.  The ext3fs code uses
+ * this for freeing and allocating space, we have to make sure that we
+ * do not reuse freed space until the deallocation has been committed,
+ * since if we overwrote that space we would make the delete
+ * un-rewindable in case of a crash.
+ *
+ * To deal with that, journal_get_undo_access requests write access to a
+ * buffer for parts of non-rewindable operations such as delete
+ * operations on the bitmaps.  The journaling code must keep a copy of
+ * the buffer's contents prior to the undo_access call until such time
+ * as we know that the buffer has definitely been committed to disk.
+ *
+ * We never need to know which transaction the committed data is part
+ * of, buffers touched here are guaranteed to be dirtied later and so
+ * will be committed to a new transaction in due course, at which point
+ * we can discard the old committed data pointer.
+ *
+ * Returns error number or 0 on success; -ENOMEM if the committed-data
+ * copy cannot be allocated.
+ */
+int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
+{
+	int err;
+	struct journal_head *jh = journal_add_journal_head(bh);
+	char *committed_data = NULL;
+
+	JBUFFER_TRACE(jh, "entry");
+
+	/*
+	 * Do this first --- it can drop the journal lock, so we want to
+	 * make sure that obtaining the committed_data is done
+	 * atomically wrt. completion of any outstanding commits.
+	 */
+	err = do_get_write_access(handle, jh, 1);
+	if (err)
+		goto out;
+
+repeat:
+	if (!jh->b_committed_data) {
+		committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+		if (!committed_data) {
+			printk(KERN_EMERG "%s: No memory for committed data\n",
+				__FUNCTION__);
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	jbd_lock_bh_state(bh);
+	if (!jh->b_committed_data) {
+		/* Copy out the current buffer contents into the
+		 * preserved, committed copy. */
+		JBUFFER_TRACE(jh, "generate b_committed data");
+		if (!committed_data) {
+			jbd_unlock_bh_state(bh);
+			goto repeat;
+		}
+
+		jh->b_committed_data = committed_data;
+		committed_data = NULL;
+		memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
+	}
+	jbd_unlock_bh_state(bh);
+out:
+	journal_put_journal_head(jh);
+	if (unlikely(committed_data))
+		jbd_slab_free(committed_data, bh->b_size);
+	return err;
+}
+
+/**
+ * int journal_dirty_data() -  mark a buffer as containing dirty data which
+ *                             needs to be flushed before we can commit the
+ *                             current transaction.
+ * @handle: transaction
+ * @bh: bufferhead to mark
+ *
+ * The buffer is placed on the transaction's data list and is marked as
+ * belonging to the transaction.
+ *
+ * Always returns 0, including when the handle has been aborted.
+ *
+ * journal_dirty_data() can be called via page_launder->ext3_writepage
+ * by kswapd.
+ */
+int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
+{
+	journal_t *journal = handle->h_transaction->t_journal;
+	int need_brelse = 0;
+	struct journal_head *jh;
+
+	if (is_handle_aborted(handle))
+		return 0;
+
+	jh = journal_add_journal_head(bh);
+	JBUFFER_TRACE(jh, "entry");
+
+	/*
+	 * The buffer could *already* be dirty.  Writeout can start
+	 * at any time.
+	 */
+	jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);
+
+	/*
+	 * What if the buffer is already part of a running transaction?
+	 *
+	 * There are two cases:
+	 * 1) It is part of the current running transaction.  Refile it,
+	 *    just in case we have allocated it as metadata, deallocated
+	 *    it, then reallocated it as data.
+	 * 2) It is part of the previous, still-committing transaction.
+	 *    If all we want to do is to guarantee that the buffer will be
+	 *    written to disk before this new transaction commits, then
+	 *    being sure that the *previous* transaction has this same
+	 *    property is sufficient for us!  Just leave it on its old
+	 *    transaction.
+	 *
+	 * In case (2), the buffer must not already exist as metadata
+	 * --- that would violate write ordering (a transaction is free
+	 * to write its data at any point, even before the previous
+	 * committing transaction has committed).  The caller must
+	 * never, ever allow this to happen: there's nothing we can do
+	 * about it in this layer.
+	 */
+	jbd_lock_bh_state(bh);
+	spin_lock(&journal->j_list_lock);
+	if (jh->b_transaction) {
+		JBUFFER_TRACE(jh, "has transaction");
+		if (jh->b_transaction != handle->h_transaction) {
+			JBUFFER_TRACE(jh, "belongs to older transaction");
+			J_ASSERT_JH(jh, jh->b_transaction ==
+					journal->j_committing_transaction);
+
+			/* @@@ IS THIS TRUE  ? */
+			/*
+			 * Not any more.  Scenario: someone does a write()
+			 * in data=journal mode.  The buffer's transaction has
+			 * moved into commit.  Then someone does another
+			 * write() to the file.  We do the frozen data copyout
+			 * and set b_next_transaction to point to j_running_t.
+			 * And while we're in that state, someone does a
+			 * writepage() in an attempt to pageout the same area
+			 * of the file via a shared mapping.  At present that
+			 * calls journal_dirty_data(), and we get right here.
+			 * It may be too late to journal the data.  Simply
+			 * falling through to the next test will suffice: the
+			 * data will be dirty and will be checkpointed.  The
+			 * ordering comments in the next comment block still
+			 * apply.
+			 */
+			//J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+
+			/*
+			 * If we're journalling data, and this buffer was
+			 * subject to a write(), it could be metadata, forget
+			 * or shadow against the committing transaction.  Now,
+			 * someone has dirtied the same darn page via a mapping
+			 * and it is being writepage()'d.
+			 * We *could* just steal the page from commit, with some
+			 * fancy locking there.  Instead, we just skip it -
+			 * don't tie the page's buffers to the new transaction
+			 * at all.
+			 * Implication: if we crash before the writepage() data
+			 * is written into the filesystem, recovery will replay
+			 * the write() data.
+			 */
+			if (jh->b_jlist != BJ_None &&
+					jh->b_jlist != BJ_SyncData &&
+					jh->b_jlist != BJ_Locked) {
+				JBUFFER_TRACE(jh, "Not stealing");
+				goto no_journal;
+			}
+
+			/*
+			 * This buffer may be undergoing writeout in commit.  We
+			 * can't return from here and let the caller dirty it
+			 * again because that can cause the write-out loop in
+			 * commit to never terminate.
+			 */
+			if (buffer_dirty(bh)) {
+				get_bh(bh);
+				spin_unlock(&journal->j_list_lock);
+				jbd_unlock_bh_state(bh);
+				need_brelse = 1;
+				sync_dirty_buffer(bh);
+				jbd_lock_bh_state(bh);
+				spin_lock(&journal->j_list_lock);
+				/* The buffer may become locked again at any
+				   time if it is redirtied */
+			}
+
+			/* journal_clean_data_list() may have got there first */
+			if (jh->b_transaction != NULL) {
+				JBUFFER_TRACE(jh, "unfile from commit");
+				__journal_temp_unlink_buffer(jh);
+				/* It still points to the committing
+				 * transaction; move it to this one so
+				 * that the refile assert checks are
+				 * happy. */
+				jh->b_transaction = handle->h_transaction;
+			}
+			/* The buffer will be refiled below */
+
+		}
+		/*
+		 * Special case --- the buffer might actually have been
+		 * allocated and then immediately deallocated in the previous,
+		 * committing transaction, so might still be left on that
+		 * transaction's metadata lists.
+		 */
+		if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
+			JBUFFER_TRACE(jh, "not on correct data list: unfile");
+			J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
+			__journal_temp_unlink_buffer(jh);
+			jh->b_transaction = handle->h_transaction;
+			JBUFFER_TRACE(jh, "file as data");
+			__journal_file_buffer(jh, handle->h_transaction,
+						BJ_SyncData);
+		}
+	} else {
+		JBUFFER_TRACE(jh, "not on a transaction");
+		__journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
+	}
+no_journal:
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(bh);
+	if (need_brelse) {
+		BUFFER_TRACE(bh, "brelse");
+		__brelse(bh);
+	}
+	JBUFFER_TRACE(jh, "exit");
+	journal_put_journal_head(jh);
+	return 0;
+}
+
+/**
+ * int journal_dirty_metadata() -  mark a buffer as containing dirty metadata
+ * @handle: transaction to add buffer to.
+ * @bh: buffer to mark
+ *
+ * mark dirty metadata which needs to be journaled as part of the current
+ * transaction.
+ *
+ * The buffer is placed on the transaction's metadata list and is marked
+ * as belonging to the transaction.
+ *
+ * Always returns 0, including when the handle has been aborted.
+ *
+ * Special care needs to be taken if the buffer already belongs to the
+ * current committing transaction (in which case we should have frozen
+ * data present for that commit).  In that case, we don't relink the
+ * buffer: that only gets done when the old transaction finally
+ * completes its commit.
+ */
+int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
+{
+	transaction_t *transaction = handle->h_transaction;
+	journal_t *journal = transaction->t_journal;
+	struct journal_head *jh = bh2jh(bh);
+
+	jbd_debug(5, "journal_head %p\n", jh);
+	JBUFFER_TRACE(jh, "entry");
+	if (is_handle_aborted(handle))
+		goto out;
+
+	jbd_lock_bh_state(bh);
+
+	if (jh->b_modified == 0) {
+		/*
+		 * This buffer is being modified and is becoming part
+		 * of the transaction.  This needs to be done
+		 * once per transaction -bzzz
+		 */
+		jh->b_modified = 1;
+		J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
+		handle->h_buffer_credits--;
+	}
+
+	/*
+	 * fastpath, to avoid expensive locking.  If this buffer is already
+	 * on the running transaction's metadata list there is nothing to do.
+	 * Nobody can take it off again because there is a handle open.
+	 * I _think_ we're OK here with SMP barriers - a mistaken decision will
+	 * result in this test being false, so we go in and take the locks.
+	 */
+	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
+		JBUFFER_TRACE(jh, "fastpath");
+		J_ASSERT_JH(jh, jh->b_transaction ==
+					journal->j_running_transaction);
+		goto out_unlock_bh;
+	}
+
+	set_buffer_jbddirty(bh);
+
+	/*
+	 * Metadata already on the current transaction list doesn't
+	 * need to be filed.  Metadata on another transaction's list must
+	 * be committing, and will be refiled once the commit completes:
+	 * leave it alone for now.
+	 */
+	if (jh->b_transaction != transaction) {
+		JBUFFER_TRACE(jh, "already on other transaction");
+		J_ASSERT_JH(jh, jh->b_transaction ==
+					journal->j_committing_transaction);
+		J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
+		/* And this case is illegal: we can't reuse another
+		 * transaction's data buffer, ever. */
+		goto out_unlock_bh;
+	}
+
+	/* That test should have eliminated the following case: */
+	J_ASSERT_JH(jh, jh->b_frozen_data == 0);
+
+	JBUFFER_TRACE(jh, "file as BJ_Metadata");
+	spin_lock(&journal->j_list_lock);
+	__journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
+	spin_unlock(&journal->j_list_lock);
+out_unlock_bh:
+	jbd_unlock_bh_state(bh);
+out:
+	JBUFFER_TRACE(jh, "exit");
+	return 0;
+}
+
+/*
+ * journal_release_buffer: undo a get_write_access without any buffer
+ * updates, if the update decided in the end that it didn't need access.
+ * Only traces the call here; no journal state is changed.
+ */
+void
+journal_release_buffer(handle_t *handle, struct buffer_head *bh)
+{
+	BUFFER_TRACE(bh, "entry");
+}
+
+/**
+ * int journal_forget() - bforget() for potentially-journaled buffers.
+ * @handle: transaction handle
+ * @bh:     bh to 'forget'
+ *
+ * We can only do the bforget if there are no commits pending against the
+ * buffer.  If the buffer is dirty in the current running transaction we
+ * can safely unlink it.
+ *
+ * bh may not be a journalled buffer at all - it may be a non-JBD
+ * buffer which came off the hashtable.  Check for this.
+ *
+ * Decrements bh->b_count by one.  Returns 0, or -EIO if the buffer has
+ * b_committed_data set (nothing is done to the journal in that case).
+ * Allow this call even if the handle has aborted --- it may be part of
+ * the caller's cleanup after an abort.
+ */
+int journal_forget (handle_t *handle, struct buffer_head *bh)
+{
+	transaction_t *transaction = handle->h_transaction;
+	journal_t *journal = transaction->t_journal;
+	struct journal_head *jh;
+	int drop_reserve = 0;
+	int err = 0;
+
+	BUFFER_TRACE(bh, "entry");
+
+	jbd_lock_bh_state(bh);
+	spin_lock(&journal->j_list_lock);
+
+	if (!buffer_jbd(bh))
+		goto not_jbd;
+	jh = bh2jh(bh);
+
+	/* Critical error: attempting to delete a bitmap buffer, maybe?
+	 * Don't do any jbd operations, and return an error. */
+	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
+			 "inconsistent data on disk")) {
+		err = -EIO;
+		goto not_jbd;
+	}
+
+	/*
+	 * The buffer's going from the transaction, we must drop
+	 * all references -bzzz
+	 */
+	jh->b_modified = 0;
+
+	if (jh->b_transaction == handle->h_transaction) {
+		J_ASSERT_JH(jh, !jh->b_frozen_data);
+
+		/* If we are forgetting a buffer which is already part
+		 * of this transaction, then we can just drop it from
+		 * the transaction immediately. */
+		clear_buffer_dirty(bh);
+		clear_buffer_jbddirty(bh);
+
+		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
+
+		drop_reserve = 1;
+
+		/*
+		 * We are no longer going to journal this buffer.
+		 * However, the commit of this transaction is still
+		 * important to the buffer: the delete that we are now
+		 * processing might obsolete an old log entry, so by
+		 * committing, we can satisfy the buffer's checkpoint.
+		 *
+		 * So, if we have a checkpoint on the buffer, we should
+		 * now refile the buffer on our BJ_Forget list so that
+		 * we know to remove the checkpoint after we commit.
+		 */
+
+		if (jh->b_cp_transaction) {
+			__journal_temp_unlink_buffer(jh);
+			__journal_file_buffer(jh, transaction, BJ_Forget);
+		} else {
+			__journal_unfile_buffer(jh);
+			journal_remove_journal_head(bh);
+			__brelse(bh);
+			if (!buffer_jbd(bh)) {
+				spin_unlock(&journal->j_list_lock);
+				jbd_unlock_bh_state(bh);
+				__bforget(bh);
+				goto drop;
+			}
+		}
+	} else if (jh->b_transaction) {
+		J_ASSERT_JH(jh, (jh->b_transaction ==
+				 journal->j_committing_transaction));
+		/* However, if the buffer is still owned by a prior
+		 * (committing) transaction, we can't drop it yet... */
+		JBUFFER_TRACE(jh, "belongs to older transaction");
+		/* ... but we CAN drop it from the new transaction if we
+		 * have also modified it since the original commit. */
+
+		if (jh->b_next_transaction) {
+			J_ASSERT(jh->b_next_transaction == transaction);
+			jh->b_next_transaction = NULL;
+			drop_reserve = 1;
+		}
+	}
+
+not_jbd:
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(bh);
+	__brelse(bh);
+drop:
+	if (drop_reserve) {
+		/* no need to reserve log space for this block -bzzz */
+		handle->h_buffer_credits++;
+	}
+	return err;
+}
+
+/**
+ * int journal_stop() - complete a transaction
+ * @handle: transaction to complete.
+ *
+ * All done for a particular handle.
+ *
+ * There is not much action needed here.  We just return any remaining
+ * buffer credits to the transaction and remove the handle.  The only
+ * complication is that we need to start a commit operation if the
+ * filesystem is marked for synchronous update.
+ *
+ * journal_stop itself will not usually return an error, but it may
+ * do so in unusual circumstances.  In particular, expect it to
+ * return -EIO if a journal_abort has been executed since the
+ * transaction began.
+ */
+int journal_stop(handle_t *handle)
+{
+	transaction_t *transaction = handle->h_transaction;
+	journal_t *journal = transaction->t_journal;
+	int old_handle_count, err;
+	pid_t pid;
+
+	J_ASSERT(transaction->t_updates > 0);
+	J_ASSERT(journal_current_handle() == handle);
+
+	if (is_handle_aborted(handle))
+		err = -EIO;
+	else
+		err = 0;
+
+	if (--handle->h_ref > 0) {
+		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
+			  handle->h_ref);
+		return err;
+	}
+
+	jbd_debug(4, "Handle %p going down\n", handle);
+
+	/*
+	 * Implement synchronous transaction batching.  If the handle
+	 * was synchronous, don't force a commit immediately.  Let's
+	 * yield and let another thread piggyback onto this transaction.
+	 * Keep doing that while new threads continue to arrive.
+	 * It doesn't cost much - we're about to run a commit and sleep
+	 * on IO anyway.  Speeds up many-threaded, many-dir operations
+	 * by 30x or more...
+	 *
+	 * But don't do this if this process was the most recent one to
+	 * perform a synchronous write.  We do this to detect the case where a
+	 * single process is doing a stream of sync writes.  No point in waiting
+	 * for joiners in that case.
+	 */
+	pid = current->pid;
+	if (handle->h_sync && journal->j_last_sync_writer != pid) {
+		journal->j_last_sync_writer = pid;
+		do {
+			old_handle_count = transaction->t_handle_count;
+			schedule_timeout_uninterruptible(1);
+		} while (old_handle_count != transaction->t_handle_count);
+	}
+
+	current->journal_info = NULL;
+	spin_lock(&journal->j_state_lock);
+	spin_lock(&transaction->t_handle_lock);
+	transaction->t_outstanding_credits -= handle->h_buffer_credits;
+	transaction->t_updates--;
+	if (!transaction->t_updates) {
+		wake_up(&journal->j_wait_updates);
+		if (journal->j_barrier_count)
+			wake_up(&journal->j_wait_transaction_locked);
+	}
+
+	/*
+	 * If the handle is marked SYNC, we need to set another commit
+	 * going!  We also want to force a commit if the current
+	 * transaction is occupying too much of the log, or if the
+	 * transaction is too old now.
+	 */
+	if (handle->h_sync ||
+			transaction->t_outstanding_credits >
+				journal->j_max_transaction_buffers ||
+			time_after_eq(jiffies, transaction->t_expires)) {
+		/* Do this even for aborted journals: an abort still
+		 * completes the commit thread, it just doesn't write
+		 * anything to disk. */
+		tid_t tid = transaction->t_tid;
+
+		spin_unlock(&transaction->t_handle_lock);
+		jbd_debug(2, "transaction too old, requesting commit for "
+					"handle %p\n", handle);
+		/* This is non-blocking */
+		__log_start_commit(journal, transaction->t_tid);
+		spin_unlock(&journal->j_state_lock);
+
+		/*
+		 * Special case: JFS_SYNC synchronous updates require us
+		 * to wait for the commit to complete.
+		 */
+		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
+			err = log_wait_commit(journal, tid);
+	} else {
+		spin_unlock(&transaction->t_handle_lock);
+		spin_unlock(&journal->j_state_lock);
+	}
+
+	jbd_free_handle(handle);
+	return err;
+}
+
+/**int journal_force_commit() - force any uncommitted transactions
+ * @journal: journal to force
+ *
+ * For synchronous operations: force any uncommitted transactions
+ * to disk.  May seem kludgy, but it reuses all the handle batching
+ * code in a very simple manner.
+ */
+int journal_force_commit(journal_t *journal)
+{
+	handle_t *handle;
+	int ret;
+
+	handle = journal_start(journal, 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+	} else {
+		handle->h_sync = 1;
+		ret = journal_stop(handle);
+	}
+	return ret;
+}
+
+/*
+ *
+ * List management code snippets: various functions for manipulating the
+ * transaction buffer lists.
+ *
+ */
+
+/*
+ * Append a buffer to a transaction list, given the transaction's list head
+ * pointer.
+ *
+ * j_list_lock is held.
+ *
+ * jbd_lock_bh_state(jh2bh(jh)) is held.
+ */
+
+static inline void
+__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
+{
+	if (!*list) {
+		jh->b_tnext = jh->b_tprev = jh;
+		*list = jh;
+	} else {
+		/* Insert at the tail of the list to preserve order */
+		struct journal_head *first = *list, *last = first->b_tprev;
+		jh->b_tprev = last;
+		jh->b_tnext = first;
+		last->b_tnext = first->b_tprev = jh;
+	}
+}
+
+/*
+ * Remove a buffer from a transaction list, given the transaction's list
+ * head pointer.
+ *
+ * Called with j_list_lock held, and the journal may not be locked.
+ *
+ * jbd_lock_bh_state(jh2bh(jh)) is held.
+ */
+
+static inline void
+__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
+{
+	if (*list == jh) {
+		*list = jh->b_tnext;
+		if (*list == jh)
+			*list = NULL;
+	}
+	jh->b_tprev->b_tnext = jh->b_tnext;
+	jh->b_tnext->b_tprev = jh->b_tprev;
+}
+
+/*
+ * Remove a buffer from the appropriate transaction list.
+ *
+ * Note that this function can *change* the value of
+ * bh->b_transaction->t_sync_datalist, t_buffers, t_forget,
+ * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list.  If the caller
+ * is holding onto a copy of one of thee pointers, it could go bad.
+ * Generally the caller needs to re-read the pointer from the transaction_t.
+ *
+ * Called under j_list_lock.  The journal may not be locked.
+ */
+void __journal_temp_unlink_buffer(struct journal_head *jh)
+{
+	struct journal_head **list = NULL;
+	transaction_t *transaction;
+	struct buffer_head *bh = jh2bh(jh);
+
+	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+	transaction = jh->b_transaction;
+	if (transaction)
+		assert_spin_locked(&transaction->t_journal->j_list_lock);
+
+	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
+	if (jh->b_jlist != BJ_None)
+		J_ASSERT_JH(jh, transaction != 0);
+
+	switch (jh->b_jlist) {
+	case BJ_None:
+		return;
+	case BJ_SyncData:
+		list = &transaction->t_sync_datalist;
+		break;
+	case BJ_Metadata:
+		transaction->t_nr_buffers--;
+		J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
+		list = &transaction->t_buffers;
+		break;
+	case BJ_Forget:
+		list = &transaction->t_forget;
+		break;
+	case BJ_IO:
+		list = &transaction->t_iobuf_list;
+		break;
+	case BJ_Shadow:
+		list = &transaction->t_shadow_list;
+		break;
+	case BJ_LogCtl:
+		list = &transaction->t_log_list;
+		break;
+	case BJ_Reserved:
+		list = &transaction->t_reserved_list;
+		break;
+	case BJ_Locked:
+		list = &transaction->t_locked_list;
+		break;
+	}
+
+	__blist_del_buffer(list, jh);
+	jh->b_jlist = BJ_None;
+	if (test_clear_buffer_jbddirty(bh))
+		mark_buffer_dirty(bh);	/* Expose it to the VM */
+}
+
+void __journal_unfile_buffer(struct journal_head *jh)
+{
+	__journal_temp_unlink_buffer(jh);
+	jh->b_transaction = NULL;
+}
+
+void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
+{
+	jbd_lock_bh_state(jh2bh(jh));
+	spin_lock(&journal->j_list_lock);
+	__journal_unfile_buffer(jh);
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(jh2bh(jh));
+}
+
+/*
+ * Called from journal_try_to_free_buffers().
+ *
+ * Called under jbd_lock_bh_state(bh)
+ */
+static void
+__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
+{
+	struct journal_head *jh;
+
+	jh = bh2jh(bh);
+
+	if (buffer_locked(bh) || buffer_dirty(bh))
+		goto out;
+
+	if (jh->b_next_transaction != 0)
+		goto out;
+
+	spin_lock(&journal->j_list_lock);
+	if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) {
+		if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
+			/* A written-back ordered data buffer */
+			JBUFFER_TRACE(jh, "release data");
+			__journal_unfile_buffer(jh);
+			journal_remove_journal_head(bh);
+			__brelse(bh);
+		}
+	} else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
+		/* written-back checkpointed metadata buffer */
+		if (jh->b_jlist == BJ_None) {
+			JBUFFER_TRACE(jh, "remove from checkpoint list");
+			__journal_remove_checkpoint(jh);
+			journal_remove_journal_head(bh);
+			__brelse(bh);
+		}
+	}
+	spin_unlock(&journal->j_list_lock);
+out:
+	return;
+}
+
+
+/**
+ * int journal_try_to_free_buffers() - try to free page buffers.
+ * @journal: journal for operation
+ * @page: to try and free
+ * @unused_gfp_mask: unused
+ *
+ *
+ * For all the buffers on this page,
+ * if they are fully written out ordered data, move them onto BUF_CLEAN
+ * so try_to_free_buffers() can reap them.
+ *
+ * This function returns non-zero if we wish try_to_free_buffers()
+ * to be called. We do this if the page is releasable by try_to_free_buffers().
+ * We also do it if the page has locked or dirty buffers and the caller wants
+ * us to perform sync or async writeout.
+ *
+ * This complicates JBD locking somewhat.  We aren't protected by the
+ * BKL here.  We wish to remove the buffer from its committing or
+ * running transaction's ->t_datalist via __journal_unfile_buffer.
+ *
+ * This may *change* the value of transaction_t->t_datalist, so anyone
+ * who looks at t_datalist needs to lock against this function.
+ *
+ * Even worse, someone may be doing a journal_dirty_data on this
+ * buffer.  So we need to lock against that.  journal_dirty_data()
+ * will come out of the lock with the buffer dirty, which makes it
+ * ineligible for release here.
+ *
+ * Who else is affected by this?  hmm...  Really the only contender
+ * is do_get_write_access() - it could be looking at the buffer while
+ * journal_try_to_free_buffer() is changing its state.  But that
+ * cannot happen because we never reallocate freed data as metadata
+ * while the data is part of a transaction.  Yes?
+ */
+int journal_try_to_free_buffers(journal_t *journal,
+				struct page *page, gfp_t unused_gfp_mask)
+{
+	struct buffer_head *head;
+	struct buffer_head *bh;
+	int ret = 0;
+
+	J_ASSERT(PageLocked(page));
+
+	head = page_buffers(page);
+	bh = head;
+	do {
+		struct journal_head *jh;
+
+		/*
+		 * We take our own ref against the journal_head here to avoid
+		 * having to add tons of locking around each instance of
+		 * journal_remove_journal_head() and journal_put_journal_head().
+		 */
+		jh = journal_grab_journal_head(bh);
+		if (!jh)
+			continue;
+
+		jbd_lock_bh_state(bh);
+		__journal_try_to_free_buffer(journal, bh);
+		journal_put_journal_head(jh);
+		jbd_unlock_bh_state(bh);
+		if (buffer_jbd(bh))
+			goto busy;
+	} while ((bh = bh->b_this_page) != head);
+	ret = try_to_free_buffers(page);
+busy:
+	return ret;
+}
+
+/*
+ * This buffer is no longer needed.  If it is on an older transaction's
+ * checkpoint list we need to record it on this transaction's forget list
+ * to pin this buffer (and hence its checkpointing transaction) down until
+ * this transaction commits.  If the buffer isn't on a checkpoint list, we
+ * release it.
+ * Returns non-zero if JBD no longer has an interest in the buffer.
+ *
+ * Called under j_list_lock.
+ *
+ * Called under jbd_lock_bh_state(bh).
+ */
+static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
+{
+	int may_free = 1;
+	struct buffer_head *bh = jh2bh(jh);
+
+	__journal_unfile_buffer(jh);
+
+	if (jh->b_cp_transaction) {
+		JBUFFER_TRACE(jh, "on running+cp transaction");
+		__journal_file_buffer(jh, transaction, BJ_Forget);
+		clear_buffer_jbddirty(bh);
+		may_free = 0;
+	} else {
+		JBUFFER_TRACE(jh, "on running transaction");
+		journal_remove_journal_head(bh);
+		__brelse(bh);
+	}
+	return may_free;
+}
+
+/*
+ * journal_invalidatepage
+ *
+ * This code is tricky.  It has a number of cases to deal with.
+ *
+ * There are two invariants which this code relies on:
+ *
+ * i_size must be updated on disk before we start calling invalidatepage on the
+ * data.
+ *
+ *  This is done in ext3 by defining an ext3_setattr method which
+ *  updates i_size before truncate gets going.  By maintaining this
+ *  invariant, we can be sure that it is safe to throw away any buffers
+ *  attached to the current transaction: once the transaction commits,
+ *  we know that the data will not be needed.
+ *
+ *  Note however that we can *not* throw away data belonging to the
+ *  previous, committing transaction!
+ *
+ * Any disk blocks which *are* part of the previous, committing
+ * transaction (and which therefore cannot be discarded immediately) are
+ * not going to be reused in the new running transaction
+ *
+ *  The bitmap committed_data images guarantee this: any block which is
+ *  allocated in one transaction and removed in the next will be marked
+ *  as in-use in the committed_data bitmap, so cannot be reused until
+ *  the next transaction to delete the block commits.  This means that
+ *  leaving committing buffers dirty is quite safe: the disk blocks
+ *  cannot be reallocated to a different file and so buffer aliasing is
+ *  not possible.
+ *
+ *
+ * The above applies mainly to ordered data mode.  In writeback mode we
+ * don't make guarantees about the order in which data hits disk --- in
+ * particular we don't guarantee that new dirty data is flushed before
+ * transaction commit --- so it is always safe just to discard data
+ * immediately in that mode.  --sct
+ */
+
+/*
+ * The journal_unmap_buffer helper function returns zero if the buffer
+ * concerned remains pinned as an anonymous buffer belonging to an older
+ * transaction.
+ *
+ * We're outside-transaction here.  Either or both of j_running_transaction
+ * and j_committing_transaction may be NULL.
+ */
+static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
+{
+	transaction_t *transaction;
+	struct journal_head *jh;
+	int may_free = 1;
+	int ret;
+
+	BUFFER_TRACE(bh, "entry");
+
+	/*
+	 * It is safe to proceed here without the j_list_lock because the
+	 * buffers cannot be stolen by try_to_free_buffers as long as we are
+	 * holding the page lock. --sct
+	 */
+
+	if (!buffer_jbd(bh))
+		goto zap_buffer_unlocked;
+
+	spin_lock(&journal->j_state_lock);
+	jbd_lock_bh_state(bh);
+	spin_lock(&journal->j_list_lock);
+
+	jh = journal_grab_journal_head(bh);
+	if (!jh)
+		goto zap_buffer_no_jh;
+
+	transaction = jh->b_transaction;
+	if (transaction == NULL) {
+		/* First case: not on any transaction.  If it
+		 * has no checkpoint link, then we can zap it:
+		 * it's a writeback-mode buffer so we don't care
+		 * if it hits disk safely. */
+		if (!jh->b_cp_transaction) {
+			JBUFFER_TRACE(jh, "not on any transaction: zap");
+			goto zap_buffer;
+		}
+
+		if (!buffer_dirty(bh)) {
+			/* bdflush has written it.  We can drop it now */
+			goto zap_buffer;
+		}
+
+		/* OK, it must be in the journal but still not
+		 * written fully to disk: it's metadata or
+		 * journaled data... */
+
+		if (journal->j_running_transaction) {
+			/* ... and once the current transaction has
+			 * committed, the buffer won't be needed any
+			 * longer. */
+			JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
+			ret = __dispose_buffer(jh,
+					journal->j_running_transaction);
+			journal_put_journal_head(jh);
+			spin_unlock(&journal->j_list_lock);
+			jbd_unlock_bh_state(bh);
+			spin_unlock(&journal->j_state_lock);
+			return ret;
+		} else {
+			/* There is no currently-running transaction. So the
+			 * orphan record which we wrote for this file must have
+			 * passed into commit.  We must attach this buffer to
+			 * the committing transaction, if it exists. */
+			if (journal->j_committing_transaction) {
+				JBUFFER_TRACE(jh, "give to committing trans");
+				ret = __dispose_buffer(jh,
+					journal->j_committing_transaction);
+				journal_put_journal_head(jh);
+				spin_unlock(&journal->j_list_lock);
+				jbd_unlock_bh_state(bh);
+				spin_unlock(&journal->j_state_lock);
+				return ret;
+			} else {
+				/* The orphan record's transaction has
+				 * committed.  We can cleanse this buffer */
+				clear_buffer_jbddirty(bh);
+				goto zap_buffer;
+			}
+		}
+	} else if (transaction == journal->j_committing_transaction) {
+		if (jh->b_jlist == BJ_Locked) {
+			/*
+			 * The buffer is on the committing transaction's locked
+			 * list.  We have the buffer locked, so I/O has
+			 * completed.  So we can nail the buffer now.
+			 */
+			may_free = __dispose_buffer(jh, transaction);
+			goto zap_buffer;
+		}
+		/*
+		 * If it is committing, we simply cannot touch it.  We
+		 * can remove it's next_transaction pointer from the
+		 * running transaction if that is set, but nothing
+		 * else. */
+		JBUFFER_TRACE(jh, "on committing transaction");
+		set_buffer_freed(bh);
+		if (jh->b_next_transaction) {
+			J_ASSERT(jh->b_next_transaction ==
+					journal->j_running_transaction);
+			jh->b_next_transaction = NULL;
+		}
+		journal_put_journal_head(jh);
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		spin_unlock(&journal->j_state_lock);
+		return 0;
+	} else {
+		/* Good, the buffer belongs to the running transaction.
+		 * We are writing our own transaction's data, not any
+		 * previous one's, so it is safe to throw it away
+		 * (remember that we expect the filesystem to have set
+		 * i_size already for this truncate so recovery will not
+		 * expose the disk blocks we are discarding here.) */
+		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
+		may_free = __dispose_buffer(jh, transaction);
+	}
+
+zap_buffer:
+	journal_put_journal_head(jh);
+zap_buffer_no_jh:
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(bh);
+	spin_unlock(&journal->j_state_lock);
+zap_buffer_unlocked:
+	clear_buffer_dirty(bh);
+	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
+	clear_buffer_mapped(bh);
+	clear_buffer_req(bh);
+	clear_buffer_new(bh);
+	bh->b_bdev = NULL;
+	return may_free;
+}
+
+/**
+ * void journal_invalidatepage()
+ * @journal: journal to use for flush...
+ * @page:    page to flush
+ * @offset:  length of page to invalidate.
+ *
+ * Reap page buffers containing data after offset in page.
+ *
+ */
+void journal_invalidatepage(journal_t *journal,
+		      struct page *page,
+		      unsigned long offset)
+{
+	struct buffer_head *head, *bh, *next;
+	unsigned int curr_off = 0;
+	int may_free = 1;
+
+	if (!PageLocked(page))
+		BUG();
+	if (!page_has_buffers(page))
+		return;
+
+	/* We will potentially be playing with lists other than just the
+	 * data lists (especially for journaled data mode), so be
+	 * cautious in our locking. */
+
+	head = bh = page_buffers(page);
+	do {
+		unsigned int next_off = curr_off + bh->b_size;
+		next = bh->b_this_page;
+
+		if (offset <= curr_off) {
+			/* This block is wholly outside the truncation point */
+			lock_buffer(bh);
+			may_free &= journal_unmap_buffer(journal, bh);
+			unlock_buffer(bh);
+		}
+		curr_off = next_off;
+		bh = next;
+
+	} while (bh != head);
+
+	if (!offset) {
+		if (may_free && try_to_free_buffers(page))
+			J_ASSERT(!page_has_buffers(page));
+	}
+}
+
+/*
+ * File a buffer on the given transaction list.
+ */
+void __journal_file_buffer(struct journal_head *jh,
+			transaction_t *transaction, int jlist)
+{
+	struct journal_head **list = NULL;
+	int was_dirty = 0;
+	struct buffer_head *bh = jh2bh(jh);
+
+	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+	assert_spin_locked(&transaction->t_journal->j_list_lock);
+
+	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
+	J_ASSERT_JH(jh, jh->b_transaction == transaction ||
+				jh->b_transaction == 0);
+
+	if (jh->b_transaction && jh->b_jlist == jlist)
+		return;
+
+	/* The following list of buffer states needs to be consistent
+	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
+	 * state. */
+
+	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
+	    jlist == BJ_Shadow || jlist == BJ_Forget) {
+		if (test_clear_buffer_dirty(bh) ||
+		    test_clear_buffer_jbddirty(bh))
+			was_dirty = 1;
+	}
+
+	if (jh->b_transaction)
+		__journal_temp_unlink_buffer(jh);
+	jh->b_transaction = transaction;
+
+	switch (jlist) {
+	case BJ_None:
+		J_ASSERT_JH(jh, !jh->b_committed_data);
+		J_ASSERT_JH(jh, !jh->b_frozen_data);
+		return;
+	case BJ_SyncData:
+		list = &transaction->t_sync_datalist;
+		break;
+	case BJ_Metadata:
+		transaction->t_nr_buffers++;
+		list = &transaction->t_buffers;
+		break;
+	case BJ_Forget:
+		list = &transaction->t_forget;
+		break;
+	case BJ_IO:
+		list = &transaction->t_iobuf_list;
+		break;
+	case BJ_Shadow:
+		list = &transaction->t_shadow_list;
+		break;
+	case BJ_LogCtl:
+		list = &transaction->t_log_list;
+		break;
+	case BJ_Reserved:
+		list = &transaction->t_reserved_list;
+		break;
+	case BJ_Locked:
+		list =  &transaction->t_locked_list;
+		break;
+	}
+
+	__blist_add_buffer(list, jh);
+	jh->b_jlist = jlist;
+
+	if (was_dirty)
+		set_buffer_jbddirty(bh);
+}
+
+void journal_file_buffer(struct journal_head *jh,
+				transaction_t *transaction, int jlist)
+{
+	jbd_lock_bh_state(jh2bh(jh));
+	spin_lock(&transaction->t_journal->j_list_lock);
+	__journal_file_buffer(jh, transaction, jlist);
+	spin_unlock(&transaction->t_journal->j_list_lock);
+	jbd_unlock_bh_state(jh2bh(jh));
+}
+
+/*
+ * Remove a buffer from its current buffer list in preparation for
+ * dropping it from its current transaction entirely.  If the buffer has
+ * already started to be used by a subsequent transaction, refile the
+ * buffer on that transaction's metadata list.
+ *
+ * Called under journal->j_list_lock
+ *
+ * Called under jbd_lock_bh_state(jh2bh(jh))
+ */
+void __journal_refile_buffer(struct journal_head *jh)
+{
+	int was_dirty;
+	struct buffer_head *bh = jh2bh(jh);
+
+	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+	if (jh->b_transaction)
+		assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
+
+	/* If the buffer is now unused, just drop it. */
+	if (jh->b_next_transaction == NULL) {
+		__journal_unfile_buffer(jh);
+		return;
+	}
+
+	/*
+	 * It has been modified by a later transaction: add it to the new
+	 * transaction's metadata list.
+	 */
+
+	was_dirty = test_clear_buffer_jbddirty(bh);
+	__journal_temp_unlink_buffer(jh);
+	jh->b_transaction = jh->b_next_transaction;
+	jh->b_next_transaction = NULL;
+	__journal_file_buffer(jh, jh->b_transaction,
+				was_dirty ? BJ_Metadata : BJ_Reserved);
+	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
+
+	if (was_dirty)
+		set_buffer_jbddirty(bh);
+}
+
+/*
+ * For the unlocked version of this call, also make sure that any
+ * hanging journal_head is cleaned up if necessary.
+ *
+ * __journal_refile_buffer is usually called as part of a single locked
+ * operation on a buffer_head, in which the caller is probably going to
+ * be hooking the journal_head onto other lists.  In that case it is up
+ * to the caller to remove the journal_head if necessary.  For the
+ * unlocked journal_refile_buffer call, the caller isn't going to be
+ * doing anything else to the buffer so we need to do the cleanup
+ * ourselves to avoid a jh leak.
+ *
+ * *** The journal_head may be freed by this call! ***
+ */
+void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
+{
+	struct buffer_head *bh = jh2bh(jh);
+
+	jbd_lock_bh_state(bh);
+	spin_lock(&journal->j_list_lock);
+
+	__journal_refile_buffer(jh);
+	jbd_unlock_bh_state(bh);
+	journal_remove_journal_head(bh);
+
+	spin_unlock(&journal->j_list_lock);
+	__brelse(bh);
+}
-- 
cgit v1.2.3


From f7f4bccb729844a0fa873e224e3a6f7eeed095bb Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 11 Oct 2006 01:20:59 -0700
Subject: [PATCH] jbd2: rename jbd2 symbols to avoid duplication of jbd symbols

Mingming Cao originally did this work, and Shaggy reproduced it using some
scripts from her.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/Makefile      |   4 +-
 fs/jbd2/checkpoint.c  |  54 +++---
 fs/jbd2/commit.c      | 122 +++++++-------
 fs/jbd2/journal.c     | 454 +++++++++++++++++++++++++-------------------------
 fs/jbd2/recovery.c    |  46 ++---
 fs/jbd2/revoke.c      | 146 ++++++++--------
 fs/jbd2/transaction.c | 244 +++++++++++++--------------
 7 files changed, 535 insertions(+), 535 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile
index 54aca4868a36..802a3413872a 100644
--- a/fs/jbd2/Makefile
+++ b/fs/jbd2/Makefile
@@ -2,6 +2,6 @@
 # Makefile for the linux journaling routines.
 #
 
-obj-$(CONFIG_JBD) += jbd.o
+obj-$(CONFIG_JBD2) += jbd2.o
 
-jbd-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o
+jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 0208cc7ac5d0..68039fa9a566 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -19,7 +19,7 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 
@@ -95,9 +95,9 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		ret = __journal_remove_checkpoint(jh) + 1;
+		ret = __jbd2_journal_remove_checkpoint(jh) + 1;
 		jbd_unlock_bh_state(bh);
-		journal_remove_journal_head(bh);
+		jbd2_journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
 	} else {
@@ -107,19 +107,19 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 }
 
 /*
- * __log_wait_for_space: wait until there is space in the journal.
+ * __jbd2_log_wait_for_space: wait until there is space in the journal.
  *
  * Called under j-state_lock *only*.  It will be unlocked if we have to wait
  * for a checkpoint to free up some space in the log.
  */
-void __log_wait_for_space(journal_t *journal)
+void __jbd2_log_wait_for_space(journal_t *journal)
 {
 	int nblocks;
 	assert_spin_locked(&journal->j_state_lock);
 
 	nblocks = jbd_space_needed(journal);
-	while (__log_space_left(journal) < nblocks) {
-		if (journal->j_flags & JFS_ABORT)
+	while (__jbd2_log_space_left(journal) < nblocks) {
+		if (journal->j_flags & JBD2_ABORT)
 			return;
 		spin_unlock(&journal->j_state_lock);
 		mutex_lock(&journal->j_checkpoint_mutex);
@@ -130,9 +130,9 @@ void __log_wait_for_space(journal_t *journal)
 		 */
 		spin_lock(&journal->j_state_lock);
 		nblocks = jbd_space_needed(journal);
-		if (__log_space_left(journal) < nblocks) {
+		if (__jbd2_log_space_left(journal) < nblocks) {
 			spin_unlock(&journal->j_state_lock);
-			log_do_checkpoint(journal);
+			jbd2_log_do_checkpoint(journal);
 			spin_lock(&journal->j_state_lock);
 		}
 		mutex_unlock(&journal->j_checkpoint_mutex);
@@ -198,9 +198,9 @@ restart:
 		 * Now in whatever state the buffer currently is, we know that
 		 * it has been written out and so we can drop it from the list
 		 */
-		released = __journal_remove_checkpoint(jh);
+		released = __jbd2_journal_remove_checkpoint(jh);
 		jbd_unlock_bh_state(bh);
-		journal_remove_journal_head(bh);
+		jbd2_journal_remove_journal_head(bh);
 		__brelse(bh);
 	}
 }
@@ -252,16 +252,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
-		log_start_commit(journal, tid);
-		log_wait_commit(journal, tid);
+		jbd2_log_start_commit(journal, tid);
+		jbd2_log_wait_commit(journal, tid);
 		ret = 1;
 	} else if (!buffer_dirty(bh)) {
 		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
 		BUFFER_TRACE(bh, "remove from checkpoint");
-		__journal_remove_checkpoint(jh);
+		__jbd2_journal_remove_checkpoint(jh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
-		journal_remove_journal_head(bh);
+		jbd2_journal_remove_journal_head(bh);
 		__brelse(bh);
 		ret = 1;
 	} else {
@@ -296,7 +296,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
  *
  * The journal should be locked before calling this function.
  */
-int log_do_checkpoint(journal_t *journal)
+int jbd2_log_do_checkpoint(journal_t *journal)
 {
 	transaction_t *transaction;
 	tid_t this_tid;
@@ -309,7 +309,7 @@ int log_do_checkpoint(journal_t *journal)
 	 * don't need checkpointing, just eliminate them from the
 	 * journal straight away.
 	 */
-	result = cleanup_journal_tail(journal);
+	result = jbd2_cleanup_journal_tail(journal);
 	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
 	if (result <= 0)
 		return result;
@@ -374,7 +374,7 @@ restart:
 	}
 out:
 	spin_unlock(&journal->j_list_lock);
-	result = cleanup_journal_tail(journal);
+	result = jbd2_cleanup_journal_tail(journal);
 	if (result < 0)
 		return result;
 	return 0;
@@ -397,7 +397,7 @@ out:
  * we have an abort error outstanding.
  */
 
-int cleanup_journal_tail(journal_t *journal)
+int jbd2_cleanup_journal_tail(journal_t *journal)
 {
 	transaction_t * transaction;
 	tid_t		first_tid;
@@ -452,8 +452,8 @@ int cleanup_journal_tail(journal_t *journal)
 	journal->j_tail_sequence = first_tid;
 	journal->j_tail = blocknr;
 	spin_unlock(&journal->j_state_lock);
-	if (!(journal->j_flags & JFS_ABORT))
-		journal_update_superblock(journal, 1);
+	if (!(journal->j_flags & JBD2_ABORT))
+		jbd2_journal_update_superblock(journal, 1);
 	return 0;
 }
 
@@ -518,7 +518,7 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
  * Returns number of buffers reaped (for debug)
  */
 
-int __journal_clean_checkpoint_list(journal_t *journal)
+int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
 	int ret = 0;
@@ -578,7 +578,7 @@ out:
  * This function is called with jbd_lock_bh_state(jh2bh(jh))
  */
 
-int __journal_remove_checkpoint(struct journal_head *jh)
+int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 {
 	transaction_t *transaction;
 	journal_t *journal;
@@ -607,7 +607,7 @@ int __journal_remove_checkpoint(struct journal_head *jh)
 	 * dropped!
 	 *
 	 * The locking here around j_committing_transaction is a bit sleazy.
-	 * See the comment at the end of journal_commit_transaction().
+	 * See the comment at the end of jbd2_journal_commit_transaction().
 	 */
 	if (transaction == journal->j_committing_transaction) {
 		JBUFFER_TRACE(jh, "belongs to committing transaction");
@@ -617,7 +617,7 @@ int __journal_remove_checkpoint(struct journal_head *jh)
 	/* OK, that was the last buffer for the transaction: we can now
 	   safely remove this transaction from the log */
 
-	__journal_drop_transaction(journal, transaction);
+	__jbd2_journal_drop_transaction(journal, transaction);
 
 	/* Just in case anybody was waiting for more transactions to be
            checkpointed... */
@@ -636,7 +636,7 @@ out:
  * Called with the journal locked.
  * Called with j_list_lock held.
  */
-void __journal_insert_checkpoint(struct journal_head *jh,
+void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
 			       transaction_t *transaction)
 {
 	JBUFFER_TRACE(jh, "entry");
@@ -666,7 +666,7 @@ void __journal_insert_checkpoint(struct journal_head *jh,
  * Called with j_list_lock held.
  */
 
-void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
+void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 {
 	assert_spin_locked(&journal->j_list_lock);
 	if (transaction->t_cpnext) {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 10be51290a27..b1a4eafc1541 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1,5 +1,5 @@
 /*
- * linux/fs/jbd/commit.c
+ * linux/fs/jbd2/commit.c
  *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
  *
@@ -15,7 +15,7 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
@@ -111,7 +111,7 @@ static int journal_write_commit_record(journal_t *journal,
 	if (is_journal_aborted(journal))
 		return 0;
 
-	descriptor = journal_get_descriptor_buffer(journal);
+	descriptor = jbd2_journal_get_descriptor_buffer(journal);
 	if (!descriptor)
 		return 1;
 
@@ -120,14 +120,14 @@ static int journal_write_commit_record(journal_t *journal,
 	/* AKPM: buglet - add `i' to tmp! */
 	for (i = 0; i < bh->b_size; i += 512) {
 		journal_header_t *tmp = (journal_header_t*)bh->b_data;
-		tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
-		tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
+		tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
+		tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
 		tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
 	}
 
 	JBUFFER_TRACE(descriptor, "write commit block");
 	set_buffer_dirty(bh);
-	if (journal->j_flags & JFS_BARRIER) {
+	if (journal->j_flags & JBD2_BARRIER) {
 		set_buffer_ordered(bh);
 		barrier_done = 1;
 	}
@@ -145,7 +145,7 @@ static int journal_write_commit_record(journal_t *journal,
 			"disabling barriers\n",
 			bdevname(journal->j_dev, b));
 		spin_lock(&journal->j_state_lock);
-		journal->j_flags &= ~JFS_BARRIER;
+		journal->j_flags &= ~JBD2_BARRIER;
 		spin_unlock(&journal->j_state_lock);
 
 		/* And try again, without the barrier */
@@ -155,7 +155,7 @@ static int journal_write_commit_record(journal_t *journal,
 		ret = sync_dirty_buffer(bh);
 	}
 	put_bh(bh);		/* One for getblk() */
-	journal_put_journal_head(descriptor);
+	jbd2_journal_put_journal_head(descriptor);
 
 	return (ret == -EIO);
 }
@@ -239,7 +239,7 @@ write_out_data:
 		if (locked && test_clear_buffer_dirty(bh)) {
 			BUFFER_TRACE(bh, "needs writeout, adding to array");
 			wbuf[bufs++] = bh;
-			__journal_file_buffer(jh, commit_transaction,
+			__jbd2_journal_file_buffer(jh, commit_transaction,
 						BJ_Locked);
 			jbd_unlock_bh_state(bh);
 			if (bufs == journal->j_wbufsize) {
@@ -251,13 +251,13 @@ write_out_data:
 		}
 		else {
 			BUFFER_TRACE(bh, "writeout complete: unfile");
-			__journal_unfile_buffer(jh);
+			__jbd2_journal_unfile_buffer(jh);
 			jbd_unlock_bh_state(bh);
 			if (locked)
 				unlock_buffer(bh);
-			journal_remove_journal_head(bh);
+			jbd2_journal_remove_journal_head(bh);
 			/* Once for our safety reference, once for
-			 * journal_remove_journal_head() */
+			 * jbd2_journal_remove_journal_head() */
 			put_bh(bh);
 			put_bh(bh);
 		}
@@ -272,12 +272,12 @@ write_out_data:
 }
 
 /*
- * journal_commit_transaction
+ * jbd2_journal_commit_transaction
  *
  * The primary function for committing a transaction to the log.  This
  * function is called by the journal thread to begin a complete commit.
  */
-void journal_commit_transaction(journal_t *journal)
+void jbd2_journal_commit_transaction(journal_t *journal)
 {
 	transaction_t *commit_transaction;
 	struct journal_head *jh, *new_jh, *descriptor;
@@ -305,10 +305,10 @@ void journal_commit_transaction(journal_t *journal)
 	spin_unlock(&journal->j_list_lock);
 #endif
 
-	/* Do we need to erase the effects of a prior journal_flush? */
-	if (journal->j_flags & JFS_FLUSHED) {
+	/* Do we need to erase the effects of a prior jbd2_journal_flush? */
+	if (journal->j_flags & JBD2_FLUSHED) {
 		jbd_debug(3, "super block updated\n");
-		journal_update_superblock(journal, 1);
+		jbd2_journal_update_superblock(journal, 1);
 	} else {
 		jbd_debug(3, "superblock not updated\n");
 	}
@@ -350,7 +350,7 @@ void journal_commit_transaction(journal_t *journal)
 	 * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
 	 * that there are no such buffers: if a large filesystem
 	 * operation like a truncate needs to split itself over multiple
-	 * transactions, then it may try to do a journal_restart() while
+	 * transactions, then it may try to do a jbd2_journal_restart() while
 	 * there are still BJ_Reserved buffers outstanding.  These must
 	 * be released cleanly from the current transaction.
 	 *
@@ -358,25 +358,25 @@ void journal_commit_transaction(journal_t *journal)
 	 * again before modifying the buffer in the new transaction, but
 	 * we do not require it to remember exactly which old buffers it
 	 * has reserved.  This is consistent with the existing behaviour
-	 * that multiple journal_get_write_access() calls to the same
+	 * that multiple jbd2_journal_get_write_access() calls to the same
 	 * buffer are perfectly permissable.
 	 */
 	while (commit_transaction->t_reserved_list) {
 		jh = commit_transaction->t_reserved_list;
 		JBUFFER_TRACE(jh, "reserved, unused: refile");
 		/*
-		 * A journal_get_undo_access()+journal_release_buffer() may
+		 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
 		 * leave undo-committed data.
 		 */
 		if (jh->b_committed_data) {
 			struct buffer_head *bh = jh2bh(jh);
 
 			jbd_lock_bh_state(bh);
-			jbd_slab_free(jh->b_committed_data, bh->b_size);
+			jbd2_slab_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			jbd_unlock_bh_state(bh);
 		}
-		journal_refile_buffer(journal, jh);
+		jbd2_journal_refile_buffer(journal, jh);
 	}
 
 	/*
@@ -385,7 +385,7 @@ void journal_commit_transaction(journal_t *journal)
 	 * frees some memory
 	 */
 	spin_lock(&journal->j_list_lock);
-	__journal_clean_checkpoint_list(journal);
+	__jbd2_journal_clean_checkpoint_list(journal);
 	spin_unlock(&journal->j_list_lock);
 
 	jbd_debug (3, "JBD: commit phase 1\n");
@@ -393,7 +393,7 @@ void journal_commit_transaction(journal_t *journal)
 	/*
 	 * Switch to a new revoke table.
 	 */
-	journal_switch_revoke_table(journal);
+	jbd2_journal_switch_revoke_table(journal);
 
 	commit_transaction->t_state = T_FLUSH;
 	journal->j_committing_transaction = commit_transaction;
@@ -450,9 +450,9 @@ void journal_commit_transaction(journal_t *journal)
 			continue;
 		}
 		if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
-			__journal_unfile_buffer(jh);
+			__jbd2_journal_unfile_buffer(jh);
 			jbd_unlock_bh_state(bh);
-			journal_remove_journal_head(bh);
+			jbd2_journal_remove_journal_head(bh);
 			put_bh(bh);
 		} else {
 			jbd_unlock_bh_state(bh);
@@ -463,9 +463,9 @@ void journal_commit_transaction(journal_t *journal)
 	spin_unlock(&journal->j_list_lock);
 
 	if (err)
-		__journal_abort_hard(journal);
+		__jbd2_journal_abort_hard(journal);
 
-	journal_write_revoke_records(journal, commit_transaction);
+	jbd2_journal_write_revoke_records(journal, commit_transaction);
 
 	jbd_debug(3, "JBD: commit phase 2\n");
 
@@ -499,7 +499,7 @@ void journal_commit_transaction(journal_t *journal)
 
 		if (is_journal_aborted(journal)) {
 			JBUFFER_TRACE(jh, "journal is aborting: refile");
-			journal_refile_buffer(journal, jh);
+			jbd2_journal_refile_buffer(journal, jh);
 			/* If that was the last one, we need to clean up
 			 * any descriptor buffers which may have been
 			 * already allocated, even if we are now
@@ -519,9 +519,9 @@ void journal_commit_transaction(journal_t *journal)
 
 			jbd_debug(4, "JBD: get descriptor\n");
 
-			descriptor = journal_get_descriptor_buffer(journal);
+			descriptor = jbd2_journal_get_descriptor_buffer(journal);
 			if (!descriptor) {
-				__journal_abort_hard(journal);
+				__jbd2_journal_abort_hard(journal);
 				continue;
 			}
 
@@ -529,8 +529,8 @@ void journal_commit_transaction(journal_t *journal)
 			jbd_debug(4, "JBD: got buffer %llu (%p)\n",
 				(unsigned long long)bh->b_blocknr, bh->b_data);
 			header = (journal_header_t *)&bh->b_data[0];
-			header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
-			header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
+			header->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
+			header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);
 			header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);
 
 			tagp = &bh->b_data[sizeof(journal_header_t)];
@@ -543,25 +543,25 @@ void journal_commit_transaction(journal_t *journal)
 			/* Record it so that we can wait for IO
                            completion later */
 			BUFFER_TRACE(bh, "ph3: file as descriptor");
-			journal_file_buffer(descriptor, commit_transaction,
+			jbd2_journal_file_buffer(descriptor, commit_transaction,
 					BJ_LogCtl);
 		}
 
 		/* Where is the buffer to be written? */
 
-		err = journal_next_log_block(journal, &blocknr);
+		err = jbd2_journal_next_log_block(journal, &blocknr);
 		/* If the block mapping failed, just abandon the buffer
 		   and repeat this loop: we'll fall into the
 		   refile-on-abort condition above. */
 		if (err) {
-			__journal_abort_hard(journal);
+			__jbd2_journal_abort_hard(journal);
 			continue;
 		}
 
 		/*
 		 * start_this_handle() uses t_outstanding_credits to determine
 		 * the free space in the log, but this counter is changed
-		 * by journal_next_log_block() also.
+		 * by jbd2_journal_next_log_block() also.
 		 */
 		commit_transaction->t_outstanding_credits--;
 
@@ -576,13 +576,13 @@ void journal_commit_transaction(journal_t *journal)
 
 		set_bit(BH_JWrite, &jh2bh(jh)->b_state);
 		/*
-		 * akpm: journal_write_metadata_buffer() sets
+		 * akpm: jbd2_journal_write_metadata_buffer() sets
 		 * new_bh->b_transaction to commit_transaction.
 		 * We need to clean this up before we release new_bh
 		 * (which is of type BJ_IO)
 		 */
 		JBUFFER_TRACE(jh, "ph3: write metadata");
-		flags = journal_write_metadata_buffer(commit_transaction,
+		flags = jbd2_journal_write_metadata_buffer(commit_transaction,
 						      jh, &new_jh, blocknr);
 		set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
 		wbuf[bufs++] = jh2bh(new_jh);
@@ -592,9 +592,9 @@ void journal_commit_transaction(journal_t *journal)
 
 		tag_flag = 0;
 		if (flags & 1)
-			tag_flag |= JFS_FLAG_ESCAPE;
+			tag_flag |= JBD2_FLAG_ESCAPE;
 		if (!first_tag)
-			tag_flag |= JFS_FLAG_SAME_UUID;
+			tag_flag |= JBD2_FLAG_SAME_UUID;
 
 		tag = (journal_block_tag_t *) tagp;
 		tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
@@ -622,7 +622,7 @@ void journal_commit_transaction(journal_t *journal)
                            submitting the IOs.  "tag" still points to
                            the last tag we set up. */
 
-			tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
+			tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG);
 
 start_journal_io:
 			for (i = 0; i < bufs; i++) {
@@ -678,14 +678,14 @@ wait_for_iobuf:
 		clear_buffer_jwrite(bh);
 
 		JBUFFER_TRACE(jh, "ph4: unfile after journal write");
-		journal_unfile_buffer(journal, jh);
+		jbd2_journal_unfile_buffer(journal, jh);
 
 		/*
 		 * ->t_iobuf_list should contain only dummy buffer_heads
-		 * which were created by journal_write_metadata_buffer().
+		 * which were created by jbd2_journal_write_metadata_buffer().
 		 */
 		BUFFER_TRACE(bh, "dumping temporary bh");
-		journal_put_journal_head(jh);
+		jbd2_journal_put_journal_head(jh);
 		__brelse(bh);
 		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
 		free_buffer_head(bh);
@@ -702,7 +702,7 @@ wait_for_iobuf:
                    we finally commit, we can do any checkpointing
                    required. */
 		JBUFFER_TRACE(jh, "file as BJ_Forget");
-		journal_file_buffer(jh, commit_transaction, BJ_Forget);
+		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
 		/* Wake up any transactions which were waiting for this
 		   IO to complete */
 		wake_up_bit(&bh->b_state, BH_Unshadow);
@@ -733,8 +733,8 @@ wait_for_iobuf:
 
 		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
 		clear_buffer_jwrite(bh);
-		journal_unfile_buffer(journal, jh);
-		journal_put_journal_head(jh);
+		jbd2_journal_unfile_buffer(journal, jh);
+		jbd2_journal_put_journal_head(jh);
 		__brelse(bh);		/* One for getblk */
 		/* AKPM: bforget here */
 	}
@@ -745,7 +745,7 @@ wait_for_iobuf:
 		err = -EIO;
 
 	if (err)
-		__journal_abort_hard(journal);
+		__jbd2_journal_abort_hard(journal);
 
 	/* End of a transaction!  Finally, we can do checkpoint
            processing: any buffers committed as a result of this
@@ -789,14 +789,14 @@ restart_loop:
 		 * Otherwise, we can just throw away the frozen data now.
 		 */
 		if (jh->b_committed_data) {
-			jbd_slab_free(jh->b_committed_data, bh->b_size);
+			jbd2_slab_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			if (jh->b_frozen_data) {
 				jh->b_committed_data = jh->b_frozen_data;
 				jh->b_frozen_data = NULL;
 			}
 		} else if (jh->b_frozen_data) {
-			jbd_slab_free(jh->b_frozen_data, bh->b_size);
+			jbd2_slab_free(jh->b_frozen_data, bh->b_size);
 			jh->b_frozen_data = NULL;
 		}
 
@@ -804,12 +804,12 @@ restart_loop:
 		cp_transaction = jh->b_cp_transaction;
 		if (cp_transaction) {
 			JBUFFER_TRACE(jh, "remove from old cp transaction");
-			__journal_remove_checkpoint(jh);
+			__jbd2_journal_remove_checkpoint(jh);
 		}
 
 		/* Only re-checkpoint the buffer_head if it is marked
 		 * dirty.  If the buffer was added to the BJ_Forget list
-		 * by journal_forget, it may no longer be dirty and
+		 * by jbd2_journal_forget, it may no longer be dirty and
 		 * there's no point in keeping a checkpoint record for
 		 * it. */
 
@@ -828,9 +828,9 @@ restart_loop:
 
 		if (buffer_jbddirty(bh)) {
 			JBUFFER_TRACE(jh, "add to new checkpointing trans");
-			__journal_insert_checkpoint(jh, commit_transaction);
+			__jbd2_journal_insert_checkpoint(jh, commit_transaction);
 			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
-			__journal_refile_buffer(jh);
+			__jbd2_journal_refile_buffer(jh);
 			jbd_unlock_bh_state(bh);
 		} else {
 			J_ASSERT_BH(bh, !buffer_dirty(bh));
@@ -842,11 +842,11 @@ restart_loop:
 			 * disk and before we process the buffer on BJ_Forget
 			 * list. */
 			JBUFFER_TRACE(jh, "refile or unfile freed buffer");
-			__journal_refile_buffer(jh);
+			__jbd2_journal_refile_buffer(jh);
 			if (!jh->b_transaction) {
 				jbd_unlock_bh_state(bh);
 				 /* needs a brelse */
-				journal_remove_journal_head(bh);
+				jbd2_journal_remove_journal_head(bh);
 				release_buffer_page(bh);
 			} else
 				jbd_unlock_bh_state(bh);
@@ -856,9 +856,9 @@ restart_loop:
 	spin_unlock(&journal->j_list_lock);
 	/*
 	 * This is a bit sleazy.  We borrow j_list_lock to protect
-	 * journal->j_committing_transaction in __journal_remove_checkpoint.
-	 * Really, __journal_remove_checkpoint should be using j_state_lock but
-	 * it's a bit hassle to hold that across __journal_remove_checkpoint
+	 * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint.
+	 * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but
+	 * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint
 	 */
 	spin_lock(&journal->j_state_lock);
 	spin_lock(&journal->j_list_lock);
@@ -885,7 +885,7 @@ restart_loop:
 	spin_unlock(&journal->j_state_lock);
 
 	if (commit_transaction->t_checkpoint_list == NULL) {
-		__journal_drop_transaction(journal, commit_transaction);
+		__jbd2_journal_drop_transaction(journal, commit_transaction);
 	} else {
 		if (journal->j_checkpoint_transactions == NULL) {
 			journal->j_checkpoint_transactions = commit_transaction;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c518dd8fe60a..3fbbba20a516 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1,5 +1,5 @@
 /*
- * linux/fs/jbd/journal.c
+ * linux/fs/jbd2/journal.c
  *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
  *
@@ -25,7 +25,7 @@
 #include <linux/module.h>
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
@@ -40,51 +40,51 @@
 #include <asm/uaccess.h>
 #include <asm/page.h>
 
-EXPORT_SYMBOL(journal_start);
-EXPORT_SYMBOL(journal_restart);
-EXPORT_SYMBOL(journal_extend);
-EXPORT_SYMBOL(journal_stop);
-EXPORT_SYMBOL(journal_lock_updates);
-EXPORT_SYMBOL(journal_unlock_updates);
-EXPORT_SYMBOL(journal_get_write_access);
-EXPORT_SYMBOL(journal_get_create_access);
-EXPORT_SYMBOL(journal_get_undo_access);
-EXPORT_SYMBOL(journal_dirty_data);
-EXPORT_SYMBOL(journal_dirty_metadata);
-EXPORT_SYMBOL(journal_release_buffer);
-EXPORT_SYMBOL(journal_forget);
+EXPORT_SYMBOL(jbd2_journal_start);
+EXPORT_SYMBOL(jbd2_journal_restart);
+EXPORT_SYMBOL(jbd2_journal_extend);
+EXPORT_SYMBOL(jbd2_journal_stop);
+EXPORT_SYMBOL(jbd2_journal_lock_updates);
+EXPORT_SYMBOL(jbd2_journal_unlock_updates);
+EXPORT_SYMBOL(jbd2_journal_get_write_access);
+EXPORT_SYMBOL(jbd2_journal_get_create_access);
+EXPORT_SYMBOL(jbd2_journal_get_undo_access);
+EXPORT_SYMBOL(jbd2_journal_dirty_data);
+EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
+EXPORT_SYMBOL(jbd2_journal_release_buffer);
+EXPORT_SYMBOL(jbd2_journal_forget);
 #if 0
 EXPORT_SYMBOL(journal_sync_buffer);
 #endif
-EXPORT_SYMBOL(journal_flush);
-EXPORT_SYMBOL(journal_revoke);
-
-EXPORT_SYMBOL(journal_init_dev);
-EXPORT_SYMBOL(journal_init_inode);
-EXPORT_SYMBOL(journal_update_format);
-EXPORT_SYMBOL(journal_check_used_features);
-EXPORT_SYMBOL(journal_check_available_features);
-EXPORT_SYMBOL(journal_set_features);
-EXPORT_SYMBOL(journal_create);
-EXPORT_SYMBOL(journal_load);
-EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_update_superblock);
-EXPORT_SYMBOL(journal_abort);
-EXPORT_SYMBOL(journal_errno);
-EXPORT_SYMBOL(journal_ack_err);
-EXPORT_SYMBOL(journal_clear_err);
-EXPORT_SYMBOL(log_wait_commit);
-EXPORT_SYMBOL(journal_start_commit);
-EXPORT_SYMBOL(journal_force_commit_nested);
-EXPORT_SYMBOL(journal_wipe);
-EXPORT_SYMBOL(journal_blocks_per_page);
-EXPORT_SYMBOL(journal_invalidatepage);
-EXPORT_SYMBOL(journal_try_to_free_buffers);
-EXPORT_SYMBOL(journal_force_commit);
+EXPORT_SYMBOL(jbd2_journal_flush);
+EXPORT_SYMBOL(jbd2_journal_revoke);
+
+EXPORT_SYMBOL(jbd2_journal_init_dev);
+EXPORT_SYMBOL(jbd2_journal_init_inode);
+EXPORT_SYMBOL(jbd2_journal_update_format);
+EXPORT_SYMBOL(jbd2_journal_check_used_features);
+EXPORT_SYMBOL(jbd2_journal_check_available_features);
+EXPORT_SYMBOL(jbd2_journal_set_features);
+EXPORT_SYMBOL(jbd2_journal_create);
+EXPORT_SYMBOL(jbd2_journal_load);
+EXPORT_SYMBOL(jbd2_journal_destroy);
+EXPORT_SYMBOL(jbd2_journal_update_superblock);
+EXPORT_SYMBOL(jbd2_journal_abort);
+EXPORT_SYMBOL(jbd2_journal_errno);
+EXPORT_SYMBOL(jbd2_journal_ack_err);
+EXPORT_SYMBOL(jbd2_journal_clear_err);
+EXPORT_SYMBOL(jbd2_log_wait_commit);
+EXPORT_SYMBOL(jbd2_journal_start_commit);
+EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
+EXPORT_SYMBOL(jbd2_journal_wipe);
+EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
+EXPORT_SYMBOL(jbd2_journal_invalidatepage);
+EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
+EXPORT_SYMBOL(jbd2_journal_force_commit);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
-static int journal_create_jbd_slab(size_t slab_size);
+static int jbd2_journal_create_jbd_slab(size_t slab_size);
 
 /*
  * Helper function used to manage commit timeouts
@@ -98,7 +98,7 @@ static void commit_timeout(unsigned long __data)
 }
 
 /*
- * kjournald: The main thread function used to manage a logging device
+ * kjournald2: The main thread function used to manage a logging device
  * journal.
  *
  * This kernel thread is responsible for two things:
@@ -113,7 +113,7 @@ static void commit_timeout(unsigned long __data)
  *    known as checkpointing, and this thread is responsible for that job.
  */
 
-static int kjournald(void *arg)
+static int kjournald2(void *arg)
 {
 	journal_t *journal = arg;
 	transaction_t *transaction;
@@ -129,7 +129,7 @@ static int kjournald(void *arg)
 	journal->j_task = current;
 	wake_up(&journal->j_wait_done_commit);
 
-	printk(KERN_INFO "kjournald starting.  Commit interval %ld seconds\n",
+	printk(KERN_INFO "kjournald2 starting.  Commit interval %ld seconds\n",
 			journal->j_commit_interval / HZ);
 
 	/*
@@ -138,7 +138,7 @@ static int kjournald(void *arg)
 	spin_lock(&journal->j_state_lock);
 
 loop:
-	if (journal->j_flags & JFS_UNMOUNT)
+	if (journal->j_flags & JBD2_UNMOUNT)
 		goto end_loop;
 
 	jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
@@ -148,7 +148,7 @@ loop:
 		jbd_debug(1, "OK, requests differ\n");
 		spin_unlock(&journal->j_state_lock);
 		del_timer_sync(&journal->j_commit_timer);
-		journal_commit_transaction(journal);
+		jbd2_journal_commit_transaction(journal);
 		spin_lock(&journal->j_state_lock);
 		goto loop;
 	}
@@ -160,7 +160,7 @@ loop:
 		 * good idea, because that depends on threads that may
 		 * be already stopped.
 		 */
-		jbd_debug(1, "Now suspending kjournald\n");
+		jbd_debug(1, "Now suspending kjournald2\n");
 		spin_unlock(&journal->j_state_lock);
 		refrigerator();
 		spin_lock(&journal->j_state_lock);
@@ -180,7 +180,7 @@ loop:
 		if (transaction && time_after_eq(jiffies,
 						transaction->t_expires))
 			should_sleep = 0;
-		if (journal->j_flags & JFS_UNMOUNT)
+		if (journal->j_flags & JBD2_UNMOUNT)
 			should_sleep = 0;
 		if (should_sleep) {
 			spin_unlock(&journal->j_state_lock);
@@ -190,7 +190,7 @@ loop:
 		finish_wait(&journal->j_wait_commit, &wait);
 	}
 
-	jbd_debug(1, "kjournald wakes\n");
+	jbd_debug(1, "kjournald2 wakes\n");
 
 	/*
 	 * Were we woken up by a commit wakeup event?
@@ -211,16 +211,16 @@ end_loop:
 	return 0;
 }
 
-static void journal_start_thread(journal_t *journal)
+static void jbd2_journal_start_thread(journal_t *journal)
 {
-	kthread_run(kjournald, journal, "kjournald");
+	kthread_run(kjournald2, journal, "kjournald2");
 	wait_event(journal->j_wait_done_commit, journal->j_task != 0);
 }
 
 static void journal_kill_thread(journal_t *journal)
 {
 	spin_lock(&journal->j_state_lock);
-	journal->j_flags |= JFS_UNMOUNT;
+	journal->j_flags |= JBD2_UNMOUNT;
 
 	while (journal->j_task) {
 		wake_up(&journal->j_wait_commit);
@@ -232,7 +232,7 @@ static void journal_kill_thread(journal_t *journal)
 }
 
 /*
- * journal_write_metadata_buffer: write a metadata buffer to the journal.
+ * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
  *
  * Writes a metadata buffer to a given disk block.  The actual IO is not
  * performed but a new buffer_head is constructed which labels the data
@@ -240,7 +240,7 @@ static void journal_kill_thread(journal_t *journal)
  *
  * Any magic-number escaping which needs to be done will cause a
  * copy-out here.  If the buffer happens to start with the
- * JFS_MAGIC_NUMBER, then we can't write it to the log directly: the
+ * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the
  * magic number is only written to the log for descripter blocks.  In
  * this case, we copy the data and replace the first word with 0, and we
  * return a result code which indicates that this buffer needs to be
@@ -268,7 +268,7 @@ static void journal_kill_thread(journal_t *journal)
  * Bit 1 set == buffer copy-out performed (kfree the data after IO)
  */
 
-int journal_write_metadata_buffer(transaction_t *transaction,
+int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 				  struct journal_head  *jh_in,
 				  struct journal_head **jh_out,
 				  unsigned long blocknr)
@@ -316,7 +316,7 @@ repeat:
 	 * Check for escaping
 	 */
 	if (*((__be32 *)(mapped_data + new_offset)) ==
-				cpu_to_be32(JFS_MAGIC_NUMBER)) {
+				cpu_to_be32(JBD2_MAGIC_NUMBER)) {
 		need_copy_out = 1;
 		do_escape = 1;
 	}
@@ -329,10 +329,10 @@ repeat:
 		char *tmp;
 
 		jbd_unlock_bh_state(bh_in);
-		tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
+		tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS);
 		jbd_lock_bh_state(bh_in);
 		if (jh_in->b_frozen_data) {
-			jbd_slab_free(tmp, bh_in->b_size);
+			jbd2_slab_free(tmp, bh_in->b_size);
 			goto repeat;
 		}
 
@@ -362,7 +362,7 @@ repeat:
 	atomic_set(&new_bh->b_count, 1);
 	jbd_unlock_bh_state(bh_in);
 
-	new_jh = journal_add_journal_head(new_bh);	/* This sleeps */
+	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */
 
 	set_bh_page(new_bh, new_page, new_offset);
 	new_jh->b_transaction = NULL;
@@ -380,9 +380,9 @@ repeat:
 	 * copying is moved to the transaction's shadow queue.
 	 */
 	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
-	journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
 	JBUFFER_TRACE(new_jh, "file as BJ_IO");
-	journal_file_buffer(new_jh, transaction, BJ_IO);
+	jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
 
 	return do_escape | (done_copy_out << 1);
 }
@@ -393,14 +393,14 @@ repeat:
  */
 
 /*
- * __log_space_left: Return the number of free blocks left in the journal.
+ * __jbd2_log_space_left: Return the number of free blocks left in the journal.
  *
  * Called with the journal already locked.
  *
  * Called under j_state_lock
  */
 
-int __log_space_left(journal_t *journal)
+int __jbd2_log_space_left(journal_t *journal)
 {
 	int left = journal->j_free;
 
@@ -424,7 +424,7 @@ int __log_space_left(journal_t *journal)
 /*
  * Called under j_state_lock.  Returns true if a transaction was started.
  */
-int __log_start_commit(journal_t *journal, tid_t target)
+int __jbd2_log_start_commit(journal_t *journal, tid_t target)
 {
 	/*
 	 * Are we already doing a recent enough commit?
@@ -445,12 +445,12 @@ int __log_start_commit(journal_t *journal, tid_t target)
 	return 0;
 }
 
-int log_start_commit(journal_t *journal, tid_t tid)
+int jbd2_log_start_commit(journal_t *journal, tid_t tid)
 {
 	int ret;
 
 	spin_lock(&journal->j_state_lock);
-	ret = __log_start_commit(journal, tid);
+	ret = __jbd2_log_start_commit(journal, tid);
 	spin_unlock(&journal->j_state_lock);
 	return ret;
 }
@@ -465,7 +465,7 @@ int log_start_commit(journal_t *journal, tid_t tid)
  *
  * Returns true if a transaction was started.
  */
-int journal_force_commit_nested(journal_t *journal)
+int jbd2_journal_force_commit_nested(journal_t *journal)
 {
 	transaction_t *transaction = NULL;
 	tid_t tid;
@@ -473,7 +473,7 @@ int journal_force_commit_nested(journal_t *journal)
 	spin_lock(&journal->j_state_lock);
 	if (journal->j_running_transaction && !current->journal_info) {
 		transaction = journal->j_running_transaction;
-		__log_start_commit(journal, transaction->t_tid);
+		__jbd2_log_start_commit(journal, transaction->t_tid);
 	} else if (journal->j_committing_transaction)
 		transaction = journal->j_committing_transaction;
 
@@ -484,7 +484,7 @@ int journal_force_commit_nested(journal_t *journal)
 
 	tid = transaction->t_tid;
 	spin_unlock(&journal->j_state_lock);
-	log_wait_commit(journal, tid);
+	jbd2_log_wait_commit(journal, tid);
 	return 1;
 }
 
@@ -492,7 +492,7 @@ int journal_force_commit_nested(journal_t *journal)
  * Start a commit of the current running transaction (if any).  Returns true
  * if a transaction was started, and fills its tid in at *ptid
  */
-int journal_start_commit(journal_t *journal, tid_t *ptid)
+int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
 {
 	int ret = 0;
 
@@ -500,7 +500,7 @@ int journal_start_commit(journal_t *journal, tid_t *ptid)
 	if (journal->j_running_transaction) {
 		tid_t tid = journal->j_running_transaction->t_tid;
 
-		ret = __log_start_commit(journal, tid);
+		ret = __jbd2_log_start_commit(journal, tid);
 		if (ret && ptid)
 			*ptid = tid;
 	} else if (journal->j_committing_transaction && ptid) {
@@ -519,7 +519,7 @@ int journal_start_commit(journal_t *journal, tid_t *ptid)
  * Wait for a specified commit to complete.
  * The caller may not hold the journal lock.
  */
-int log_wait_commit(journal_t *journal, tid_t tid)
+int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
 {
 	int err = 0;
 
@@ -555,7 +555,7 @@ int log_wait_commit(journal_t *journal, tid_t tid)
  * Log buffer allocation routines:
  */
 
-int journal_next_log_block(journal_t *journal, unsigned long *retp)
+int jbd2_journal_next_log_block(journal_t *journal, unsigned long *retp)
 {
 	unsigned long blocknr;
 
@@ -568,7 +568,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp)
 	if (journal->j_head == journal->j_last)
 		journal->j_head = journal->j_first;
 	spin_unlock(&journal->j_state_lock);
-	return journal_bmap(journal, blocknr, retp);
+	return jbd2_journal_bmap(journal, blocknr, retp);
 }
 
 /*
@@ -578,7 +578,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp)
  * this is a no-op.  If needed, we can use j_blk_offset - everything is
  * ready.
  */
-int journal_bmap(journal_t *journal, unsigned long blocknr,
+int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
 		 unsigned long *retp)
 {
 	int err = 0;
@@ -610,18 +610,18 @@ int journal_bmap(journal_t *journal, unsigned long blocknr,
  * the journal without copying their contents, but for journal
  * descriptor blocks we do need to generate bona fide buffers.
  *
- * After the caller of journal_get_descriptor_buffer() has finished modifying
+ * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying
  * the buffer's contents they really should run flush_dcache_page(bh->b_page).
  * But we don't bother doing that, so there will be coherency problems with
  * mmaps of blockdevs which hold live JBD-controlled filesystems.
  */
-struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
+struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
 {
 	struct buffer_head *bh;
 	unsigned long blocknr;
 	int err;
 
-	err = journal_next_log_block(journal, &blocknr);
+	err = jbd2_journal_next_log_block(journal, &blocknr);
 
 	if (err)
 		return NULL;
@@ -632,7 +632,7 @@ struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
 	BUFFER_TRACE(bh, "return this buffer");
-	return journal_add_journal_head(bh);
+	return jbd2_journal_add_journal_head(bh);
 }
 
 /*
@@ -669,10 +669,10 @@ static journal_t * journal_init_common (void)
 	journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE);
 
 	/* The journal is marked for error until we succeed with recovery! */
-	journal->j_flags = JFS_ABORT;
+	journal->j_flags = JBD2_ABORT;
 
 	/* Set up a default-sized revoke table for the new mount. */
-	err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
+	err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
 	if (err) {
 		kfree(journal);
 		goto fail;
@@ -682,7 +682,7 @@ fail:
 	return NULL;
 }
 
-/* journal_init_dev and journal_init_inode:
+/* jbd2_journal_init_dev and jbd2_journal_init_inode:
  *
  * Create a journal structure assigned some fixed set of disk blocks to
  * the journal.  We don't actually touch those disk blocks yet, but we
@@ -692,7 +692,7 @@ fail:
  */
 
 /**
- *  journal_t * journal_init_dev() - creates an initialises a journal structure
+ *  journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure
  *  @bdev: Block device on which to create the journal
  *  @fs_dev: Device which hold journalled filesystem for this journal.
  *  @start: Block nr Start of journal.
@@ -700,11 +700,11 @@ fail:
  *  @blocksize: blocksize of journalling device
  *  @returns: a newly created journal_t *
  *
- *  journal_init_dev creates a journal which maps a fixed contiguous
+ *  jbd2_journal_init_dev creates a journal which maps a fixed contiguous
  *  range of blocks on an arbitrary block device.
  *
  */
-journal_t * journal_init_dev(struct block_device *bdev,
+journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 			struct block_device *fs_dev,
 			int start, int len, int blocksize)
 {
@@ -740,14 +740,14 @@ journal_t * journal_init_dev(struct block_device *bdev,
 }
 
 /**
- *  journal_t * journal_init_inode () - creates a journal which maps to a inode.
+ *  journal_t * jbd2_journal_init_inode () - creates a journal which maps to a inode.
  *  @inode: An inode to create the journal in
  *
- * journal_init_inode creates a journal which maps an on-disk inode as
+ * jbd2_journal_init_inode creates a journal which maps an on-disk inode as
  * the journal.  The inode must exist already, must support bmap() and
  * must have all data blocks preallocated.
  */
-journal_t * journal_init_inode (struct inode *inode)
+journal_t * jbd2_journal_init_inode (struct inode *inode)
 {
 	struct buffer_head *bh;
 	journal_t *journal = journal_init_common();
@@ -780,7 +780,7 @@ journal_t * journal_init_inode (struct inode *inode)
 		return NULL;
 	}
 
-	err = journal_bmap(journal, 0, &blocknr);
+	err = jbd2_journal_bmap(journal, 0, &blocknr);
 	/* If that failed, give up */
 	if (err) {
 		printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
@@ -838,27 +838,27 @@ static int journal_reset(journal_t *journal)
 	journal->j_max_transaction_buffers = journal->j_maxlen / 4;
 
 	/* Add the dynamic fields and write it to disk. */
-	journal_update_superblock(journal, 1);
-	journal_start_thread(journal);
+	jbd2_journal_update_superblock(journal, 1);
+	jbd2_journal_start_thread(journal);
 	return 0;
 }
 
 /**
- * int journal_create() - Initialise the new journal file
+ * int jbd2_journal_create() - Initialise the new journal file
  * @journal: Journal to create. This structure must have been initialised
  *
  * Given a journal_t structure which tells us which disk blocks we can
  * use, create a new journal superblock and initialise all of the
  * journal fields from scratch.
  **/
-int journal_create(journal_t *journal)
+int jbd2_journal_create(journal_t *journal)
 {
 	unsigned long blocknr;
 	struct buffer_head *bh;
 	journal_superblock_t *sb;
 	int i, err;
 
-	if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) {
+	if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) {
 		printk (KERN_ERR "Journal length (%d blocks) too short.\n",
 			journal->j_maxlen);
 		journal_fail_superblock(journal);
@@ -876,10 +876,10 @@ int journal_create(journal_t *journal)
 	}
 
 	/* Zero out the entire journal on disk.  We cannot afford to
-	   have any blocks on disk beginning with JFS_MAGIC_NUMBER. */
+	   have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */
 	jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
 	for (i = 0; i < journal->j_maxlen; i++) {
-		err = journal_bmap(journal, i, &blocknr);
+		err = jbd2_journal_bmap(journal, i, &blocknr);
 		if (err)
 			return err;
 		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
@@ -899,8 +899,8 @@ int journal_create(journal_t *journal)
 	/* OK, fill in the initial static fields in the new superblock */
 	sb = journal->j_superblock;
 
-	sb->s_header.h_magic	 = cpu_to_be32(JFS_MAGIC_NUMBER);
-	sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
+	sb->s_header.h_magic	 = cpu_to_be32(JBD2_MAGIC_NUMBER);
+	sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
 
 	sb->s_blocksize	= cpu_to_be32(journal->j_blocksize);
 	sb->s_maxlen	= cpu_to_be32(journal->j_maxlen);
@@ -908,21 +908,21 @@ int journal_create(journal_t *journal)
 
 	journal->j_transaction_sequence = 1;
 
-	journal->j_flags &= ~JFS_ABORT;
+	journal->j_flags &= ~JBD2_ABORT;
 	journal->j_format_version = 2;
 
 	return journal_reset(journal);
 }
 
 /**
- * void journal_update_superblock() - Update journal sb on disk.
+ * void jbd2_journal_update_superblock() - Update journal sb on disk.
  * @journal: The journal to update.
  * @wait: Set to '0' if you don't want to wait for IO completion.
  *
  * Update a journal's dynamic superblock fields and write it to disk,
  * optionally waiting for the IO to complete.
  */
-void journal_update_superblock(journal_t *journal, int wait)
+void jbd2_journal_update_superblock(journal_t *journal, int wait)
 {
 	journal_superblock_t *sb = journal->j_superblock;
 	struct buffer_head *bh = journal->j_sb_buffer;
@@ -931,7 +931,7 @@ void journal_update_superblock(journal_t *journal, int wait)
 	 * As a special case, if the on-disk copy is already marked as needing
 	 * no recovery (s_start == 0) and there are no outstanding transactions
 	 * in the filesystem, then we can safely defer the superblock update
-	 * until the next commit by setting JFS_FLUSHED.  This avoids
+	 * until the next commit by setting JBD2_FLUSHED.  This avoids
 	 * attempting a write to a potential-readonly device.
 	 */
 	if (sb->s_start == 0 && journal->j_tail_sequence ==
@@ -966,9 +966,9 @@ out:
 
 	spin_lock(&journal->j_state_lock);
 	if (sb->s_start)
-		journal->j_flags &= ~JFS_FLUSHED;
+		journal->j_flags &= ~JBD2_FLUSHED;
 	else
-		journal->j_flags |= JFS_FLUSHED;
+		journal->j_flags |= JBD2_FLUSHED;
 	spin_unlock(&journal->j_state_lock);
 }
 
@@ -1000,17 +1000,17 @@ static int journal_get_superblock(journal_t *journal)
 
 	err = -EINVAL;
 
-	if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) ||
+	if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
 	    sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
 		printk(KERN_WARNING "JBD: no valid journal superblock found\n");
 		goto out;
 	}
 
 	switch(be32_to_cpu(sb->s_header.h_blocktype)) {
-	case JFS_SUPERBLOCK_V1:
+	case JBD2_SUPERBLOCK_V1:
 		journal->j_format_version = 1;
 		break;
-	case JFS_SUPERBLOCK_V2:
+	case JBD2_SUPERBLOCK_V2:
 		journal->j_format_version = 2;
 		break;
 	default:
@@ -1059,14 +1059,14 @@ static int load_superblock(journal_t *journal)
 
 
 /**
- * int journal_load() - Read journal from disk.
+ * int jbd2_journal_load() - Read journal from disk.
  * @journal: Journal to act on.
  *
  * Given a journal_t structure which tells us which disk blocks contain
  * a journal, read the journal from disk to initialise the in-memory
  * structures.
  */
-int journal_load(journal_t *journal)
+int jbd2_journal_load(journal_t *journal)
 {
 	int err;
 	journal_superblock_t *sb;
@@ -1081,9 +1081,9 @@ int journal_load(journal_t *journal)
 
 	if (journal->j_format_version >= 2) {
 		if ((sb->s_feature_ro_compat &
-		     ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
+		     ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
 		    (sb->s_feature_incompat &
-		     ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES))) {
+		     ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
 			printk (KERN_WARNING
 				"JBD: Unrecognised features on journal\n");
 			return -EINVAL;
@@ -1093,13 +1093,13 @@ int journal_load(journal_t *journal)
 	/*
 	 * Create a slab for this blocksize
 	 */
-	err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
+	err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
 	if (err)
 		return err;
 
 	/* Let the recovery code check whether it needs to recover any
 	 * data from the journal. */
-	if (journal_recover(journal))
+	if (jbd2_journal_recover(journal))
 		goto recovery_error;
 
 	/* OK, we've finished with the dynamic journal bits:
@@ -1108,8 +1108,8 @@ int journal_load(journal_t *journal)
 	if (journal_reset(journal))
 		goto recovery_error;
 
-	journal->j_flags &= ~JFS_ABORT;
-	journal->j_flags |= JFS_LOADED;
+	journal->j_flags &= ~JBD2_ABORT;
+	journal->j_flags |= JBD2_LOADED;
 	return 0;
 
 recovery_error:
@@ -1118,20 +1118,20 @@ recovery_error:
 }
 
 /**
- * void journal_destroy() - Release a journal_t structure.
+ * void jbd2_journal_destroy() - Release a journal_t structure.
  * @journal: Journal to act on.
  *
  * Release a journal_t structure once it is no longer in use by the
  * journaled object.
  */
-void journal_destroy(journal_t *journal)
+void jbd2_journal_destroy(journal_t *journal)
 {
 	/* Wait for the commit thread to wake up and die. */
 	journal_kill_thread(journal);
 
 	/* Force a final log commit */
 	if (journal->j_running_transaction)
-		journal_commit_transaction(journal);
+		jbd2_journal_commit_transaction(journal);
 
 	/* Force any old transactions to disk */
 
@@ -1139,7 +1139,7 @@ void journal_destroy(journal_t *journal)
 	spin_lock(&journal->j_list_lock);
 	while (journal->j_checkpoint_transactions != NULL) {
 		spin_unlock(&journal->j_list_lock);
-		log_do_checkpoint(journal);
+		jbd2_log_do_checkpoint(journal);
 		spin_lock(&journal->j_list_lock);
 	}
 
@@ -1152,21 +1152,21 @@ void journal_destroy(journal_t *journal)
 	journal->j_tail = 0;
 	journal->j_tail_sequence = ++journal->j_transaction_sequence;
 	if (journal->j_sb_buffer) {
-		journal_update_superblock(journal, 1);
+		jbd2_journal_update_superblock(journal, 1);
 		brelse(journal->j_sb_buffer);
 	}
 
 	if (journal->j_inode)
 		iput(journal->j_inode);
 	if (journal->j_revoke)
-		journal_destroy_revoke(journal);
+		jbd2_journal_destroy_revoke(journal);
 	kfree(journal->j_wbuf);
 	kfree(journal);
 }
 
 
 /**
- *int journal_check_used_features () - Check if features specified are used.
+ * int jbd2_journal_check_used_features () - Check if features specified are used.
  * @journal: Journal to check.
  * @compat: bitmask of compatible features
  * @ro: bitmask of features that force read-only mount
@@ -1176,7 +1176,7 @@ void journal_destroy(journal_t *journal)
  * features.  Return true (non-zero) if it does.
  **/
 
-int journal_check_used_features (journal_t *journal, unsigned long compat,
+int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
 				 unsigned long ro, unsigned long incompat)
 {
 	journal_superblock_t *sb;
@@ -1197,7 +1197,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
 }
 
 /**
- * int journal_check_available_features() - Check feature set in journalling layer
+ * int jbd2_journal_check_available_features() - Check feature set in journalling layer
  * @journal: Journal to check.
  * @compat: bitmask of compatible features
  * @ro: bitmask of features that force read-only mount
@@ -1207,7 +1207,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
  * all of a given set of features on this journal.  Return true
  * (non-zero) if it can. */
 
-int journal_check_available_features (journal_t *journal, unsigned long compat,
+int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat,
 				      unsigned long ro, unsigned long incompat)
 {
 	journal_superblock_t *sb;
@@ -1224,16 +1224,16 @@ int journal_check_available_features (journal_t *journal, unsigned long compat,
 	if (journal->j_format_version != 2)
 		return 0;
 
-	if ((compat   & JFS_KNOWN_COMPAT_FEATURES) == compat &&
-	    (ro       & JFS_KNOWN_ROCOMPAT_FEATURES) == ro &&
-	    (incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat)
+	if ((compat   & JBD2_KNOWN_COMPAT_FEATURES) == compat &&
+	    (ro       & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro &&
+	    (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat)
 		return 1;
 
 	return 0;
 }
 
 /**
- * int journal_set_features () - Mark a given journal feature in the superblock
+ * int jbd2_journal_set_features () - Mark a given journal feature in the superblock
  * @journal: Journal to act on.
  * @compat: bitmask of compatible features
  * @ro: bitmask of features that force read-only mount
@@ -1244,15 +1244,15 @@ int journal_check_available_features (journal_t *journal, unsigned long compat,
  *
  */
 
-int journal_set_features (journal_t *journal, unsigned long compat,
+int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
 			  unsigned long ro, unsigned long incompat)
 {
 	journal_superblock_t *sb;
 
-	if (journal_check_used_features(journal, compat, ro, incompat))
+	if (jbd2_journal_check_used_features(journal, compat, ro, incompat))
 		return 1;
 
-	if (!journal_check_available_features(journal, compat, ro, incompat))
+	if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
 		return 0;
 
 	jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
@@ -1269,13 +1269,13 @@ int journal_set_features (journal_t *journal, unsigned long compat,
 
 
 /**
- * int journal_update_format () - Update on-disk journal structure.
+ * int jbd2_journal_update_format () - Update on-disk journal structure.
  * @journal: Journal to act on.
  *
  * Given an initialised but unloaded journal struct, poke about in the
  * on-disk structure to update it to the most recent supported version.
  */
-int journal_update_format (journal_t *journal)
+int jbd2_journal_update_format (journal_t *journal)
 {
 	journal_superblock_t *sb;
 	int err;
@@ -1287,9 +1287,9 @@ int journal_update_format (journal_t *journal)
 	sb = journal->j_superblock;
 
 	switch (be32_to_cpu(sb->s_header.h_blocktype)) {
-	case JFS_SUPERBLOCK_V2:
+	case JBD2_SUPERBLOCK_V2:
 		return 0;
-	case JFS_SUPERBLOCK_V1:
+	case JBD2_SUPERBLOCK_V1:
 		return journal_convert_superblock_v1(journal, sb);
 	default:
 		break;
@@ -1312,7 +1312,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
 	memset(&sb->s_feature_compat, 0, blocksize-offset);
 
 	sb->s_nr_users = cpu_to_be32(1);
-	sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
+	sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
 	journal->j_format_version = 2;
 
 	bh = journal->j_sb_buffer;
@@ -1324,7 +1324,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
 
 
 /**
- * int journal_flush () - Flush journal
+ * int jbd2_journal_flush () - Flush journal
  * @journal: Journal to act on.
  *
  * Flush all data for a given journal to disk and empty the journal.
@@ -1332,7 +1332,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
  * recovery does not need to happen on remount.
  */
 
-int journal_flush(journal_t *journal)
+int jbd2_journal_flush(journal_t *journal)
 {
 	int err = 0;
 	transaction_t *transaction = NULL;
@@ -1343,7 +1343,7 @@ int journal_flush(journal_t *journal)
 	/* Force everything buffered to the log... */
 	if (journal->j_running_transaction) {
 		transaction = journal->j_running_transaction;
-		__log_start_commit(journal, transaction->t_tid);
+		__jbd2_log_start_commit(journal, transaction->t_tid);
 	} else if (journal->j_committing_transaction)
 		transaction = journal->j_committing_transaction;
 
@@ -1352,7 +1352,7 @@ int journal_flush(journal_t *journal)
 		tid_t tid = transaction->t_tid;
 
 		spin_unlock(&journal->j_state_lock);
-		log_wait_commit(journal, tid);
+		jbd2_log_wait_commit(journal, tid);
 	} else {
 		spin_unlock(&journal->j_state_lock);
 	}
@@ -1361,11 +1361,11 @@ int journal_flush(journal_t *journal)
 	spin_lock(&journal->j_list_lock);
 	while (!err && journal->j_checkpoint_transactions != NULL) {
 		spin_unlock(&journal->j_list_lock);
-		err = log_do_checkpoint(journal);
+		err = jbd2_log_do_checkpoint(journal);
 		spin_lock(&journal->j_list_lock);
 	}
 	spin_unlock(&journal->j_list_lock);
-	cleanup_journal_tail(journal);
+	jbd2_cleanup_journal_tail(journal);
 
 	/* Finally, mark the journal as really needing no recovery.
 	 * This sets s_start==0 in the underlying superblock, which is
@@ -1376,7 +1376,7 @@ int journal_flush(journal_t *journal)
 	old_tail = journal->j_tail;
 	journal->j_tail = 0;
 	spin_unlock(&journal->j_state_lock);
-	journal_update_superblock(journal, 1);
+	jbd2_journal_update_superblock(journal, 1);
 	spin_lock(&journal->j_state_lock);
 	journal->j_tail = old_tail;
 
@@ -1390,24 +1390,24 @@ int journal_flush(journal_t *journal)
 }
 
 /**
- * int journal_wipe() - Wipe journal contents
+ * int jbd2_journal_wipe() - Wipe journal contents
  * @journal: Journal to act on.
  * @write: flag (see below)
  *
  * Wipe out all of the contents of a journal, safely.  This will produce
  * a warning if the journal contains any valid recovery information.
- * Must be called between journal_init_*() and journal_load().
+ * Must be called between jbd2_journal_init_*() and jbd2_journal_load().
  *
  * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
  * we merely suppress recovery.
  */
 
-int journal_wipe(journal_t *journal, int write)
+int jbd2_journal_wipe(journal_t *journal, int write)
 {
 	journal_superblock_t *sb;
 	int err = 0;
 
-	J_ASSERT (!(journal->j_flags & JFS_LOADED));
+	J_ASSERT (!(journal->j_flags & JBD2_LOADED));
 
 	err = load_superblock(journal);
 	if (err)
@@ -1421,9 +1421,9 @@ int journal_wipe(journal_t *journal, int write)
 	printk (KERN_WARNING "JBD: %s recovery information on journal\n",
 		write ? "Clearing" : "Ignoring");
 
-	err = journal_skip_recovery(journal);
+	err = jbd2_journal_skip_recovery(journal);
 	if (write)
-		journal_update_superblock(journal, 1);
+		jbd2_journal_update_superblock(journal, 1);
 
  no_recovery:
 	return err;
@@ -1459,22 +1459,22 @@ static const char *journal_dev_name(journal_t *journal, char *buffer)
  * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
  * and don't attempt to make any other journal updates.
  */
-void __journal_abort_hard(journal_t *journal)
+void __jbd2_journal_abort_hard(journal_t *journal)
 {
 	transaction_t *transaction;
 	char b[BDEVNAME_SIZE];
 
-	if (journal->j_flags & JFS_ABORT)
+	if (journal->j_flags & JBD2_ABORT)
 		return;
 
 	printk(KERN_ERR "Aborting journal on device %s.\n",
 		journal_dev_name(journal, b));
 
 	spin_lock(&journal->j_state_lock);
-	journal->j_flags |= JFS_ABORT;
+	journal->j_flags |= JBD2_ABORT;
 	transaction = journal->j_running_transaction;
 	if (transaction)
-		__log_start_commit(journal, transaction->t_tid);
+		__jbd2_log_start_commit(journal, transaction->t_tid);
 	spin_unlock(&journal->j_state_lock);
 }
 
@@ -1482,20 +1482,20 @@ void __journal_abort_hard(journal_t *journal)
  * but don't do any other IO. */
 static void __journal_abort_soft (journal_t *journal, int errno)
 {
-	if (journal->j_flags & JFS_ABORT)
+	if (journal->j_flags & JBD2_ABORT)
 		return;
 
 	if (!journal->j_errno)
 		journal->j_errno = errno;
 
-	__journal_abort_hard(journal);
+	__jbd2_journal_abort_hard(journal);
 
 	if (errno)
-		journal_update_superblock(journal, 1);
+		jbd2_journal_update_superblock(journal, 1);
 }
 
 /**
- * void journal_abort () - Shutdown the journal immediately.
+ * void jbd2_journal_abort () - Shutdown the journal immediately.
  * @journal: the journal to shutdown.
  * @errno:   an error number to record in the journal indicating
  *           the reason for the shutdown.
@@ -1504,7 +1504,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
  * journal (not of a single transaction).  This operation cannot be
  * undone without closing and reopening the journal.
  *
- * The journal_abort function is intended to support higher level error
+ * The jbd2_journal_abort function is intended to support higher level error
  * recovery mechanisms such as the ext2/ext3 remount-readonly error
  * mode.
  *
@@ -1520,13 +1520,13 @@ static void __journal_abort_soft (journal_t *journal, int errno)
  *
  * Any attempt to get a new transaction handle on a journal which is in
  * ABORT state will just result in an -EROFS error return.  A
- * journal_stop on an existing handle will return -EIO if we have
+ * jbd2_journal_stop on an existing handle will return -EIO if we have
  * entered abort state during the update.
  *
  * Recursive transactions are not disturbed by journal abort until the
- * final journal_stop, which will receive the -EIO error.
+ * final jbd2_journal_stop, which will receive the -EIO error.
  *
- * Finally, the journal_abort call allows the caller to supply an errno
+ * Finally, the jbd2_journal_abort call allows the caller to supply an errno
  * which will be recorded (if possible) in the journal superblock.  This
  * allows a client to record failure conditions in the middle of a
  * transaction without having to complete the transaction to record the
@@ -1540,28 +1540,28 @@ static void __journal_abort_soft (journal_t *journal, int errno)
  *
  */
 
-void journal_abort(journal_t *journal, int errno)
+void jbd2_journal_abort(journal_t *journal, int errno)
 {
 	__journal_abort_soft(journal, errno);
 }
 
 /**
- * int journal_errno () - returns the journal's error state.
+ * int jbd2_journal_errno () - returns the journal's error state.
  * @journal: journal to examine.
  *
- * This is the errno numbet set with journal_abort(), the last
+ * This is the errno number set with jbd2_journal_abort(), the last
  * time the journal was mounted - if the journal was stopped
  * without calling abort this will be 0.
  *
  * If the journal has been aborted on this mount time -EROFS will
  * be returned.
  */
-int journal_errno(journal_t *journal)
+int jbd2_journal_errno(journal_t *journal)
 {
 	int err;
 
 	spin_lock(&journal->j_state_lock);
-	if (journal->j_flags & JFS_ABORT)
+	if (journal->j_flags & JBD2_ABORT)
 		err = -EROFS;
 	else
 		err = journal->j_errno;
@@ -1570,18 +1570,18 @@ int journal_errno(journal_t *journal)
 }
 
 /**
- * int journal_clear_err () - clears the journal's error state
+ * int jbd2_journal_clear_err () - clears the journal's error state
  * @journal: journal to act on.
  *
  * An error must be cleared or Acked to take a FS out of readonly
  * mode.
  */
-int journal_clear_err(journal_t *journal)
+int jbd2_journal_clear_err(journal_t *journal)
 {
 	int err = 0;
 
 	spin_lock(&journal->j_state_lock);
-	if (journal->j_flags & JFS_ABORT)
+	if (journal->j_flags & JBD2_ABORT)
 		err = -EROFS;
 	else
 		journal->j_errno = 0;
@@ -1590,21 +1590,21 @@ int journal_clear_err(journal_t *journal)
 }
 
 /**
- * void journal_ack_err() - Ack journal err.
+ * void jbd2_journal_ack_err() - Ack journal err.
  * @journal: journal to act on.
  *
  * An error must be cleared or Acked to take a FS out of readonly
  * mode.
  */
-void journal_ack_err(journal_t *journal)
+void jbd2_journal_ack_err(journal_t *journal)
 {
 	spin_lock(&journal->j_state_lock);
 	if (journal->j_errno)
-		journal->j_flags |= JFS_ACK_ERR;
+		journal->j_flags |= JBD2_ACK_ERR;
 	spin_unlock(&journal->j_state_lock);
 }
 
-int journal_blocks_per_page(struct inode *inode)
+int jbd2_journal_blocks_per_page(struct inode *inode)
 {
 	return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 }
@@ -1613,7 +1613,7 @@ int journal_blocks_per_page(struct inode *inode)
  * Simple support for retrying memory allocations.  Introduced to help to
  * debug different VM deadlock avoidance strategies.
  */
-void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
+void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 {
 	return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
 }
@@ -1634,7 +1634,7 @@ static const char *jbd_slab_names[JBD_MAX_SLABS] = {
 	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
 };
 
-static void journal_destroy_jbd_slabs(void)
+static void jbd2_journal_destroy_jbd_slabs(void)
 {
 	int i;
 
@@ -1645,7 +1645,7 @@ static void journal_destroy_jbd_slabs(void)
 	}
 }
 
-static int journal_create_jbd_slab(size_t slab_size)
+static int jbd2_journal_create_jbd_slab(size_t slab_size)
 {
 	int i = JBD_SLAB_INDEX(slab_size);
 
@@ -1671,7 +1671,7 @@ static int journal_create_jbd_slab(size_t slab_size)
 	return 0;
 }
 
-void * jbd_slab_alloc(size_t size, gfp_t flags)
+void * jbd2_slab_alloc(size_t size, gfp_t flags)
 {
 	int idx;
 
@@ -1680,7 +1680,7 @@ void * jbd_slab_alloc(size_t size, gfp_t flags)
 	return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
 }
 
-void jbd_slab_free(void *ptr,  size_t size)
+void jbd2_slab_free(void *ptr,  size_t size)
 {
 	int idx;
 
@@ -1692,35 +1692,35 @@ void jbd_slab_free(void *ptr,  size_t size)
 /*
  * Journal_head storage management
  */
-static kmem_cache_t *journal_head_cache;
+static kmem_cache_t *jbd2_journal_head_cache;
 #ifdef CONFIG_JBD_DEBUG
 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
 #endif
 
-static int journal_init_journal_head_cache(void)
+static int journal_init_jbd2_journal_head_cache(void)
 {
 	int retval;
 
-	J_ASSERT(journal_head_cache == 0);
-	journal_head_cache = kmem_cache_create("journal_head",
+	J_ASSERT(jbd2_journal_head_cache == 0);
+	jbd2_journal_head_cache = kmem_cache_create("journal_head",
 				sizeof(struct journal_head),
 				0,		/* offset */
 				0,		/* flags */
 				NULL,		/* ctor */
 				NULL);		/* dtor */
 	retval = 0;
-	if (journal_head_cache == 0) {
+	if (jbd2_journal_head_cache == 0) {
 		retval = -ENOMEM;
 		printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
 	}
 	return retval;
 }
 
-static void journal_destroy_journal_head_cache(void)
+static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
 {
-	J_ASSERT(journal_head_cache != NULL);
-	kmem_cache_destroy(journal_head_cache);
-	journal_head_cache = NULL;
+	J_ASSERT(jbd2_journal_head_cache != NULL);
+	kmem_cache_destroy(jbd2_journal_head_cache);
+	jbd2_journal_head_cache = NULL;
 }
 
 /*
@@ -1734,7 +1734,7 @@ static struct journal_head *journal_alloc_journal_head(void)
 #ifdef CONFIG_JBD_DEBUG
 	atomic_inc(&nr_journal_heads);
 #endif
-	ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
+	ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
 	if (ret == 0) {
 		jbd_debug(1, "out of memory for journal_head\n");
 		if (time_after(jiffies, last_warning + 5*HZ)) {
@@ -1744,7 +1744,7 @@ static struct journal_head *journal_alloc_journal_head(void)
 		}
 		while (ret == 0) {
 			yield();
-			ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
+			ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
 		}
 	}
 	return ret;
@@ -1756,7 +1756,7 @@ static void journal_free_journal_head(struct journal_head *jh)
 	atomic_dec(&nr_journal_heads);
 	memset(jh, JBD_POISON_FREE, sizeof(*jh));
 #endif
-	kmem_cache_free(journal_head_cache, jh);
+	kmem_cache_free(jbd2_journal_head_cache, jh);
 }
 
 /*
@@ -1775,22 +1775,22 @@ static void journal_free_journal_head(struct journal_head *jh)
  *
  * A journal_head may be detached from its buffer_head when the journal_head's
  * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
- * Various places in JBD call journal_remove_journal_head() to indicate that the
+ * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the
  * journal_head can be dropped if needed.
  *
  * Various places in the kernel want to attach a journal_head to a buffer_head
  * _before_ attaching the journal_head to a transaction.  To protect the
- * journal_head in this situation, journal_add_journal_head elevates the
+ * journal_head in this situation, jbd2_journal_add_journal_head elevates the
  * journal_head's b_jcount refcount by one.  The caller must call
- * journal_put_journal_head() to undo this.
+ * jbd2_journal_put_journal_head() to undo this.
  *
  * So the typical usage would be:
  *
  *	(Attach a journal_head if needed.  Increments b_jcount)
- *	struct journal_head *jh = journal_add_journal_head(bh);
+ *	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
  *	...
  *	jh->b_transaction = xxx;
- *	journal_put_journal_head(jh);
+ *	jbd2_journal_put_journal_head(jh);
  *
  * Now, the journal_head's b_jcount is zero, but it is safe from being released
  * because it has a non-zero b_transaction.
@@ -1802,7 +1802,7 @@ static void journal_free_journal_head(struct journal_head *jh)
  * Doesn't need the journal lock.
  * May sleep.
  */
-struct journal_head *journal_add_journal_head(struct buffer_head *bh)
+struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
 {
 	struct journal_head *jh;
 	struct journal_head *new_jh = NULL;
@@ -1845,7 +1845,7 @@ repeat:
  * Grab a ref against this buffer_head's journal_head.  If it ended up not
  * having a journal_head, return NULL
  */
-struct journal_head *journal_grab_journal_head(struct buffer_head *bh)
+struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh)
 {
 	struct journal_head *jh = NULL;
 
@@ -1877,13 +1877,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
 				printk(KERN_WARNING "%s: freeing "
 						"b_frozen_data\n",
 						__FUNCTION__);
-				jbd_slab_free(jh->b_frozen_data, bh->b_size);
+				jbd2_slab_free(jh->b_frozen_data, bh->b_size);
 			}
 			if (jh->b_committed_data) {
 				printk(KERN_WARNING "%s: freeing "
 						"b_committed_data\n",
 						__FUNCTION__);
-				jbd_slab_free(jh->b_committed_data, bh->b_size);
+				jbd2_slab_free(jh->b_committed_data, bh->b_size);
 			}
 			bh->b_private = NULL;
 			jh->b_bh = NULL;	/* debug, really */
@@ -1897,7 +1897,7 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
 }
 
 /*
- * journal_remove_journal_head(): if the buffer isn't attached to a transaction
+ * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction
  * and has a zero b_jcount then remove and release its journal_head.   If we did
  * see that the buffer is not used by any transaction we also "logically"
  * decrement ->b_count.
@@ -1905,11 +1905,11 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
  * We in fact take an additional increment on ->b_count as a convenience,
  * because the caller usually wants to do additional things with the bh
  * after calling here.
- * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
+ * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some
  * time.  Once the caller has run __brelse(), the buffer is eligible for
  * reaping by try_to_free_buffers().
  */
-void journal_remove_journal_head(struct buffer_head *bh)
+void jbd2_journal_remove_journal_head(struct buffer_head *bh)
 {
 	jbd_lock_bh_journal_head(bh);
 	__journal_remove_journal_head(bh);
@@ -1920,7 +1920,7 @@ void journal_remove_journal_head(struct buffer_head *bh)
  * Drop a reference on the passed journal_head.  If it fell to zero then try to
  * release the journal_head from the buffer_head.
  */
-void journal_put_journal_head(struct journal_head *jh)
+void jbd2_journal_put_journal_head(struct journal_head *jh)
 {
 	struct buffer_head *bh = jh2bh(jh);
 
@@ -1938,8 +1938,8 @@ void journal_put_journal_head(struct journal_head *jh)
  * /proc tunables
  */
 #if defined(CONFIG_JBD_DEBUG)
-int journal_enable_debug;
-EXPORT_SYMBOL(journal_enable_debug);
+int jbd2_journal_enable_debug;
+EXPORT_SYMBOL(jbd2_journal_enable_debug);
 #endif
 
 #if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
@@ -1951,7 +1951,7 @@ static int read_jbd_debug(char *page, char **start, off_t off,
 {
 	int ret;
 
-	ret = sprintf(page + off, "%d\n", journal_enable_debug);
+	ret = sprintf(page + off, "%d\n", jbd2_journal_enable_debug);
 	*eof = 1;
 	return ret;
 }
@@ -1966,11 +1966,11 @@ static int write_jbd_debug(struct file *file, const char __user *buffer,
 	if (copy_from_user(buf, buffer, count))
 		return -EFAULT;
 	buf[ARRAY_SIZE(buf) - 1] = '\0';
-	journal_enable_debug = simple_strtoul(buf, NULL, 10);
+	jbd2_journal_enable_debug = simple_strtoul(buf, NULL, 10);
 	return count;
 }
 
-#define JBD_PROC_NAME "sys/fs/jbd-debug"
+#define JBD_PROC_NAME "sys/fs/jbd2-debug"
 
 static void __init create_jbd_proc_entry(void)
 {
@@ -1982,7 +1982,7 @@ static void __init create_jbd_proc_entry(void)
 	}
 }
 
-static void __exit remove_jbd_proc_entry(void)
+static void __exit jbd2_remove_jbd_proc_entry(void)
 {
 	if (proc_jbd_debug)
 		remove_proc_entry(JBD_PROC_NAME, NULL);
@@ -1991,31 +1991,31 @@ static void __exit remove_jbd_proc_entry(void)
 #else
 
 #define create_jbd_proc_entry() do {} while (0)
-#define remove_jbd_proc_entry() do {} while (0)
+#define jbd2_remove_jbd_proc_entry() do {} while (0)
 
 #endif
 
-kmem_cache_t *jbd_handle_cache;
+kmem_cache_t *jbd2_handle_cache;
 
 static int __init journal_init_handle_cache(void)
 {
-	jbd_handle_cache = kmem_cache_create("journal_handle",
+	jbd2_handle_cache = kmem_cache_create("journal_handle",
 				sizeof(handle_t),
 				0,		/* offset */
 				0,		/* flags */
 				NULL,		/* ctor */
 				NULL);		/* dtor */
-	if (jbd_handle_cache == NULL) {
+	if (jbd2_handle_cache == NULL) {
 		printk(KERN_EMERG "JBD: failed to create handle cache\n");
 		return -ENOMEM;
 	}
 	return 0;
 }
 
-static void journal_destroy_handle_cache(void)
+static void jbd2_journal_destroy_handle_cache(void)
 {
-	if (jbd_handle_cache)
-		kmem_cache_destroy(jbd_handle_cache);
+	if (jbd2_handle_cache)
+		kmem_cache_destroy(jbd2_handle_cache);
 }
 
 /*
@@ -2026,20 +2026,20 @@ static int __init journal_init_caches(void)
 {
 	int ret;
 
-	ret = journal_init_revoke_caches();
+	ret = jbd2_journal_init_revoke_caches();
 	if (ret == 0)
-		ret = journal_init_journal_head_cache();
+		ret = journal_init_jbd2_journal_head_cache();
 	if (ret == 0)
 		ret = journal_init_handle_cache();
 	return ret;
 }
 
-static void journal_destroy_caches(void)
+static void jbd2_journal_destroy_caches(void)
 {
-	journal_destroy_revoke_caches();
-	journal_destroy_journal_head_cache();
-	journal_destroy_handle_cache();
-	journal_destroy_jbd_slabs();
+	jbd2_journal_destroy_revoke_caches();
+	jbd2_journal_destroy_jbd2_journal_head_cache();
+	jbd2_journal_destroy_handle_cache();
+	jbd2_journal_destroy_jbd_slabs();
 }
 
 static int __init journal_init(void)
@@ -2050,7 +2050,7 @@ static int __init journal_init(void)
 
 	ret = journal_init_caches();
 	if (ret != 0)
-		journal_destroy_caches();
+		jbd2_journal_destroy_caches();
 	create_jbd_proc_entry();
 	return ret;
 }
@@ -2062,8 +2062,8 @@ static void __exit journal_exit(void)
 	if (n)
 		printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
 #endif
-	remove_jbd_proc_entry();
-	journal_destroy_caches();
+	jbd2_remove_jbd_proc_entry();
+	jbd2_journal_destroy_caches();
 }
 
 MODULE_LICENSE("GPL");
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 11563fe2a52b..b2012d112432 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -18,7 +18,7 @@
 #else
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 #endif
@@ -86,7 +86,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
 	nbufs = 0;
 
 	for (next = start; next < max; next++) {
-		err = journal_bmap(journal, next, &blocknr);
+		err = jbd2_journal_bmap(journal, next, &blocknr);
 
 		if (err) {
 			printk (KERN_ERR "JBD: bad block at offset %u\n",
@@ -142,7 +142,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
 		return -EIO;
 	}
 
-	err = journal_bmap(journal, offset, &blocknr);
+	err = jbd2_journal_bmap(journal, offset, &blocknr);
 
 	if (err) {
 		printk (KERN_ERR "JBD: bad block at offset %u\n",
@@ -191,10 +191,10 @@ static int count_tags(struct buffer_head *bh, int size)
 
 		nr++;
 		tagp += sizeof(journal_block_tag_t);
-		if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID)))
+		if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
 			tagp += 16;
 
-		if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG))
+		if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG))
 			break;
 	}
 
@@ -210,7 +210,7 @@ do {									\
 } while (0)
 
 /**
- * journal_recover - recovers a on-disk journal
+ * jbd2_journal_recover - recovers an on-disk journal
  * @journal: the journal to recover
  *
  * The primary function for recovering the log contents when mounting a
@@ -221,7 +221,7 @@ do {									\
  * blocks.  In the third and final pass, we replay any un-revoked blocks
  * in the log.
  */
-int journal_recover(journal_t *journal)
+int jbd2_journal_recover(journal_t *journal)
 {
 	int			err;
 	journal_superblock_t *	sb;
@@ -260,13 +260,13 @@ int journal_recover(journal_t *journal)
 	 * any existing commit records in the log. */
 	journal->j_transaction_sequence = ++info.end_transaction;
 
-	journal_clear_revoke(journal);
+	jbd2_journal_clear_revoke(journal);
 	sync_blockdev(journal->j_fs_dev);
 	return err;
 }
 
 /**
- * journal_skip_recovery - Start journal and wipe exiting records
+ * jbd2_journal_skip_recovery - Start journal and wipe existing records
  * @journal: journal to startup
  *
  * Locate any valid recovery information from the journal and set up the
@@ -278,7 +278,7 @@ int journal_recover(journal_t *journal)
  * much recovery information is being erased, and to let us initialise
  * the journal transaction sequence numbers to the next unused ID.
  */
-int journal_skip_recovery(journal_t *journal)
+int jbd2_journal_skip_recovery(journal_t *journal)
 {
 	int			err;
 	journal_superblock_t *	sb;
@@ -387,7 +387,7 @@ static int do_one_pass(journal_t *journal,
 
 		tmp = (journal_header_t *)bh->b_data;
 
-		if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) {
+		if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
 			brelse(bh);
 			break;
 		}
@@ -407,7 +407,7 @@ static int do_one_pass(journal_t *journal,
 		 * to do with it?  That depends on the pass... */
 
 		switch(blocktype) {
-		case JFS_DESCRIPTOR_BLOCK:
+		case JBD2_DESCRIPTOR_BLOCK:
 			/* If it is a valid descriptor block, replay it
 			 * in pass REPLAY; otherwise, just skip over the
 			 * blocks it describes. */
@@ -451,7 +451,7 @@ static int do_one_pass(journal_t *journal,
 					/* If the block has been
 					 * revoked, then we're all done
 					 * here. */
-					if (journal_test_revoke
+					if (jbd2_journal_test_revoke
 					    (journal, blocknr,
 					     next_commit_ID)) {
 						brelse(obh);
@@ -477,9 +477,9 @@ static int do_one_pass(journal_t *journal,
 					lock_buffer(nbh);
 					memcpy(nbh->b_data, obh->b_data,
 							journal->j_blocksize);
-					if (flags & JFS_FLAG_ESCAPE) {
+					if (flags & JBD2_FLAG_ESCAPE) {
 						*((__be32 *)bh->b_data) =
-						cpu_to_be32(JFS_MAGIC_NUMBER);
+						cpu_to_be32(JBD2_MAGIC_NUMBER);
 					}
 
 					BUFFER_TRACE(nbh, "marking dirty");
@@ -495,17 +495,17 @@ static int do_one_pass(journal_t *journal,
 
 			skip_write:
 				tagp += sizeof(journal_block_tag_t);
-				if (!(flags & JFS_FLAG_SAME_UUID))
+				if (!(flags & JBD2_FLAG_SAME_UUID))
 					tagp += 16;
 
-				if (flags & JFS_FLAG_LAST_TAG)
+				if (flags & JBD2_FLAG_LAST_TAG)
 					break;
 			}
 
 			brelse(bh);
 			continue;
 
-		case JFS_COMMIT_BLOCK:
+		case JBD2_COMMIT_BLOCK:
 			/* Found an expected commit block: not much to
 			 * do other than move on to the next sequence
 			 * number. */
@@ -513,7 +513,7 @@ static int do_one_pass(journal_t *journal,
 			next_commit_ID++;
 			continue;
 
-		case JFS_REVOKE_BLOCK:
+		case JBD2_REVOKE_BLOCK:
 			/* If we aren't in the REVOKE pass, then we can
 			 * just skip over this block. */
 			if (pass != PASS_REVOKE) {
@@ -570,11 +570,11 @@ static int do_one_pass(journal_t *journal,
 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 			       tid_t sequence, struct recovery_info *info)
 {
-	journal_revoke_header_t *header;
+	jbd2_journal_revoke_header_t *header;
 	int offset, max;
 
-	header = (journal_revoke_header_t *) bh->b_data;
-	offset = sizeof(journal_revoke_header_t);
+	header = (jbd2_journal_revoke_header_t *) bh->b_data;
+	offset = sizeof(jbd2_journal_revoke_header_t);
 	max = be32_to_cpu(header->r_count);
 
 	while (offset < max) {
@@ -583,7 +583,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 
 		blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
 		offset += 4;
-		err = journal_set_revoke(journal, blocknr, sequence);
+		err = jbd2_journal_set_revoke(journal, blocknr, sequence);
 		if (err)
 			return err;
 		++info->nr_revokes;
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index c532429d8d9b..2fccddc7acad 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -62,7 +62,7 @@
 #else
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/list.h>
@@ -70,14 +70,14 @@
 #include <linux/init.h>
 #endif
 
-static kmem_cache_t *revoke_record_cache;
-static kmem_cache_t *revoke_table_cache;
+static kmem_cache_t *jbd2_revoke_record_cache;
+static kmem_cache_t *jbd2_revoke_table_cache;
 
 /* Each revoke record represents one single revoked block.  During
    journal replay, this involves recording the transaction ID of the
    last transaction to revoke this block. */
 
-struct jbd_revoke_record_s
+struct jbd2_revoke_record_s
 {
 	struct list_head  hash;
 	tid_t		  sequence;	/* Used for recovery only */
@@ -86,7 +86,7 @@ struct jbd_revoke_record_s
 
 
 /* The revoke table is just a simple hash table of revoke records. */
-struct jbd_revoke_table_s
+struct jbd2_revoke_table_s
 {
 	/* It is conceivable that we might want a larger hash table
 	 * for recovery.  Must be a power of two. */
@@ -99,7 +99,7 @@ struct jbd_revoke_table_s
 #ifdef __KERNEL__
 static void write_one_revoke_record(journal_t *, transaction_t *,
 				    struct journal_head **, int *,
-				    struct jbd_revoke_record_s *);
+				    struct jbd2_revoke_record_s *);
 static void flush_descriptor(journal_t *, struct journal_head *, int);
 #endif
 
@@ -108,7 +108,7 @@ static void flush_descriptor(journal_t *, struct journal_head *, int);
 /* Borrowed from buffer.c: this is a tried and tested block hash function */
 static inline int hash(journal_t *journal, unsigned long block)
 {
-	struct jbd_revoke_table_s *table = journal->j_revoke;
+	struct jbd2_revoke_table_s *table = journal->j_revoke;
 	int hash_shift = table->hash_shift;
 
 	return ((block << (hash_shift - 6)) ^
@@ -120,10 +120,10 @@ static int insert_revoke_hash(journal_t *journal, unsigned long blocknr,
 			      tid_t seq)
 {
 	struct list_head *hash_list;
-	struct jbd_revoke_record_s *record;
+	struct jbd2_revoke_record_s *record;
 
 repeat:
-	record = kmem_cache_alloc(revoke_record_cache, GFP_NOFS);
+	record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS);
 	if (!record)
 		goto oom;
 
@@ -145,57 +145,57 @@ oom:
 
 /* Find a revoke record in the journal's hash table. */
 
-static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
+static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
 						      unsigned long blocknr)
 {
 	struct list_head *hash_list;
-	struct jbd_revoke_record_s *record;
+	struct jbd2_revoke_record_s *record;
 
 	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
 
 	spin_lock(&journal->j_revoke_lock);
-	record = (struct jbd_revoke_record_s *) hash_list->next;
+	record = (struct jbd2_revoke_record_s *) hash_list->next;
 	while (&(record->hash) != hash_list) {
 		if (record->blocknr == blocknr) {
 			spin_unlock(&journal->j_revoke_lock);
 			return record;
 		}
-		record = (struct jbd_revoke_record_s *) record->hash.next;
+		record = (struct jbd2_revoke_record_s *) record->hash.next;
 	}
 	spin_unlock(&journal->j_revoke_lock);
 	return NULL;
 }
 
-int __init journal_init_revoke_caches(void)
+int __init jbd2_journal_init_revoke_caches(void)
 {
-	revoke_record_cache = kmem_cache_create("revoke_record",
-					   sizeof(struct jbd_revoke_record_s),
+	jbd2_revoke_record_cache = kmem_cache_create("revoke_record",
+					   sizeof(struct jbd2_revoke_record_s),
 					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if (revoke_record_cache == 0)
+	if (jbd2_revoke_record_cache == 0)
 		return -ENOMEM;
 
-	revoke_table_cache = kmem_cache_create("revoke_table",
-					   sizeof(struct jbd_revoke_table_s),
+	jbd2_revoke_table_cache = kmem_cache_create("revoke_table",
+					   sizeof(struct jbd2_revoke_table_s),
 					   0, 0, NULL, NULL);
-	if (revoke_table_cache == 0) {
-		kmem_cache_destroy(revoke_record_cache);
-		revoke_record_cache = NULL;
+	if (jbd2_revoke_table_cache == 0) {
+		kmem_cache_destroy(jbd2_revoke_record_cache);
+		jbd2_revoke_record_cache = NULL;
 		return -ENOMEM;
 	}
 	return 0;
 }
 
-void journal_destroy_revoke_caches(void)
+void jbd2_journal_destroy_revoke_caches(void)
 {
-	kmem_cache_destroy(revoke_record_cache);
-	revoke_record_cache = NULL;
-	kmem_cache_destroy(revoke_table_cache);
-	revoke_table_cache = NULL;
+	kmem_cache_destroy(jbd2_revoke_record_cache);
+	jbd2_revoke_record_cache = NULL;
+	kmem_cache_destroy(jbd2_revoke_table_cache);
+	jbd2_revoke_table_cache = NULL;
 }
 
 /* Initialise the revoke table for a given journal to a given size. */
 
-int journal_init_revoke(journal_t *journal, int hash_size)
+int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
 {
 	int shift, tmp;
 
@@ -206,7 +206,7 @@ int journal_init_revoke(journal_t *journal, int hash_size)
 	while((tmp >>= 1UL) != 0UL)
 		shift++;
 
-	journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+	journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
 	if (!journal->j_revoke_table[0])
 		return -ENOMEM;
 	journal->j_revoke = journal->j_revoke_table[0];
@@ -221,7 +221,7 @@ int journal_init_revoke(journal_t *journal, int hash_size)
 	journal->j_revoke->hash_table =
 		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
 	if (!journal->j_revoke->hash_table) {
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
+		kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
 		journal->j_revoke = NULL;
 		return -ENOMEM;
 	}
@@ -229,10 +229,10 @@ int journal_init_revoke(journal_t *journal, int hash_size)
 	for (tmp = 0; tmp < hash_size; tmp++)
 		INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
 
-	journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+	journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
 	if (!journal->j_revoke_table[1]) {
 		kfree(journal->j_revoke_table[0]->hash_table);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
+		kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
 		return -ENOMEM;
 	}
 
@@ -249,8 +249,8 @@ int journal_init_revoke(journal_t *journal, int hash_size)
 		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
 	if (!journal->j_revoke->hash_table) {
 		kfree(journal->j_revoke_table[0]->hash_table);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
+		kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
+		kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]);
 		journal->j_revoke = NULL;
 		return -ENOMEM;
 	}
@@ -265,9 +265,9 @@ int journal_init_revoke(journal_t *journal, int hash_size)
 
 /* Destoy a journal's revoke table.  The table must already be empty! */
 
-void journal_destroy_revoke(journal_t *journal)
+void jbd2_journal_destroy_revoke(journal_t *journal)
 {
-	struct jbd_revoke_table_s *table;
+	struct jbd2_revoke_table_s *table;
 	struct list_head *hash_list;
 	int i;
 
@@ -281,7 +281,7 @@ void journal_destroy_revoke(journal_t *journal)
 	}
 
 	kfree(table->hash_table);
-	kmem_cache_free(revoke_table_cache, table);
+	kmem_cache_free(jbd2_revoke_table_cache, table);
 	journal->j_revoke = NULL;
 
 	table = journal->j_revoke_table[1];
@@ -294,7 +294,7 @@ void journal_destroy_revoke(journal_t *journal)
 	}
 
 	kfree(table->hash_table);
-	kmem_cache_free(revoke_table_cache, table);
+	kmem_cache_free(jbd2_revoke_table_cache, table);
 	journal->j_revoke = NULL;
 }
 
@@ -302,7 +302,7 @@ void journal_destroy_revoke(journal_t *journal)
 #ifdef __KERNEL__
 
 /*
- * journal_revoke: revoke a given buffer_head from the journal.  This
+ * jbd2_journal_revoke: revoke a given buffer_head from the journal.  This
  * prevents the block from being replayed during recovery if we take a
  * crash after this current transaction commits.  Any subsequent
  * metadata writes of the buffer in this transaction cancel the
@@ -314,18 +314,18 @@ void journal_destroy_revoke(journal_t *journal)
  * revoke before clearing the block bitmap when we are deleting
  * metadata.
  *
- * Revoke performs a journal_forget on any buffer_head passed in as a
+ * Revoke performs a jbd2_journal_forget on any buffer_head passed in as a
  * parameter, but does _not_ forget the buffer_head if the bh was only
  * found implicitly.
  *
  * bh_in may not be a journalled buffer - it may have come off
  * the hash tables without an attached journal_head.
  *
- * If bh_in is non-zero, journal_revoke() will decrement its b_count
+ * If bh_in is non-zero, jbd2_journal_revoke() will decrement its b_count
  * by one.
  */
 
-int journal_revoke(handle_t *handle, unsigned long blocknr,
+int jbd2_journal_revoke(handle_t *handle, unsigned long blocknr,
 		   struct buffer_head *bh_in)
 {
 	struct buffer_head *bh = NULL;
@@ -338,7 +338,7 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
 		BUFFER_TRACE(bh_in, "enter");
 
 	journal = handle->h_transaction->t_journal;
-	if (!journal_set_features(journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)){
+	if (!jbd2_journal_set_features(journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)){
 		J_ASSERT (!"Cannot set revoke feature!");
 		return -EINVAL;
 	}
@@ -386,8 +386,8 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
 		set_buffer_revoked(bh);
 		set_buffer_revokevalid(bh);
 		if (bh_in) {
-			BUFFER_TRACE(bh_in, "call journal_forget");
-			journal_forget(handle, bh_in);
+			BUFFER_TRACE(bh_in, "call jbd2_journal_forget");
+			jbd2_journal_forget(handle, bh_in);
 		} else {
 			BUFFER_TRACE(bh, "call brelse");
 			__brelse(bh);
@@ -403,7 +403,7 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
 
 /*
  * Cancel an outstanding revoke.  For use only internally by the
- * journaling code (called from journal_get_write_access).
+ * journaling code (called from jbd2_journal_get_write_access).
  *
  * We trust buffer_revoked() on the buffer if the buffer is already
  * being journaled: if there is no revoke pending on the buffer, then we
@@ -418,9 +418,9 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
  *
  * The caller must have the journal locked.
  */
-int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
+int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 {
-	struct jbd_revoke_record_s *record;
+	struct jbd2_revoke_record_s *record;
 	journal_t *journal = handle->h_transaction->t_journal;
 	int need_cancel;
 	int did_revoke = 0;	/* akpm: debug */
@@ -447,7 +447,7 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 			spin_lock(&journal->j_revoke_lock);
 			list_del(&record->hash);
 			spin_unlock(&journal->j_revoke_lock);
-			kmem_cache_free(revoke_record_cache, record);
+			kmem_cache_free(jbd2_revoke_record_cache, record);
 			did_revoke = 1;
 		}
 	}
@@ -478,7 +478,7 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
  * we do not want to suspend any processing until all revokes are
  * written -bzzz
  */
-void journal_switch_revoke_table(journal_t *journal)
+void jbd2_journal_switch_revoke_table(journal_t *journal)
 {
 	int i;
 
@@ -498,12 +498,12 @@ void journal_switch_revoke_table(journal_t *journal)
  * Called with the journal lock held.
  */
 
-void journal_write_revoke_records(journal_t *journal,
+void jbd2_journal_write_revoke_records(journal_t *journal,
 				  transaction_t *transaction)
 {
 	struct journal_head *descriptor;
-	struct jbd_revoke_record_s *record;
-	struct jbd_revoke_table_s *revoke;
+	struct jbd2_revoke_record_s *record;
+	struct jbd2_revoke_table_s *revoke;
 	struct list_head *hash_list;
 	int i, offset, count;
 
@@ -519,14 +519,14 @@ void journal_write_revoke_records(journal_t *journal,
 		hash_list = &revoke->hash_table[i];
 
 		while (!list_empty(hash_list)) {
-			record = (struct jbd_revoke_record_s *)
+			record = (struct jbd2_revoke_record_s *)
 				hash_list->next;
 			write_one_revoke_record(journal, transaction,
 						&descriptor, &offset,
 						record);
 			count++;
 			list_del(&record->hash);
-			kmem_cache_free(revoke_record_cache, record);
+			kmem_cache_free(jbd2_revoke_record_cache, record);
 		}
 	}
 	if (descriptor)
@@ -543,7 +543,7 @@ static void write_one_revoke_record(journal_t *journal,
 				    transaction_t *transaction,
 				    struct journal_head **descriptorp,
 				    int *offsetp,
-				    struct jbd_revoke_record_s *record)
+				    struct jbd2_revoke_record_s *record)
 {
 	struct journal_head *descriptor;
 	int offset;
@@ -551,7 +551,7 @@ static void write_one_revoke_record(journal_t *journal,
 
 	/* If we are already aborting, this all becomes a noop.  We
            still need to go round the loop in
-           journal_write_revoke_records in order to free all of the
+           jbd2_journal_write_revoke_records in order to free all of the
            revoke records: only the IO to the journal is omitted. */
 	if (is_journal_aborted(journal))
 		return;
@@ -568,19 +568,19 @@ static void write_one_revoke_record(journal_t *journal,
 	}
 
 	if (!descriptor) {
-		descriptor = journal_get_descriptor_buffer(journal);
+		descriptor = jbd2_journal_get_descriptor_buffer(journal);
 		if (!descriptor)
 			return;
 		header = (journal_header_t *) &jh2bh(descriptor)->b_data[0];
-		header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
-		header->h_blocktype = cpu_to_be32(JFS_REVOKE_BLOCK);
+		header->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
+		header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK);
 		header->h_sequence  = cpu_to_be32(transaction->t_tid);
 
 		/* Record it so that we can wait for IO completion later */
 		JBUFFER_TRACE(descriptor, "file as BJ_LogCtl");
-		journal_file_buffer(descriptor, transaction, BJ_LogCtl);
+		jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl);
 
-		offset = sizeof(journal_revoke_header_t);
+		offset = sizeof(jbd2_journal_revoke_header_t);
 		*descriptorp = descriptor;
 	}
 
@@ -601,7 +601,7 @@ static void flush_descriptor(journal_t *journal,
 			     struct journal_head *descriptor,
 			     int offset)
 {
-	journal_revoke_header_t *header;
+	jbd2_journal_revoke_header_t *header;
 	struct buffer_head *bh = jh2bh(descriptor);
 
 	if (is_journal_aborted(journal)) {
@@ -609,7 +609,7 @@ static void flush_descriptor(journal_t *journal,
 		return;
 	}
 
-	header = (journal_revoke_header_t *) jh2bh(descriptor)->b_data;
+	header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data;
 	header->r_count = cpu_to_be32(offset);
 	set_buffer_jwrite(bh);
 	BUFFER_TRACE(bh, "write");
@@ -640,11 +640,11 @@ static void flush_descriptor(journal_t *journal,
  * single block.
  */
 
-int journal_set_revoke(journal_t *journal,
+int jbd2_journal_set_revoke(journal_t *journal,
 		       unsigned long blocknr,
 		       tid_t sequence)
 {
-	struct jbd_revoke_record_s *record;
+	struct jbd2_revoke_record_s *record;
 
 	record = find_revoke_record(journal, blocknr);
 	if (record) {
@@ -664,11 +664,11 @@ int journal_set_revoke(journal_t *journal,
  * ones, but later transactions still need replayed.
  */
 
-int journal_test_revoke(journal_t *journal,
+int jbd2_journal_test_revoke(journal_t *journal,
 			unsigned long blocknr,
 			tid_t sequence)
 {
-	struct jbd_revoke_record_s *record;
+	struct jbd2_revoke_record_s *record;
 
 	record = find_revoke_record(journal, blocknr);
 	if (!record)
@@ -683,21 +683,21 @@ int journal_test_revoke(journal_t *journal,
  * that it can be reused by the running filesystem.
  */
 
-void journal_clear_revoke(journal_t *journal)
+void jbd2_journal_clear_revoke(journal_t *journal)
 {
 	int i;
 	struct list_head *hash_list;
-	struct jbd_revoke_record_s *record;
-	struct jbd_revoke_table_s *revoke;
+	struct jbd2_revoke_record_s *record;
+	struct jbd2_revoke_table_s *revoke;
 
 	revoke = journal->j_revoke;
 
 	for (i = 0; i < revoke->hash_size; i++) {
 		hash_list = &revoke->hash_table[i];
 		while (!list_empty(hash_list)) {
-			record = (struct jbd_revoke_record_s*) hash_list->next;
+			record = (struct jbd2_revoke_record_s*) hash_list->next;
 			list_del(&record->hash);
-			kmem_cache_free(revoke_record_cache, record);
+			kmem_cache_free(jbd2_revoke_record_cache, record);
 		}
 	}
 }
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e1b3c8af4d17..149957bef907 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -19,7 +19,7 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
@@ -28,7 +28,7 @@
 #include <linux/highmem.h>
 
 /*
- * get_transaction: obtain a new transaction_t object.
+ * jbd2_get_transaction: obtain a new transaction_t object.
  *
  * Simply allocate and initialise a new transaction.  Create it in
  * RUNNING state and add it to the current journal (which should not
@@ -44,7 +44,7 @@
  */
 
 static transaction_t *
-get_transaction(journal_t *journal, transaction_t *transaction)
+jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 {
 	transaction->t_journal = journal;
 	transaction->t_state = T_RUNNING;
@@ -115,7 +115,7 @@ repeat:
 	spin_lock(&journal->j_state_lock);
 repeat_locked:
 	if (is_journal_aborted(journal) ||
-	    (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
+	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
 		spin_unlock(&journal->j_state_lock);
 		ret = -EROFS;
 		goto out;
@@ -134,7 +134,7 @@ repeat_locked:
 			spin_unlock(&journal->j_state_lock);
 			goto alloc_transaction;
 		}
-		get_transaction(journal, new_transaction);
+		jbd2_get_transaction(journal, new_transaction);
 		new_transaction = NULL;
 	}
 
@@ -175,7 +175,7 @@ repeat_locked:
 		spin_unlock(&transaction->t_handle_lock);
 		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
 				TASK_UNINTERRUPTIBLE);
-		__log_start_commit(journal, transaction->t_tid);
+		__jbd2_log_start_commit(journal, transaction->t_tid);
 		spin_unlock(&journal->j_state_lock);
 		schedule();
 		finish_wait(&journal->j_wait_transaction_locked, &wait);
@@ -205,12 +205,12 @@ repeat_locked:
 	 * committing_transaction->t_outstanding_credits plus "enough" for
 	 * the log control blocks.
 	 * Also, this test is inconsitent with the matching one in
-	 * journal_extend().
+	 * jbd2_journal_extend().
 	 */
-	if (__log_space_left(journal) < jbd_space_needed(journal)) {
+	if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
 		jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
 		spin_unlock(&transaction->t_handle_lock);
-		__log_wait_for_space(journal);
+		__jbd2_log_wait_for_space(journal);
 		goto repeat_locked;
 	}
 
@@ -223,7 +223,7 @@ repeat_locked:
 	transaction->t_handle_count++;
 	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
 		  handle, nblocks, transaction->t_outstanding_credits,
-		  __log_space_left(journal));
+		  __jbd2_log_space_left(journal));
 	spin_unlock(&transaction->t_handle_lock);
 	spin_unlock(&journal->j_state_lock);
 out:
@@ -246,7 +246,7 @@ static handle_t *new_handle(int nblocks)
 }
 
 /**
- * handle_t *journal_start() - Obtain a new handle.
+ * handle_t *jbd2_journal_start() - Obtain a new handle.
  * @journal: Journal to start transaction on.
  * @nblocks: number of block buffer we might modify
  *
@@ -259,7 +259,7 @@ static handle_t *new_handle(int nblocks)
  *
  * Return a pointer to a newly allocated handle, or NULL on failure
  */
-handle_t *journal_start(journal_t *journal, int nblocks)
+handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 {
 	handle_t *handle = journal_current_handle();
 	int err;
@@ -289,7 +289,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
 }
 
 /**
- * int journal_extend() - extend buffer credits.
+ * int jbd2_journal_extend() - extend buffer credits.
  * @handle:  handle to 'extend'
  * @nblocks: nr blocks to try to extend by.
  *
@@ -298,7 +298,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
  * a credit for a number of buffer modications in advance, but can
  * extend its credit if it needs more.
  *
- * journal_extend tries to give the running handle more buffer credits.
+ * jbd2_journal_extend tries to give the running handle more buffer credits.
  * It does not guarantee that allocation - this is a best-effort only.
  * The calling process MUST be able to deal cleanly with a failure to
  * extend here.
@@ -308,7 +308,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
  * return code < 0 implies an error
  * return code > 0 implies normal transaction-full status.
  */
-int journal_extend(handle_t *handle, int nblocks)
+int jbd2_journal_extend(handle_t *handle, int nblocks)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
@@ -339,7 +339,7 @@ int journal_extend(handle_t *handle, int nblocks)
 		goto unlock;
 	}
 
-	if (wanted > __log_space_left(journal)) {
+	if (wanted > __jbd2_log_space_left(journal)) {
 		jbd_debug(3, "denied handle %p %d blocks: "
 			  "insufficient log space\n", handle, nblocks);
 		goto unlock;
@@ -360,21 +360,21 @@ out:
 
 
 /**
- * int journal_restart() - restart a handle .
+ * int jbd2_journal_restart() - restart a handle .
  * @handle:  handle to restart
  * @nblocks: nr credits requested
  *
  * Restart a handle for a multi-transaction filesystem
  * operation.
  *
- * If the journal_extend() call above fails to grant new buffer credits
- * to a running handle, a call to journal_restart will commit the
+ * If the jbd2_journal_extend() call above fails to grant new buffer credits
+ * to a running handle, a call to jbd2_journal_restart will commit the
  * handle's transaction so far and reattach the handle to a new
  * transaction capabable of guaranteeing the requested number of
  * credits.
  */
 
-int journal_restart(handle_t *handle, int nblocks)
+int jbd2_journal_restart(handle_t *handle, int nblocks)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
@@ -402,7 +402,7 @@ int journal_restart(handle_t *handle, int nblocks)
 	spin_unlock(&transaction->t_handle_lock);
 
 	jbd_debug(2, "restarting handle %p\n", handle);
-	__log_start_commit(journal, transaction->t_tid);
+	__jbd2_log_start_commit(journal, transaction->t_tid);
 	spin_unlock(&journal->j_state_lock);
 
 	handle->h_buffer_credits = nblocks;
@@ -412,7 +412,7 @@ int journal_restart(handle_t *handle, int nblocks)
 
 
 /**
- * void journal_lock_updates () - establish a transaction barrier.
+ * void jbd2_journal_lock_updates () - establish a transaction barrier.
  * @journal:  Journal to establish a barrier on.
  *
  * This locks out any further updates from being started, and blocks
@@ -421,7 +421,7 @@ int journal_restart(handle_t *handle, int nblocks)
  *
  * The journal lock should not be held on entry.
  */
-void journal_lock_updates(journal_t *journal)
+void jbd2_journal_lock_updates(journal_t *journal)
 {
 	DEFINE_WAIT(wait);
 
@@ -452,7 +452,7 @@ void journal_lock_updates(journal_t *journal)
 
 	/*
 	 * We have now established a barrier against other normal updates, but
-	 * we also need to barrier against other journal_lock_updates() calls
+	 * we also need to barrier against other jbd2_journal_lock_updates() calls
 	 * to make sure that we serialise special journal-locked operations
 	 * too.
 	 */
@@ -460,14 +460,14 @@ void journal_lock_updates(journal_t *journal)
 }
 
 /**
- * void journal_unlock_updates (journal_t* journal) - release barrier
+ * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
  * @journal:  Journal to release the barrier on.
  *
- * Release a transaction barrier obtained with journal_lock_updates().
+ * Release a transaction barrier obtained with jbd2_journal_lock_updates().
  *
  * Should be called without the journal lock held.
  */
-void journal_unlock_updates (journal_t *journal)
+void jbd2_journal_unlock_updates (journal_t *journal)
 {
 	J_ASSERT(journal->j_barrier_count != 0);
 
@@ -667,7 +667,7 @@ repeat:
 				JBUFFER_TRACE(jh, "allocate memory for buffer");
 				jbd_unlock_bh_state(bh);
 				frozen_buffer =
-					jbd_slab_alloc(jh2bh(jh)->b_size,
+					jbd2_slab_alloc(jh2bh(jh)->b_size,
 							 GFP_NOFS);
 				if (!frozen_buffer) {
 					printk(KERN_EMERG
@@ -699,7 +699,7 @@ repeat:
 		jh->b_transaction = transaction;
 		JBUFFER_TRACE(jh, "file as BJ_Reserved");
 		spin_lock(&journal->j_list_lock);
-		__journal_file_buffer(jh, transaction, BJ_Reserved);
+		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
 		spin_unlock(&journal->j_list_lock);
 	}
 
@@ -723,18 +723,18 @@ done:
 	 * If we are about to journal a buffer, then any revoke pending on it is
 	 * no longer valid
 	 */
-	journal_cancel_revoke(handle, jh);
+	jbd2_journal_cancel_revoke(handle, jh);
 
 out:
 	if (unlikely(frozen_buffer))	/* It's usually NULL */
-		jbd_slab_free(frozen_buffer, bh->b_size);
+		jbd2_slab_free(frozen_buffer, bh->b_size);
 
 	JBUFFER_TRACE(jh, "exit");
 	return error;
 }
 
 /**
- * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
+ * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
  * @handle: transaction to add buffer modifications to
  * @bh:     bh to be used for metadata writes
  * @credits: variable that will receive credits for the buffer
@@ -745,16 +745,16 @@ out:
  * because we're write()ing a buffer which is also part of a shared mapping.
  */
 
-int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
+int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 {
-	struct journal_head *jh = journal_add_journal_head(bh);
+	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 	int rc;
 
 	/* We do not want to get caught playing with fields which the
 	 * log thread also manipulates.  Make sure that the buffer
 	 * completes any outstanding IO before proceeding. */
 	rc = do_get_write_access(handle, jh, 0);
-	journal_put_journal_head(jh);
+	jbd2_journal_put_journal_head(jh);
 	return rc;
 }
 
@@ -772,17 +772,17 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
  * unlocked buffer beforehand. */
 
 /**
- * int journal_get_create_access () - notify intent to use newly created bh
+ * int jbd2_journal_get_create_access () - notify intent to use newly created bh
  * @handle: transaction to new buffer to
  * @bh: new buffer.
  *
  * Call this if you create a new bh.
  */
-int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
+int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
-	struct journal_head *jh = journal_add_journal_head(bh);
+	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 	int err;
 
 	jbd_debug(5, "journal_head %p\n", jh);
@@ -812,7 +812,7 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	if (jh->b_transaction == NULL) {
 		jh->b_transaction = transaction;
 		JBUFFER_TRACE(jh, "file as BJ_Reserved");
-		__journal_file_buffer(jh, transaction, BJ_Reserved);
+		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
 	} else if (jh->b_transaction == journal->j_committing_transaction) {
 		JBUFFER_TRACE(jh, "set next transaction");
 		jh->b_next_transaction = transaction;
@@ -828,14 +828,14 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	 * which hits an assertion error.
 	 */
 	JBUFFER_TRACE(jh, "cancelling revoke");
-	journal_cancel_revoke(handle, jh);
-	journal_put_journal_head(jh);
+	jbd2_journal_cancel_revoke(handle, jh);
+	jbd2_journal_put_journal_head(jh);
 out:
 	return err;
 }
 
 /**
- * int journal_get_undo_access() -  Notify intent to modify metadata with
+ * int jbd2_journal_get_undo_access() -  Notify intent to modify metadata with
  *     non-rewindable consequences
  * @handle: transaction
  * @bh: buffer to undo
@@ -848,7 +848,7 @@ out:
  * since if we overwrote that space we would make the delete
  * un-rewindable in case of a crash.
  *
- * To deal with that, journal_get_undo_access requests write access to a
+ * To deal with that, jbd2_journal_get_undo_access requests write access to a
  * buffer for parts of non-rewindable operations such as delete
  * operations on the bitmaps.  The journaling code must keep a copy of
  * the buffer's contents prior to the undo_access call until such time
@@ -861,10 +861,10 @@ out:
  *
  * Returns error number or 0 on success.
  */
-int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
+int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 {
 	int err;
-	struct journal_head *jh = journal_add_journal_head(bh);
+	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 	char *committed_data = NULL;
 
 	JBUFFER_TRACE(jh, "entry");
@@ -880,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 
 repeat:
 	if (!jh->b_committed_data) {
-		committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+		committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 		if (!committed_data) {
 			printk(KERN_EMERG "%s: No memory for committed data\n",
 				__FUNCTION__);
@@ -905,14 +905,14 @@ repeat:
 	}
 	jbd_unlock_bh_state(bh);
 out:
-	journal_put_journal_head(jh);
+	jbd2_journal_put_journal_head(jh);
 	if (unlikely(committed_data))
-		jbd_slab_free(committed_data, bh->b_size);
+		jbd2_slab_free(committed_data, bh->b_size);
 	return err;
 }
 
 /**
- * int journal_dirty_data() -  mark a buffer as containing dirty data which
+ * int jbd2_journal_dirty_data() -  mark a buffer as containing dirty data which
  *                             needs to be flushed before we can commit the
  *                             current transaction.
  * @handle: transaction
@@ -923,10 +923,10 @@ out:
  *
  * Returns error number or 0 on success.
  *
- * journal_dirty_data() can be called via page_launder->ext3_writepage
+ * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage
  * by kswapd.
  */
-int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
+int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 {
 	journal_t *journal = handle->h_transaction->t_journal;
 	int need_brelse = 0;
@@ -935,7 +935,7 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	if (is_handle_aborted(handle))
 		return 0;
 
-	jh = journal_add_journal_head(bh);
+	jh = jbd2_journal_add_journal_head(bh);
 	JBUFFER_TRACE(jh, "entry");
 
 	/*
@@ -984,7 +984,7 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 			 * And while we're in that state, someone does a
 			 * writepage() in an attempt to pageout the same area
 			 * of the file via a shared mapping.  At present that
-			 * calls journal_dirty_data(), and we get right here.
+			 * calls jbd2_journal_dirty_data(), and we get right here.
 			 * It may be too late to journal the data.  Simply
 			 * falling through to the next test will suffice: the
 			 * data will be dirty and wil be checkpointed.  The
@@ -1035,7 +1035,7 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 			/* journal_clean_data_list() may have got there first */
 			if (jh->b_transaction != NULL) {
 				JBUFFER_TRACE(jh, "unfile from commit");
-				__journal_temp_unlink_buffer(jh);
+				__jbd2_journal_temp_unlink_buffer(jh);
 				/* It still points to the committing
 				 * transaction; move it to this one so
 				 * that the refile assert checks are
@@ -1054,15 +1054,15 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 		if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
 			JBUFFER_TRACE(jh, "not on correct data list: unfile");
 			J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
-			__journal_temp_unlink_buffer(jh);
+			__jbd2_journal_temp_unlink_buffer(jh);
 			jh->b_transaction = handle->h_transaction;
 			JBUFFER_TRACE(jh, "file as data");
-			__journal_file_buffer(jh, handle->h_transaction,
+			__jbd2_journal_file_buffer(jh, handle->h_transaction,
 						BJ_SyncData);
 		}
 	} else {
 		JBUFFER_TRACE(jh, "not on a transaction");
-		__journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
+		__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
 	}
 no_journal:
 	spin_unlock(&journal->j_list_lock);
@@ -1072,12 +1072,12 @@ no_journal:
 		__brelse(bh);
 	}
 	JBUFFER_TRACE(jh, "exit");
-	journal_put_journal_head(jh);
+	jbd2_journal_put_journal_head(jh);
 	return 0;
 }
 
 /**
- * int journal_dirty_metadata() -  mark a buffer as containing dirty metadata
+ * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
  * @bh: buffer to mark
  *
@@ -1095,7 +1095,7 @@ no_journal:
  * buffer: that only gets done when the old transaction finally
  * completes its commit.
  */
-int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
+int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
@@ -1156,7 +1156,7 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 
 	JBUFFER_TRACE(jh, "file as BJ_Metadata");
 	spin_lock(&journal->j_list_lock);
-	__journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
+	__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
 	spin_unlock(&journal->j_list_lock);
 out_unlock_bh:
 	jbd_unlock_bh_state(bh);
@@ -1166,18 +1166,18 @@ out:
 }
 
 /*
- * journal_release_buffer: undo a get_write_access without any buffer
+ * jbd2_journal_release_buffer: undo a get_write_access without any buffer
  * updates, if the update decided in the end that it didn't need access.
  *
  */
 void
-journal_release_buffer(handle_t *handle, struct buffer_head *bh)
+jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
 {
 	BUFFER_TRACE(bh, "entry");
 }
 
 /**
- * void journal_forget() - bforget() for potentially-journaled buffers.
+ * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
  * @handle: transaction handle
  * @bh:     bh to 'forget'
  *
@@ -1193,7 +1193,7 @@ journal_release_buffer(handle_t *handle, struct buffer_head *bh)
  * Allow this call even if the handle has aborted --- it may be part of
  * the caller's cleanup after an abort.
  */
-int journal_forget (handle_t *handle, struct buffer_head *bh)
+int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
@@ -1250,11 +1250,11 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 		 */
 
 		if (jh->b_cp_transaction) {
-			__journal_temp_unlink_buffer(jh);
-			__journal_file_buffer(jh, transaction, BJ_Forget);
+			__jbd2_journal_temp_unlink_buffer(jh);
+			__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
 		} else {
-			__journal_unfile_buffer(jh);
-			journal_remove_journal_head(bh);
+			__jbd2_journal_unfile_buffer(jh);
+			jbd2_journal_remove_journal_head(bh);
 			__brelse(bh);
 			if (!buffer_jbd(bh)) {
 				spin_unlock(&journal->j_list_lock);
@@ -1292,7 +1292,7 @@ drop:
 }
 
 /**
- * int journal_stop() - complete a transaction
+ * int jbd2_journal_stop() - complete a transaction
  * @handle: tranaction to complete.
  *
  * All done for a particular handle.
@@ -1302,12 +1302,12 @@ drop:
  * complication is that we need to start a commit operation if the
  * filesystem is marked for synchronous update.
  *
- * journal_stop itself will not usually return an error, but it may
+ * jbd2_journal_stop itself will not usually return an error, but it may
  * do so in unusual circumstances.  In particular, expect it to
- * return -EIO if a journal_abort has been executed since the
+ * return -EIO if a jbd2_journal_abort has been executed since the
  * transaction began.
  */
-int journal_stop(handle_t *handle)
+int jbd2_journal_stop(handle_t *handle)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
@@ -1383,15 +1383,15 @@ int journal_stop(handle_t *handle)
 		jbd_debug(2, "transaction too old, requesting commit for "
 					"handle %p\n", handle);
 		/* This is non-blocking */
-		__log_start_commit(journal, transaction->t_tid);
+		__jbd2_log_start_commit(journal, transaction->t_tid);
 		spin_unlock(&journal->j_state_lock);
 
 		/*
-		 * Special case: JFS_SYNC synchronous updates require us
+		 * Special case: JBD2_SYNC synchronous updates require us
 		 * to wait for the commit to complete.
 		 */
 		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
-			err = log_wait_commit(journal, tid);
+			err = jbd2_log_wait_commit(journal, tid);
 	} else {
 		spin_unlock(&transaction->t_handle_lock);
 		spin_unlock(&journal->j_state_lock);
@@ -1401,24 +1401,24 @@ int journal_stop(handle_t *handle)
 	return err;
 }
 
-/**int journal_force_commit() - force any uncommitted transactions
+/**int jbd2_journal_force_commit() - force any uncommitted transactions
  * @journal: journal to force
  *
  * For synchronous operations: force any uncommitted transactions
  * to disk.  May seem kludgy, but it reuses all the handle batching
  * code in a very simple manner.
  */
-int journal_force_commit(journal_t *journal)
+int jbd2_journal_force_commit(journal_t *journal)
 {
 	handle_t *handle;
 	int ret;
 
-	handle = journal_start(journal, 1);
+	handle = jbd2_journal_start(journal, 1);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 	} else {
 		handle->h_sync = 1;
-		ret = journal_stop(handle);
+		ret = jbd2_journal_stop(handle);
 	}
 	return ret;
 }
@@ -1486,7 +1486,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  *
  * Called under j_list_lock.  The journal may not be locked.
  */
-void __journal_temp_unlink_buffer(struct journal_head *jh)
+void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
 {
 	struct journal_head **list = NULL;
 	transaction_t *transaction;
@@ -1538,23 +1538,23 @@ void __journal_temp_unlink_buffer(struct journal_head *jh)
 		mark_buffer_dirty(bh);	/* Expose it to the VM */
 }
 
-void __journal_unfile_buffer(struct journal_head *jh)
+void __jbd2_journal_unfile_buffer(struct journal_head *jh)
 {
-	__journal_temp_unlink_buffer(jh);
+	__jbd2_journal_temp_unlink_buffer(jh);
 	jh->b_transaction = NULL;
 }
 
-void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
+void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
 {
 	jbd_lock_bh_state(jh2bh(jh));
 	spin_lock(&journal->j_list_lock);
-	__journal_unfile_buffer(jh);
+	__jbd2_journal_unfile_buffer(jh);
 	spin_unlock(&journal->j_list_lock);
 	jbd_unlock_bh_state(jh2bh(jh));
 }
 
 /*
- * Called from journal_try_to_free_buffers().
+ * Called from jbd2_journal_try_to_free_buffers().
  *
  * Called under jbd_lock_bh_state(bh)
  */
@@ -1576,16 +1576,16 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
 		if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
 			/* A written-back ordered data buffer */
 			JBUFFER_TRACE(jh, "release data");
-			__journal_unfile_buffer(jh);
-			journal_remove_journal_head(bh);
+			__jbd2_journal_unfile_buffer(jh);
+			jbd2_journal_remove_journal_head(bh);
 			__brelse(bh);
 		}
 	} else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
 		/* written-back checkpointed metadata buffer */
 		if (jh->b_jlist == BJ_None) {
 			JBUFFER_TRACE(jh, "remove from checkpoint list");
-			__journal_remove_checkpoint(jh);
-			journal_remove_journal_head(bh);
+			__jbd2_journal_remove_checkpoint(jh);
+			jbd2_journal_remove_journal_head(bh);
 			__brelse(bh);
 		}
 	}
@@ -1596,7 +1596,7 @@ out:
 
 
 /**
- * int journal_try_to_free_buffers() - try to free page buffers.
+ * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
  * @page: to try and free
  * @unused_gfp_mask: unused
@@ -1613,13 +1613,13 @@ out:
  *
  * This complicates JBD locking somewhat.  We aren't protected by the
  * BKL here.  We wish to remove the buffer from its committing or
- * running transaction's ->t_datalist via __journal_unfile_buffer.
+ * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
  *
  * This may *change* the value of transaction_t->t_datalist, so anyone
  * who looks at t_datalist needs to lock against this function.
  *
- * Even worse, someone may be doing a journal_dirty_data on this
- * buffer.  So we need to lock against that.  journal_dirty_data()
+ * Even worse, someone may be doing a jbd2_journal_dirty_data on this
+ * buffer.  So we need to lock against that.  jbd2_journal_dirty_data()
  * will come out of the lock with the buffer dirty, which makes it
  * ineligible for release here.
  *
@@ -1629,7 +1629,7 @@ out:
  * cannot happen because we never reallocate freed data as metadata
  * while the data is part of a transaction.  Yes?
  */
-int journal_try_to_free_buffers(journal_t *journal,
+int jbd2_journal_try_to_free_buffers(journal_t *journal,
 				struct page *page, gfp_t unused_gfp_mask)
 {
 	struct buffer_head *head;
@@ -1646,15 +1646,15 @@ int journal_try_to_free_buffers(journal_t *journal,
 		/*
 		 * We take our own ref against the journal_head here to avoid
 		 * having to add tons of locking around each instance of
-		 * journal_remove_journal_head() and journal_put_journal_head().
+		 * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head().
 		 */
-		jh = journal_grab_journal_head(bh);
+		jh = jbd2_journal_grab_journal_head(bh);
 		if (!jh)
 			continue;
 
 		jbd_lock_bh_state(bh);
 		__journal_try_to_free_buffer(journal, bh);
-		journal_put_journal_head(jh);
+		jbd2_journal_put_journal_head(jh);
 		jbd_unlock_bh_state(bh);
 		if (buffer_jbd(bh))
 			goto busy;
@@ -1681,23 +1681,23 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 	int may_free = 1;
 	struct buffer_head *bh = jh2bh(jh);
 
-	__journal_unfile_buffer(jh);
+	__jbd2_journal_unfile_buffer(jh);
 
 	if (jh->b_cp_transaction) {
 		JBUFFER_TRACE(jh, "on running+cp transaction");
-		__journal_file_buffer(jh, transaction, BJ_Forget);
+		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
 		clear_buffer_jbddirty(bh);
 		may_free = 0;
 	} else {
 		JBUFFER_TRACE(jh, "on running transaction");
-		journal_remove_journal_head(bh);
+		jbd2_journal_remove_journal_head(bh);
 		__brelse(bh);
 	}
 	return may_free;
 }
 
 /*
- * journal_invalidatepage
+ * jbd2_journal_invalidatepage
  *
  * This code is tricky.  It has a number of cases to deal with.
  *
@@ -1765,7 +1765,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
 
-	jh = journal_grab_journal_head(bh);
+	jh = jbd2_journal_grab_journal_head(bh);
 	if (!jh)
 		goto zap_buffer_no_jh;
 
@@ -1796,7 +1796,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 			JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
 			ret = __dispose_buffer(jh,
 					journal->j_running_transaction);
-			journal_put_journal_head(jh);
+			jbd2_journal_put_journal_head(jh);
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
 			spin_unlock(&journal->j_state_lock);
@@ -1810,7 +1810,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 				JBUFFER_TRACE(jh, "give to committing trans");
 				ret = __dispose_buffer(jh,
 					journal->j_committing_transaction);
-				journal_put_journal_head(jh);
+				jbd2_journal_put_journal_head(jh);
 				spin_unlock(&journal->j_list_lock);
 				jbd_unlock_bh_state(bh);
 				spin_unlock(&journal->j_state_lock);
@@ -1844,7 +1844,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 					journal->j_running_transaction);
 			jh->b_next_transaction = NULL;
 		}
-		journal_put_journal_head(jh);
+		jbd2_journal_put_journal_head(jh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
 		spin_unlock(&journal->j_state_lock);
@@ -1861,7 +1861,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 	}
 
 zap_buffer:
-	journal_put_journal_head(jh);
+	jbd2_journal_put_journal_head(jh);
 zap_buffer_no_jh:
 	spin_unlock(&journal->j_list_lock);
 	jbd_unlock_bh_state(bh);
@@ -1877,7 +1877,7 @@ zap_buffer_unlocked:
 }
 
 /**
- * void journal_invalidatepage()
+ * void jbd2_journal_invalidatepage()
  * @journal: journal to use for flush...
  * @page:    page to flush
  * @offset:  length of page to invalidate.
@@ -1885,7 +1885,7 @@ zap_buffer_unlocked:
  * Reap page buffers containing data after offset in page.
  *
  */
-void journal_invalidatepage(journal_t *journal,
+void jbd2_journal_invalidatepage(journal_t *journal,
 		      struct page *page,
 		      unsigned long offset)
 {
@@ -1927,7 +1927,7 @@ void journal_invalidatepage(journal_t *journal,
 /*
  * File a buffer on the given transaction list.
  */
-void __journal_file_buffer(struct journal_head *jh,
+void __jbd2_journal_file_buffer(struct journal_head *jh,
 			transaction_t *transaction, int jlist)
 {
 	struct journal_head **list = NULL;
@@ -1956,7 +1956,7 @@ void __journal_file_buffer(struct journal_head *jh,
 	}
 
 	if (jh->b_transaction)
-		__journal_temp_unlink_buffer(jh);
+		__jbd2_journal_temp_unlink_buffer(jh);
 	jh->b_transaction = transaction;
 
 	switch (jlist) {
@@ -1998,12 +1998,12 @@ void __journal_file_buffer(struct journal_head *jh,
 		set_buffer_jbddirty(bh);
 }
 
-void journal_file_buffer(struct journal_head *jh,
+void jbd2_journal_file_buffer(struct journal_head *jh,
 				transaction_t *transaction, int jlist)
 {
 	jbd_lock_bh_state(jh2bh(jh));
 	spin_lock(&transaction->t_journal->j_list_lock);
-	__journal_file_buffer(jh, transaction, jlist);
+	__jbd2_journal_file_buffer(jh, transaction, jlist);
 	spin_unlock(&transaction->t_journal->j_list_lock);
 	jbd_unlock_bh_state(jh2bh(jh));
 }
@@ -2018,7 +2018,7 @@ void journal_file_buffer(struct journal_head *jh,
  *
  * Called under jbd_lock_bh_state(jh2bh(jh))
  */
-void __journal_refile_buffer(struct journal_head *jh)
+void __jbd2_journal_refile_buffer(struct journal_head *jh)
 {
 	int was_dirty;
 	struct buffer_head *bh = jh2bh(jh);
@@ -2029,7 +2029,7 @@ void __journal_refile_buffer(struct journal_head *jh)
 
 	/* If the buffer is now unused, just drop it. */
 	if (jh->b_next_transaction == NULL) {
-		__journal_unfile_buffer(jh);
+		__jbd2_journal_unfile_buffer(jh);
 		return;
 	}
 
@@ -2039,10 +2039,10 @@ void __journal_refile_buffer(struct journal_head *jh)
 	 */
 
 	was_dirty = test_clear_buffer_jbddirty(bh);
-	__journal_temp_unlink_buffer(jh);
+	__jbd2_journal_temp_unlink_buffer(jh);
 	jh->b_transaction = jh->b_next_transaction;
 	jh->b_next_transaction = NULL;
-	__journal_file_buffer(jh, jh->b_transaction,
+	__jbd2_journal_file_buffer(jh, jh->b_transaction,
 				was_dirty ? BJ_Metadata : BJ_Reserved);
 	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
 
@@ -2054,26 +2054,26 @@ void __journal_refile_buffer(struct journal_head *jh)
  * For the unlocked version of this call, also make sure that any
  * hanging journal_head is cleaned up if necessary.
  *
- * __journal_refile_buffer is usually called as part of a single locked
+ * __jbd2_journal_refile_buffer is usually called as part of a single locked
  * operation on a buffer_head, in which the caller is probably going to
  * be hooking the journal_head onto other lists.  In that case it is up
  * to the caller to remove the journal_head if necessary.  For the
- * unlocked journal_refile_buffer call, the caller isn't going to be
+ * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
  * doing anything else to the buffer so we need to do the cleanup
  * ourselves to avoid a jh leak.
  *
  * *** The journal_head may be freed by this call! ***
  */
-void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
+void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
 {
 	struct buffer_head *bh = jh2bh(jh);
 
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
 
-	__journal_refile_buffer(jh);
+	__jbd2_journal_refile_buffer(jh);
 	jbd_unlock_bh_state(bh);
-	journal_remove_journal_head(bh);
+	jbd2_journal_remove_journal_head(bh);
 
 	spin_unlock(&journal->j_list_lock);
 	__brelse(bh);
-- 
cgit v1.2.3


From a920e9416b3469994860ab552dfd7fd5a5aff162 Mon Sep 17 00:00:00 2001
From: Johann Lombardi <johann.lombardi@bull.net>
Date: Wed, 11 Oct 2006 01:21:00 -0700
Subject: [PATCH] jbd2: rename slab

jbd and jbd2 currently use the same slab names, but slab names must be
unique.  The patch below just renames jbd2's slabs.

Signed-off-by: Johann Lombardi <johann.lombardi@bull.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/journal.c | 6 +++---
 fs/jbd2/revoke.c  | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 3fbbba20a516..8d0f71e562fe 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1631,7 +1631,7 @@ void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 
 static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
 static const char *jbd_slab_names[JBD_MAX_SLABS] = {
-	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
+	"jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k"
 };
 
 static void jbd2_journal_destroy_jbd_slabs(void)
@@ -1702,7 +1702,7 @@ static int journal_init_jbd2_journal_head_cache(void)
 	int retval;
 
 	J_ASSERT(jbd2_journal_head_cache == 0);
-	jbd2_journal_head_cache = kmem_cache_create("journal_head",
+	jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
 				sizeof(struct journal_head),
 				0,		/* offset */
 				0,		/* flags */
@@ -1999,7 +1999,7 @@ kmem_cache_t *jbd2_handle_cache;
 
 static int __init journal_init_handle_cache(void)
 {
-	jbd2_handle_cache = kmem_cache_create("journal_handle",
+	jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle",
 				sizeof(handle_t),
 				0,		/* offset */
 				0,		/* flags */
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 2fccddc7acad..5820a0c5ad26 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -168,13 +168,13 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
 
 int __init jbd2_journal_init_revoke_caches(void)
 {
-	jbd2_revoke_record_cache = kmem_cache_create("revoke_record",
+	jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
 					   sizeof(struct jbd2_revoke_record_s),
 					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
 	if (jbd2_revoke_record_cache == 0)
 		return -ENOMEM;
 
-	jbd2_revoke_table_cache = kmem_cache_create("revoke_table",
+	jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
 					   sizeof(struct jbd2_revoke_table_s),
 					   0, 0, NULL, NULL);
 	if (jbd2_revoke_table_cache == 0) {
-- 
cgit v1.2.3


From dab291af8d6307a3075c3d67d0cc8f98e646cb94 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 11 Oct 2006 01:21:01 -0700
Subject: [PATCH] jbd2: enable building of jbd2 and have ext4 use it rather
 than jbd

Reworked from a patch by Mingming Cao and Randy Dunlap

Signed-off-By: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig               | 48 ++++++++++++++++++++++-----
 fs/Makefile              |  1 +
 fs/ext4/acl.c            |  2 +-
 fs/ext4/balloc.c         | 10 +++---
 fs/ext4/bitmap.c         |  2 +-
 fs/ext4/dir.c            |  2 +-
 fs/ext4/file.c           |  4 +--
 fs/ext4/fsync.c          |  4 +--
 fs/ext4/hash.c           |  2 +-
 fs/ext4/ialloc.c         |  6 ++--
 fs/ext4/inode.c          | 52 +++++++++++++++---------------
 fs/ext4/ioctl.c          | 16 ++++-----
 fs/ext4/namei.c          |  4 +--
 fs/ext4/resize.c         |  2 +-
 fs/ext4/super.c          | 84 ++++++++++++++++++++++++------------------------
 fs/ext4/symlink.c        |  2 +-
 fs/ext4/xattr.c          |  2 +-
 fs/ext4/xattr_security.c |  2 +-
 fs/ext4/xattr_trusted.c  |  2 +-
 fs/ext4/xattr_user.c     |  2 +-
 20 files changed, 141 insertions(+), 108 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index ac9ba1c30935..db4d13324c36 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -143,24 +143,24 @@ config EXT3_FS_SECURITY
 config EXT4DEV_FS
 	tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-        select JBD
-        help
+	select JBD2
+	help
 	  Ext4dev is a predecessor filesystem of the next generation
 	  extended fs ext4, based on ext3 filesystem code. It will be
 	  renamed ext4 fs later, once ext4dev is mature and stabilized.
 
-          Unlike the change from ext2 filesystem to ext3 filesystem,
-          the on-disk format of ext4dev is not the same as ext3 any more:
+	  Unlike the change from ext2 filesystem to ext3 filesystem,
+	  the on-disk format of ext4dev is not the same as ext3 any more:
 	  it is based on extent maps and it supports 48-bit physical block
-          numbers. These combined on-disk format changes will allow
+	  numbers. These combined on-disk format changes will allow
 	  ext4dev/ext4 to handle more than 16 TB filesystem volumes --
 	  a hard limit that ext3 cannot overcome without changing the
-          on-disk format.
+	  on-disk format.
 
 	  Other than extent maps and 48-bit block numbers, ext4dev also is
-          likely to have other new features such as persistent preallocation,
+	  likely to have other new features such as persistent preallocation,
 	  high resolution time stamps, and larger file support etc.  These
-          features will be added to ext4dev gradually.
+	  features will be added to ext4dev gradually.
 
 	  To compile this file system support as a module, choose M here. The
 	  module will be called ext4dev.  Be aware, however, that the filesystem
@@ -239,6 +239,38 @@ config JBD_DEBUG
 	  generated.  To turn debugging off again, do
 	  "echo 0 > /proc/sys/fs/jbd-debug".
 
+config JBD2
+	tristate
+	help
+	  This is a generic journaling layer for block devices that support
+	  both 32-bit and 64-bit block numbers.  It is currently used by
+	  the ext4dev/ext4 filesystem, but it could also be used to add
+	  journal support to other file systems or block devices such
+	  as RAID or LVM.
+
+	  If you are using ext4dev/ext4, you need to say Y here. If you are not
+	  using ext4dev/ext4 then you will probably want to say N.
+
+	  To compile this code as a module, choose M here. The module will be
+	  called jbd2.  If you are compiling ext4dev/ext4 into the kernel,
+	  you cannot compile this code as a module.
+
+config JBD2_DEBUG
+	bool "JBD2 (ext4dev/ext4) debugging support"
+	depends on JBD2
+	help
+	  If you are using the ext4dev/ext4 journaled file system (or
+	  potentially any other filesystem/device using JBD2), this option
+	  allows you to enable debugging output while the system is running,
+	  in order to help track down any problems you are having.
+	  By default, the debugging output will be turned off.
+
+	  If you select Y here, then you will be able to turn on debugging
+	  with "echo N > /proc/sys/fs/jbd2-debug", where N is a number between
+	  1 and 5. The higher the number, the more debugging output is
+	  generated.  To turn debugging off again, do
+	  "echo 0 > /proc/sys/fs/jbd2-debug".
+
 config FS_MBCACHE
 # Meta block cache for Extended Attributes (ext2/ext3/ext4)
 	tristate
diff --git a/fs/Makefile b/fs/Makefile
index 64396af37b2a..9a5ce9323bfd 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
 obj-$(CONFIG_EXT3_FS)		+= ext3/ # Before ext2 so root fs can be ext3
 obj-$(CONFIG_EXT4DEV_FS)	+= ext4/ # Before ext2 so root fs can be ext4dev
 obj-$(CONFIG_JBD)		+= jbd/
+obj-$(CONFIG_JBD2)		+= jbd2/
 obj-$(CONFIG_EXT2_FS)		+= ext2/
 obj-$(CONFIG_CRAMFS)		+= cramfs/
 obj-$(CONFIG_RAMFS)		+= ramfs/
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index d143489aeb4c..0a965dd5664e 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -9,7 +9,7 @@
 #include <linux/slab.h>
 #include <linux/capability.h>
 #include <linux/fs.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include <linux/ext4_fs.h>
 #include "xattr.h"
 #include "acl.h"
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 357e4e50374a..e9e98449137b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -14,9 +14,9 @@
 #include <linux/time.h>
 #include <linux/capability.h>
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/ext4_fs.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 
@@ -526,12 +526,12 @@ do_more:
 		 * transaction.
 		 *
 		 * Ideally we would want to allow that to happen, but to
-		 * do so requires making journal_forget() capable of
+		 * do so requires making jbd2_journal_forget() capable of
 		 * revoking the queued write of a data block, which
 		 * implies blocking on the journal lock.  *forget()
 		 * cannot block due to truncate races.
 		 *
-		 * Eventually we can fix this by making journal_forget()
+		 * Eventually we can fix this by making jbd2_journal_forget()
 		 * return a status indicating whether or not it was able
 		 * to revoke the buffer.  On successful revoke, it is
 		 * safe not to set the allocation bit in the committed
@@ -1382,7 +1382,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 
 	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
 
-	return journal_force_commit_nested(EXT4_SB(sb)->s_journal);
+	return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
 }
 
 /**
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index f4b35706f39c..11e93c169bcf 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -8,7 +8,7 @@
  */
 
 #include <linux/buffer_head.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/ext4_fs.h>
 
 #ifdef EXT4FS_DEBUG
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ec114d7886cc..9833d5d00c46 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -22,7 +22,7 @@
  */
 
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/ext4_fs.h>
 #include <linux/buffer_head.h>
 #include <linux/smp_lock.h>
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d938fbe1e08b..0b622c0624b7 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -20,9 +20,9 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/ext4_fs.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include "xattr.h"
 #include "acl.h"
 
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 272faa27761d..2a167d7131fa 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -26,9 +26,9 @@
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/writeback.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/ext4_fs.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 
 /*
  * akpm: A new design for ext4_sync_file().
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index d15bb4274428..a67966385e06 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/sched.h>
 #include <linux/ext4_fs.h>
 #include <linux/cryptohash.h>
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4b92066ca08f..34d39ae966f7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -14,9 +14,9 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/ext4_fs.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include <linux/stat.h>
 #include <linux/string.h>
 #include <linux/quotaops.h>
@@ -497,7 +497,7 @@ repeat_in_this_group:
 				goto got;
 			}
 			/* we lost it */
-			journal_release_buffer(handle, bitmap_bh);
+			jbd2_journal_release_buffer(handle, bitmap_bh);
 
 			if (++ino < EXT4_INODES_PER_GROUP(sb))
 				goto repeat_in_this_group;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7275d60dcc59..0d96c7d3bb5b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -25,8 +25,8 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/time.h>
-#include <linux/ext4_jbd.h>
-#include <linux/jbd.h>
+#include <linux/ext4_jbd2.h>
+#include <linux/jbd2.h>
 #include <linux/smp_lock.h>
 #include <linux/highuid.h>
 #include <linux/pagemap.h>
@@ -84,7 +84,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
 	    (!is_metadata && !ext4_should_journal_data(inode))) {
 		if (bh) {
-			BUFFER_TRACE(bh, "call journal_forget");
+			BUFFER_TRACE(bh, "call jbd2_journal_forget");
 			return ext4_journal_forget(handle, bh);
 		}
 		return 0;
@@ -657,7 +657,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 failed:
 	/* Allocation failed, free what we already allocated */
 	for (i = 1; i <= n ; i++) {
-		BUFFER_TRACE(branch[i].bh, "call journal_forget");
+		BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
 		ext4_journal_forget(handle, branch[i].bh);
 	}
 	for (i = 0; i <indirect_blks; i++)
@@ -758,7 +758,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 
 err_out:
 	for (i = 1; i <= num; i++) {
-		BUFFER_TRACE(where[i].bh, "call journal_forget");
+		BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
 		ext4_journal_forget(handle, where[i].bh);
 		ext4_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
 	}
@@ -1119,7 +1119,7 @@ static int walk_page_buffers(	handle_t *handle,
  * To preserve ordering, it is essential that the hole instantiation and
  * the data write be encapsulated in a single transaction.  We cannot
  * close off a transaction and start a new one between the ext4_get_block()
- * and the commit_write().  So doing the journal_start at the start of
+ * and the commit_write().  So doing the jbd2_journal_start at the start of
  * prepare_write() is the right place.
  *
  * Also, this function can nest inside ext4_writepage() ->
@@ -1135,7 +1135,7 @@ static int walk_page_buffers(	handle_t *handle,
  * transaction open and was blocking on the quota lock - a ranking
  * violation.
  *
- * So what we do is to rely on the fact that journal_stop/journal_start
+ * So what we do is to rely on the fact that jbd2_journal_stop/jbd2_journal_start
  * will _not_ run commit under these circumstances because handle->h_ref
  * is elevated.  We'll still have enough credits for the tiny quotafile
  * write.
@@ -1184,7 +1184,7 @@ out:
 
 int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 {
-	int err = journal_dirty_data(handle, bh);
+	int err = jbd2_journal_dirty_data(handle, bh);
 	if (err)
 		ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__,
 						bh, handle,err);
@@ -1333,9 +1333,9 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 
 		EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA;
 		journal = EXT4_JOURNAL(inode);
-		journal_lock_updates(journal);
-		err = journal_flush(journal);
-		journal_unlock_updates(journal);
+		jbd2_journal_lock_updates(journal);
+		err = jbd2_journal_flush(journal);
+		jbd2_journal_unlock_updates(journal);
 
 		if (err)
 			return 0;
@@ -1356,7 +1356,7 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
 	return 0;
 }
 
-static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
+static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
 {
 	if (buffer_mapped(bh))
 		return ext4_journal_dirty_data(handle, bh);
@@ -1464,7 +1464,7 @@ static int ext4_ordered_writepage(struct page *page,
 	 */
 	if (ret == 0) {
 		err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
-					NULL, journal_dirty_data_fn);
+					NULL, jbd2_journal_dirty_data_fn);
 		if (!ret)
 			ret = err;
 	}
@@ -1595,7 +1595,7 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
 	if (offset == 0)
 		ClearPageChecked(page);
 
-	journal_invalidatepage(journal, page, offset);
+	jbd2_journal_invalidatepage(journal, page, offset);
 }
 
 static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -1605,7 +1605,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 	WARN_ON(PageChecked(page));
 	if (!page_has_buffers(page))
 		return 0;
-	return journal_try_to_free_buffers(journal, page, wait);
+	return jbd2_journal_try_to_free_buffers(journal, page, wait);
 }
 
 /*
@@ -1982,11 +1982,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
 
 	/*
 	 * Any buffers which are on the journal will be in memory. We find
-	 * them on the hash table so journal_revoke() will run journal_forget()
+	 * them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget()
 	 * on them.  We've already detached each block from the file, so
-	 * bforget() in journal_forget() should be safe.
+	 * bforget() in jbd2_journal_forget() should be safe.
 	 *
-	 * AKPM: turn on bforget in journal_forget()!!!
+	 * AKPM: turn on bforget in jbd2_journal_forget()!!!
 	 */
 	for (p = first; p < last; p++) {
 		u32 nr = le32_to_cpu(*p);
@@ -2132,11 +2132,11 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 			 * We've probably journalled the indirect block several
 			 * times during the truncate.  But it's no longer
 			 * needed and we now drop it from the transaction via
-			 * journal_revoke().
+			 * jbd2_journal_revoke().
 			 *
 			 * That's easy if it's exclusively part of this
 			 * transaction.  But if it's part of the committing
-			 * transaction then journal_forget() will simply
+			 * transaction then jbd2_journal_forget() will simply
 			 * brelse() it.  That means that if the underlying
 			 * block is reallocated in ext4_get_block(),
 			 * unmap_underlying_metadata() will find this block
@@ -2251,7 +2251,7 @@ void ext4_truncate(struct inode *inode)
 
 	/*
 	 * We have to lock the EOF page here, because lock_page() nests
-	 * outside journal_start().
+	 * outside jbd2_journal_start().
 	 */
 	if ((inode->i_size & (blocksize - 1)) == 0) {
 		/* Block boundary? Nothing to do */
@@ -3035,7 +3035,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
 	/* the do_update_inode consumes one bh->b_count */
 	get_bh(iloc->bh);
 
-	/* ext4_do_update_inode() does journal_dirty_metadata */
+	/* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
 	err = ext4_do_update_inode(handle, inode, iloc);
 	put_bh(iloc->bh);
 	return err;
@@ -3153,7 +3153,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
 		err = ext4_get_inode_loc(inode, &iloc);
 		if (!err) {
 			BUFFER_TRACE(iloc.bh, "get_write_access");
-			err = journal_get_write_access(handle, iloc.bh);
+			err = jbd2_journal_get_write_access(handle, iloc.bh);
 			if (!err)
 				err = ext4_journal_dirty_metadata(handle,
 								  iloc.bh);
@@ -3185,8 +3185,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 	if (is_journal_aborted(journal) || IS_RDONLY(inode))
 		return -EROFS;
 
-	journal_lock_updates(journal);
-	journal_flush(journal);
+	jbd2_journal_lock_updates(journal);
+	jbd2_journal_flush(journal);
 
 	/*
 	 * OK, there are no updates running now, and all cached data is
@@ -3202,7 +3202,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 		EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL;
 	ext4_set_aops(inode);
 
-	journal_unlock_updates(journal);
+	jbd2_journal_unlock_updates(journal);
 
 	/* Finally we can mark the inode as dirty. */
 
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a567af161b06..a63dce2117b8 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -8,10 +8,10 @@
  */
 
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/capability.h>
 #include <linux/ext4_fs.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include <linux/time.h>
 #include <linux/compat.h>
 #include <linux/smp_lock.h>
@@ -219,9 +219,9 @@ flags_err:
 			return -EFAULT;
 
 		err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
-		journal_lock_updates(EXT4_SB(sb)->s_journal);
-		journal_flush(EXT4_SB(sb)->s_journal);
-		journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+		jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 
 		return err;
 	}
@@ -241,9 +241,9 @@ flags_err:
 			return -EFAULT;
 
 		err = ext4_group_add(sb, &input);
-		journal_lock_updates(EXT4_SB(sb)->s_journal);
-		journal_flush(EXT4_SB(sb)->s_journal);
-		journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+		jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 
 		return err;
 	}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 956b38113f62..f98b9994e36c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -26,10 +26,10 @@
 
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/time.h>
 #include <linux/ext4_fs.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include <linux/fcntl.h>
 #include <linux/stat.h>
 #include <linux/string.h>
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 4a47895d9d6d..5b2828d21180 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -13,7 +13,7 @@
 
 #include <linux/sched.h>
 #include <linux/smp_lock.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 
 #include <linux/errno.h>
 #include <linux/slab.h>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9e32a2a8d286..f131bb69b62e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -20,9 +20,9 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/time.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/ext4_fs.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
@@ -63,7 +63,7 @@ static void ext4_write_super (struct super_block * sb);
 static void ext4_write_super_lockfs(struct super_block *sb);
 
 /*
- * Wrappers for journal_start/end.
+ * Wrappers for jbd2_journal_start/end.
  *
  * The only special thing we need to do here is to make sure that all
  * journal_end calls result in the superblock being marked dirty, so
@@ -87,12 +87,12 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 		return ERR_PTR(-EROFS);
 	}
 
-	return journal_start(journal, nblocks);
+	return jbd2_journal_start(journal, nblocks);
 }
 
 /*
  * The only special thing we need to do here is to make sure that all
- * journal_stop calls result in the superblock being marked dirty, so
+ * jbd2_journal_stop calls result in the superblock being marked dirty, so
  * that sync() will call the filesystem's write_super callback if
  * appropriate.
  */
@@ -104,7 +104,7 @@ int __ext4_journal_stop(const char *where, handle_t *handle)
 
 	sb = handle->h_transaction->t_journal->j_private;
 	err = handle->h_err;
-	rc = journal_stop(handle);
+	rc = jbd2_journal_stop(handle);
 
 	if (!err)
 		err = rc;
@@ -131,7 +131,7 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
 	printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
 	       caller, errstr, err_fn);
 
-	journal_abort_handle(handle);
+	jbd2_journal_abort_handle(handle);
 }
 
 /* Deal with the reporting of failure conditions on a filesystem such as
@@ -144,7 +144,7 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
  * be aborted, we can't rely on the current, or future, transactions to
  * write out the superblock safely.
  *
- * We'll just use the journal_abort() error code to record an error in
+ * We'll just use the jbd2_journal_abort() error code to record an error in
  * the journal instead.  On recovery, the journal will compain about
  * that error until we've noted it down and cleared it.
  */
@@ -164,7 +164,7 @@ static void ext4_handle_error(struct super_block *sb)
 
 		EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
 		if (journal)
-			journal_abort(journal, -EIO);
+			jbd2_journal_abort(journal, -EIO);
 	}
 	if (test_opt (sb, ERRORS_RO)) {
 		printk (KERN_CRIT "Remounting filesystem read-only\n");
@@ -203,7 +203,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno,
 		errstr = "Out of memory";
 		break;
 	case -EROFS:
-		if (!sb || EXT4_SB(sb)->s_journal->j_flags & JFS_ABORT)
+		if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)
 			errstr = "Journal has aborted";
 		else
 			errstr = "Readonly filesystem";
@@ -279,7 +279,7 @@ void ext4_abort (struct super_block * sb, const char * function,
 	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 	sb->s_flags |= MS_RDONLY;
 	EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
-	journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+	jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 }
 
 void ext4_warning (struct super_block * sb, const char * function,
@@ -391,7 +391,7 @@ static void ext4_put_super (struct super_block * sb)
 	int i;
 
 	ext4_xattr_put_super(sb);
-	journal_destroy(sbi->s_journal);
+	jbd2_journal_destroy(sbi->s_journal);
 	if (!(sb->s_flags & MS_RDONLY)) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -1722,8 +1722,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		/* No mode set, assume a default based on the journal
                    capabilities: ORDERED_DATA if the journal can
                    cope, else JOURNAL_DATA */
-		if (journal_check_available_features
-		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
+		if (jbd2_journal_check_available_features
+		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
 			set_opt(sbi->s_mount_opt, ORDERED_DATA);
 		else
 			set_opt(sbi->s_mount_opt, JOURNAL_DATA);
@@ -1731,8 +1731,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 
 	case EXT4_MOUNT_ORDERED_DATA:
 	case EXT4_MOUNT_WRITEBACK_DATA:
-		if (!journal_check_available_features
-		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
+		if (!jbd2_journal_check_available_features
+		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
 			printk(KERN_ERR "EXT4-fs: Journal does not support "
 			       "requested data journaling mode\n");
 			goto failed_mount4;
@@ -1749,7 +1749,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		}
 	}
 	/*
-	 * The journal_load will have done any necessary log recovery,
+	 * The jbd2_journal_load will have done any necessary log recovery,
 	 * so we can safely mount the rest of the filesystem now.
 	 */
 
@@ -1797,7 +1797,7 @@ cantfind_ext4:
 	goto failed_mount;
 
 failed_mount4:
-	journal_destroy(sbi->s_journal);
+	jbd2_journal_destroy(sbi->s_journal);
 failed_mount3:
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -1837,9 +1837,9 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
 
 	spin_lock(&journal->j_state_lock);
 	if (test_opt(sb, BARRIER))
-		journal->j_flags |= JFS_BARRIER;
+		journal->j_flags |= JBD2_BARRIER;
 	else
-		journal->j_flags &= ~JFS_BARRIER;
+		journal->j_flags &= ~JBD2_BARRIER;
 	spin_unlock(&journal->j_state_lock);
 }
 
@@ -1873,7 +1873,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
 		return NULL;
 	}
 
-	journal = journal_init_inode(journal_inode);
+	journal = jbd2_journal_init_inode(journal_inode);
 	if (!journal) {
 		printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
 		iput(journal_inode);
@@ -1945,7 +1945,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	start = sb_block + 1;
 	brelse(bh);	/* we're done with the superblock */
 
-	journal = journal_init_dev(bdev, sb->s_bdev,
+	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
 					start, len, blocksize);
 	if (!journal) {
 		printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
@@ -1968,7 +1968,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	ext4_init_journal_params(sb, journal);
 	return journal;
 out_journal:
-	journal_destroy(journal);
+	jbd2_journal_destroy(journal);
 out_bdev:
 	ext4_blkdev_put(bdev);
 	return NULL;
@@ -2029,22 +2029,22 @@ static int ext4_load_journal(struct super_block *sb,
 	}
 
 	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
-		err = journal_update_format(journal);
+		err = jbd2_journal_update_format(journal);
 		if (err)  {
 			printk(KERN_ERR "EXT4-fs: error updating journal.\n");
-			journal_destroy(journal);
+			jbd2_journal_destroy(journal);
 			return err;
 		}
 	}
 
 	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
-		err = journal_wipe(journal, !really_read_only);
+		err = jbd2_journal_wipe(journal, !really_read_only);
 	if (!err)
-		err = journal_load(journal);
+		err = jbd2_journal_load(journal);
 
 	if (err) {
 		printk(KERN_ERR "EXT4-fs: error loading journal.\n");
-		journal_destroy(journal);
+		jbd2_journal_destroy(journal);
 		return err;
 	}
 
@@ -2081,9 +2081,9 @@ static int ext4_create_journal(struct super_block * sb,
 	printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n",
 	       journal_inum);
 
-	if (journal_create(journal)) {
+	if (jbd2_journal_create(journal)) {
 		printk(KERN_ERR "EXT4-fs: error creating journal.\n");
-		journal_destroy(journal);
+		jbd2_journal_destroy(journal);
 		return -EIO;
 	}
 
@@ -2130,15 +2130,15 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
 {
 	journal_t *journal = EXT4_SB(sb)->s_journal;
 
-	journal_lock_updates(journal);
-	journal_flush(journal);
+	jbd2_journal_lock_updates(journal);
+	jbd2_journal_flush(journal);
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		sb->s_dirt = 0;
 		ext4_commit_super(sb, es, 1);
 	}
-	journal_unlock_updates(journal);
+	jbd2_journal_unlock_updates(journal);
 }
 
 /*
@@ -2160,7 +2160,7 @@ static void ext4_clear_journal_err(struct super_block * sb,
 	 * journal by a prior ext4_error() or ext4_abort()
 	 */
 
-	j_errno = journal_errno(journal);
+	j_errno = jbd2_journal_errno(journal);
 	if (j_errno) {
 		char nbuf[16];
 
@@ -2174,7 +2174,7 @@ static void ext4_clear_journal_err(struct super_block * sb,
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 		ext4_commit_super (sb, es, 1);
 
-		journal_clear_err(journal);
+		jbd2_journal_clear_err(journal);
 	}
 }
 
@@ -2217,9 +2217,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 	tid_t target;
 
 	sb->s_dirt = 0;
-	if (journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
+	if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
 		if (wait)
-			log_wait_commit(EXT4_SB(sb)->s_journal, target);
+			jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
 	}
 	return 0;
 }
@@ -2236,8 +2236,8 @@ static void ext4_write_super_lockfs(struct super_block *sb)
 		journal_t *journal = EXT4_SB(sb)->s_journal;
 
 		/* Now we set up the journal barrier. */
-		journal_lock_updates(journal);
-		journal_flush(journal);
+		jbd2_journal_lock_updates(journal);
+		jbd2_journal_flush(journal);
 
 		/* Journal blocked and flushed, clear needs_recovery flag. */
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -2257,7 +2257,7 @@ static void ext4_unlockfs(struct super_block *sb)
 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
 		unlock_super(sb);
-		journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	}
 }
 
@@ -2438,9 +2438,9 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
  * is locked for write. Otherwise the are possible deadlocks:
  * Process 1                         Process 2
  * ext4_create()                     quota_sync()
- *   journal_start()                   write_dquot()
+ *   jbd2_journal_start()              write_dquot()
  *   DQUOT_INIT()                        down(dqio_mutex)
- *     down(dqio_mutex)                    journal_start()
+ *     down(dqio_mutex)                    jbd2_journal_start()
  *
  */
 
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 9e4c75f912f7..fcf527286d75 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -18,7 +18,7 @@
  */
 
 #include <linux/fs.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/ext4_fs.h>
 #include <linux/namei.h>
 #include "xattr.h"
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index d3a408154101..90f7d5c0bae4 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,7 +53,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include <linux/ext4_fs.h>
 #include <linux/mbcache.h>
 #include <linux/quotaops.h>
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index d84b1dabeb16..b6a6861951f9 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -7,7 +7,7 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include <linux/ext4_fs.h>
 #include <linux/security.h>
 #include "xattr.h"
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index 11bd58c95a61..b76f2dbc82da 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -10,7 +10,7 @@
 #include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include <linux/ext4_fs.h>
 #include "xattr.h"
 
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 9c5a665e0837..c53cded0761a 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -9,7 +9,7 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
-#include <linux/ext4_jbd.h>
+#include <linux/ext4_jbd2.h>
 #include <linux/ext4_fs.h>
 #include "xattr.h"
 
-- 
cgit v1.2.3


From a86c61812637c7dd0c57e29880cffd477b62f2e7 Mon Sep 17 00:00:00 2001
From: Alex Tomas <alex@clusterfs.com>
Date: Wed, 11 Oct 2006 01:21:03 -0700
Subject: [PATCH] ext3: add extent map support

On disk extents format:
/*
* this is extent on-disk structure
* it's used at the bottom of the tree
*/
struct ext3_extent {
__le32  ee_block;       /* first logical block extent covers */
__le16  ee_len;         /* number of blocks covered by extent */
__le16  ee_start_hi;    /* high 16 bits of physical block */
__le32  ee_start;       /* low 32 bits of physical block */
};

Signed-off-by: Alex Tomas <alex@clusterfs.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/Makefile  |    2 +-
 fs/ext4/dir.c     |    3 +-
 fs/ext4/extents.c | 2075 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/ialloc.c  |   11 +
 fs/ext4/inode.c   |   17 +-
 fs/ext4/ioctl.c   |    1 -
 fs/ext4/super.c   |   10 +-
 7 files changed, 2108 insertions(+), 11 deletions(-)
 create mode 100644 fs/ext4/extents.c

(limited to 'fs')

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 09c487893e4a..a6acb96ebeb9 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
 
 ext4dev-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-	   ioctl.o namei.o super.o symlink.o hash.o resize.o
+	   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
 
 ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)	+= xattr.o xattr_user.o xattr_trusted.o
 ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 9833d5d00c46..18ac173af575 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -134,8 +134,7 @@ static int ext4_readdir(struct file * filp,
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext4_get_blocks_handle(NULL, inode, blk, 1,
-						&map_bh, 0, 0);
+		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0);
 		if (err > 0) {
 			page_cache_readahead(sb->s_bdev->bd_inode->i_mapping,
 				&filp->f_ra,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
new file mode 100644
index 000000000000..f67b2ef6a71f
--- /dev/null
+++ b/fs/ext4/extents.c
@@ -0,0 +1,2075 @@
+/*
+ * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+ *
+ * Architecture independence:
+ *   Copyright (c) 2005, Bull S.A.
+ *   Written by Pierre Peiffer <pierre.peiffer@bull.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Extents support for EXT4
+ *
+ * TODO:
+ *   - ext4*_error() should be used in some situations
+ *   - analyze all BUG()/BUG_ON(), use -EIO where appropriate
+ *   - smart tree reduction
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/ext4_jbd2.h>
+#include <linux/jbd.h>
+#include <linux/smp_lock.h>
+#include <linux/highuid.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ext4_fs_extents.h>
+#include <asm/uaccess.h>
+
+
+static int ext4_ext_check_header(const char *function, struct inode *inode,
+				struct ext4_extent_header *eh)
+{
+	const char *reason;
+
+	/*
+	 * Validate the header of an extent tree node: the magic must match,
+	 * eh_max must be non-zero and eh_entries must not exceed eh_max.
+	 * The first failed test names the reason reported below.
+	 */
+	if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC))
+		reason = "invalid magic";
+	else if (unlikely(eh->eh_max == 0))
+		reason = "invalid eh_max";
+	else if (unlikely(le16_to_cpu(eh->eh_entries) >
+			  le16_to_cpu(eh->eh_max)))
+		reason = "invalid eh_entries";
+	else
+		return 0;
+
+	/* corrupted header: log it and fail with -EIO */
+	ext4_error(inode->i_sb, function,
+			"bad header in inode #%lu: %s - magic %x, "
+			"entries %u, max %u, depth %u",
+			inode->i_ino, reason, le16_to_cpu(eh->eh_magic),
+			le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
+			le16_to_cpu(eh->eh_depth));
+	return -EIO;
+}
+
+/* Top up @handle past @needed credits; ERR_PTR() if the restart fails. */
+static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
+{
+	int err = 0;
+
+	/* restart only if the handle is short and could not be extended */
+	if (handle->h_buffer_credits <= needed &&
+	    ext4_journal_extend(handle, needed))
+		err = ext4_journal_restart(handle, needed);
+	/* NOTE(review): err used to be dropped; confirm callers use IS_ERR() */
+	return err ? ERR_PTR(err) : handle;
+}
+
+/*
+ * Request journal write access for the tree node @path refers to.
+ * Returns 0 or the ext4_journal_get_write_access() error
+ * (the original comment lists EROFS and ENOMEM).
+ */
+static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
+				struct ext4_ext_path *path)
+{
+	struct buffer_head *bh = path->p_bh;
+
+	/*
+	 * No buffer: the leaf/index lives in the inode body, i.e. in-core
+	 * data that needs no protection.  Otherwise @path points to a block.
+	 */
+	return bh ? ext4_journal_get_write_access(handle, bh) : 0;
+}
+
+/*
+ * Mark the tree node that @path refers to as dirty in the journal.
+ * A node held in a buffer is dirtied as journal metadata; a node stored
+ * in the inode body is covered by dirtying the inode itself.
+ * Returns 0 or an error (the original comment lists EROFS, ENOMEM, EIO).
+ */
+static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
+				struct ext4_ext_path *path)
+{
+	struct buffer_head *bh = path->p_bh;
+
+	if (!bh) {
+		/* leaf/index embedded in the inode body */
+		return ext4_mark_inode_dirty(handle, inode);
+	}
+
+	/* node lives in its own block */
+	return ext4_journal_dirty_metadata(handle, bh);
+}
+
+static int ext4_ext_find_goal(struct inode *inode,
+			      struct ext4_ext_path *path,
+			      unsigned long block)
+{
+	struct super_block *sb = inode->i_sb;
+	unsigned long base, slice;
+
+	if (path) {
+		struct ext4_ext_path *last = path + path->p_depth;
+		struct ext4_extent *hint = last->p_ext;
+
+		/* an extent is known: place the block relative to it */
+		if (hint)
+			return le32_to_cpu(hint->ee_start)
+					+ (block - le32_to_cpu(hint->ee_block));
+
+		/* no extent there: aim at the block holding that node */
+		if (last->p_bh)
+			return last->p_bh->b_blocknr;
+	}
+
+	/*
+	 * Nothing to go by: take the first block of the inode's block
+	 * group and add a pid-dependent offset (one of 16 equal slices).
+	 */
+	base = EXT4_I(inode)->i_block_group * EXT4_BLOCKS_PER_GROUP(sb) +
+		le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+	slice = (current->pid % 16) * (EXT4_BLOCKS_PER_GROUP(sb) / 16);
+
+	return base + slice + block;
+}
+
+/* Allocate one block near the goal predicted for @ex's first logical block. */
+static int
+ext4_ext_new_block(handle_t *handle, struct inode *inode,
+			struct ext4_ext_path *path,
+			struct ext4_extent *ex, int *err)
+{
+	unsigned long lblock = le32_to_cpu(ex->ee_block);
+	int hint = ext4_ext_find_goal(inode, path, lblock);
+
+	return ext4_new_block(handle, inode, hint, err);
+}
+
+/* Number of extents that fit in one filesystem block after the header. */
+static inline int ext4_ext_space_block(struct inode *inode)
+{
+	int nr = (inode->i_sb->s_blocksize -
+		  sizeof(struct ext4_extent_header)) /
+		 sizeof(struct ext4_extent);
+
+#ifdef AGRESSIVE_TEST
+	nr = nr > 6 ? 6 : nr;	/* capped at 6 in this test mode */
+#endif
+	return nr;
+}
+
+/* Number of index entries that fit in one filesystem block after the header. */
+static inline int ext4_ext_space_block_idx(struct inode *inode)
+{
+	int nr = (inode->i_sb->s_blocksize -
+		  sizeof(struct ext4_extent_header)) /
+		 sizeof(struct ext4_extent_idx);
+
+#ifdef AGRESSIVE_TEST
+	nr = nr > 5 ? 5 : nr;	/* capped at 5 in this test mode */
+#endif
+	return nr;
+}
+
+/* Number of extents that fit in the inode's i_data after the header. */
+static inline int ext4_ext_space_root(struct inode *inode)
+{
+	int nr;
+
+	nr = (sizeof(EXT4_I(inode)->i_data) -
+	      sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent);
+
+#ifdef AGRESSIVE_TEST
+	nr = nr > 3 ? 3 : nr;	/* capped at 3 in this test mode */
+#endif
+	return nr;
+}
+
+/* Number of index entries that fit in the inode's i_data after the header. */
+static inline int ext4_ext_space_root_idx(struct inode *inode)
+{
+	int nr;
+
+	nr = (sizeof(EXT4_I(inode)->i_data) -
+	      sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent_idx);
+
+#ifdef AGRESSIVE_TEST
+	nr = nr > 4 ? 4 : nr;	/* capped at 4 in this test mode */
+#endif
+	return nr;
+}
+
+#ifdef EXT_DEBUG
+static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
+{
+	struct ext4_ext_path *p, *last = path + path->p_depth;
+
+	/* debug dump of every level; an index entry wins over an extent */
+	ext_debug("path:");
+	for (p = path; p <= last; p++) {
+		if (p->p_idx)
+			ext_debug("  %d->%d", le32_to_cpu(p->p_idx->ei_block),
+				  le32_to_cpu(p->p_idx->ei_leaf));
+		else if (p->p_ext)
+			ext_debug("  %d:%d:%d", le32_to_cpu(p->p_ext->ee_block),
+				  le16_to_cpu(p->p_ext->ee_len),
+				  le32_to_cpu(p->p_ext->ee_start));
+		else
+			ext_debug("  []");
+	}
+	ext_debug("\n");
+}
+
+static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
+{
+	struct ext4_extent_header *hdr;
+	struct ext4_extent *cur, *stop;
+
+	if (!path)
+		return;
+
+	/* debug dump of every extent in the leaf at the bottom of @path */
+	hdr = path[ext_depth(inode)].p_hdr;
+	cur = EXT_FIRST_EXTENT(hdr);
+	stop = cur + le16_to_cpu(hdr->eh_entries);
+
+	for (; cur < stop; cur++)
+		ext_debug("%d:%d:%d ", le32_to_cpu(cur->ee_block),
+			  le16_to_cpu(cur->ee_len),
+			  le32_to_cpu(cur->ee_start));
+
+	ext_debug("\n");
+}
+#else
+#define ext4_ext_show_path(inode,path)
+#define ext4_ext_show_leaf(inode,path)
+#endif
+
+/*
+ * Release every buffer head referenced by the path, levels 0 through
+ * path->p_depth, and clear the pointers so a second call is harmless.
+ */
+static void ext4_ext_drop_refs(struct ext4_ext_path *path)
+{
+	int last = path->p_depth;
+	int lvl;
+
+	for (lvl = 0; lvl <= last; lvl++) {
+		struct ext4_ext_path *cur = &path[lvl];
+
+		if (!cur->p_bh)
+			continue;
+		brelse(cur->p_bh);
+		cur->p_bh = NULL;
+	}
+}
+
+/*
+ * Binary search for the closest index entry for the given block.
+ *
+ * Searches the index entries of the node described by path->p_hdr and
+ * stores the result in path->p_idx: the last entry whose ei_block is
+ * not greater than 'block', or the first entry when every other entry
+ * starts beyond 'block'.
+ *
+ * The header must carry the extent magic, must not claim more entries
+ * than eh_max and must hold at least one entry; otherwise BUG_ON fires.
+ */
+static void
+ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block)
+{
+	struct ext4_extent_header *eh = path->p_hdr;
+	struct ext4_extent_idx *r, *l, *m;
+
+	BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC);
+	BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max));
+	BUG_ON(le16_to_cpu(eh->eh_entries) <= 0);
+
+	ext_debug("binsearch for %d(idx):  ", block);
+
+	/*
+	 * start from the second entry: the answer is "l - 1", so the
+	 * first entry is what we fall back to when nothing else matches
+	 */
+	l = EXT_FIRST_INDEX(eh) + 1;
+	r = EXT_FIRST_INDEX(eh) + le16_to_cpu(eh->eh_entries) - 1;
+	while (l <= r) {
+		m = l + (r - l) / 2;
+		if (block < le32_to_cpu(m->ei_block))
+			r = m - 1;
+		else
+			l = m + 1;
+		ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ei_block,
+				m, m->ei_block, r, r->ei_block);
+	}
+
+	/* l is the first entry starting beyond 'block'; take its predecessor */
+	path->p_idx = l - 1;
+	ext_debug("  -> %d->%d ", le32_to_cpu(path->p_idx->ei_block),
+		  le32_to_cpu(path->p_idx->ei_leaf));
+
+#ifdef CHECK_BINSEARCH
+	/* cross-check the result with a linear scan; also verifies ordering */
+	{
+		struct ext4_extent_idx *chix, *ix;
+		int k;
+
+		chix = ix = EXT_FIRST_INDEX(eh);
+		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
+		  if (k != 0 &&
+		      le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
+				printk("k=%d, ix=0x%p, first=0x%p\n", k,
+					ix, EXT_FIRST_INDEX(eh));
+				printk("%u <= %u\n",
+				       le32_to_cpu(ix->ei_block),
+				       le32_to_cpu(ix[-1].ei_block));
+			}
+			BUG_ON(k && le32_to_cpu(ix->ei_block)
+				           <= le32_to_cpu(ix[-1].ei_block));
+			if (block < le32_to_cpu(ix->ei_block))
+				break;
+			chix = ix;
+		}
+		BUG_ON(chix != path->p_idx);
+	}
+#endif
+
+}
+
+/*
+ * Binary search for the closest extent for the given block.
+ *
+ * Searches the extents of the leaf described by path->p_hdr and stores
+ * the result in path->p_ext: the last extent whose ee_block is not
+ * greater than 'block', or the first extent when every other extent
+ * starts beyond 'block'.  When the leaf holds no entries, path->p_ext
+ * is left untouched.
+ */
+static void
+ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
+{
+	struct ext4_extent_header *eh = path->p_hdr;
+	struct ext4_extent *r, *l, *m;
+
+	BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC);
+	BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max));
+
+	if (eh->eh_entries == 0) {
+		/*
+		 * this leaf is still empty:
+		 *  we get such a leaf in the split/add case
+		 */
+		return;
+	}
+
+	ext_debug("binsearch for %d:  ", block);
+
+	/*
+	 * start from the second extent: the answer is "l - 1", so the
+	 * first extent is what we fall back to when nothing else matches
+	 */
+	l = EXT_FIRST_EXTENT(eh) + 1;
+	r = EXT_FIRST_EXTENT(eh) + le16_to_cpu(eh->eh_entries) - 1;
+
+	while (l <= r) {
+		m = l + (r - l) / 2;
+		if (block < le32_to_cpu(m->ee_block))
+			r = m - 1;
+		else
+			l = m + 1;
+		ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block,
+				m, m->ee_block, r, r->ee_block);
+	}
+
+	/* l is the first extent starting beyond 'block'; take its predecessor */
+	path->p_ext = l - 1;
+	/* NOTE(review): values are printed as block:start:len here */
+	ext_debug("  -> %d:%d:%d ",
+		        le32_to_cpu(path->p_ext->ee_block),
+		        le32_to_cpu(path->p_ext->ee_start),
+		        le16_to_cpu(path->p_ext->ee_len));
+
+#ifdef CHECK_BINSEARCH
+	/* cross-check the result with a linear scan; also verifies ordering */
+	{
+		struct ext4_extent *chex, *ex;
+		int k;
+
+		chex = ex = EXT_FIRST_EXTENT(eh);
+		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
+			BUG_ON(k && le32_to_cpu(ex->ee_block)
+				          <= le32_to_cpu(ex[-1].ee_block));
+			if (block < le32_to_cpu(ex->ee_block))
+				break;
+			chex = ex;
+		}
+		BUG_ON(chex != path->p_ext);
+	}
+#endif
+
+}
+
+/*
+ * Set up an empty extent tree in the inode body: a depth-0 header with
+ * no entries, sized by ext4_ext_space_root().  The inode is marked
+ * dirty and the extent cache is invalidated.  Always returns 0.
+ */
+int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
+{
+	struct ext4_extent_header *root = ext_inode_hdr(inode);
+
+	root->eh_magic = EXT4_EXT_MAGIC;
+	root->eh_max = cpu_to_le16(ext4_ext_space_root(inode));
+	root->eh_entries = 0;
+	root->eh_depth = 0;
+
+	ext4_mark_inode_dirty(handle, inode);
+	ext4_ext_invalidate_cache(inode);
+
+	return 0;
+}
+
+/*
+ * Walk the extent tree of 'inode' from the root down to the leaf that
+ * should cover logical block 'block', recording every visited level.
+ *
+ * @path: array to fill in, or NULL to have one allocated here with
+ *        room for depth + 2 entries.
+ *
+ * On success returns the path array: path[0] describes the root in the
+ * inode, path[depth] the leaf, and each level below the root holds a
+ * reference on the buffer head that was read for it.
+ *
+ * On failure returns ERR_PTR(-ENOMEM) or ERR_PTR(-EIO).  Buffer
+ * references taken so far are dropped, but the array itself is freed
+ * only if it was allocated by this call; a caller-supplied array stays
+ * owned by the caller.
+ */
+struct ext4_ext_path *
+ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path)
+{
+	struct ext4_extent_header *eh;
+	struct buffer_head *bh;
+	short int depth, i, ppos = 0, alloc = 0;
+
+	eh = ext_inode_hdr(inode);
+	BUG_ON(eh == NULL);
+	if (ext4_ext_check_header(__FUNCTION__, inode, eh))
+		return ERR_PTR(-EIO);
+
+	i = depth = ext_depth(inode);
+
+	/* account for a possible depth increase: one spare entry */
+	if (!path) {
+		path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 2),
+				GFP_NOFS);
+		if (!path)
+			return ERR_PTR(-ENOMEM);
+		alloc = 1;
+	}
+	/* only the depth + 1 entries used by this walk are cleared */
+	memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
+	path[0].p_hdr = eh;
+
+	/* walk through the tree; i counts the levels still below us */
+	while (i) {
+		ext_debug("depth %d: num %d, max %d\n",
+			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
+		ext4_ext_binsearch_idx(inode, path + ppos, block);
+		path[ppos].p_block = le32_to_cpu(path[ppos].p_idx->ei_leaf);
+		path[ppos].p_depth = i;
+		path[ppos].p_ext = NULL;
+
+		bh = sb_bread(inode->i_sb, path[ppos].p_block);
+		if (!bh)
+			goto err;
+
+		eh = ext_block_hdr(bh);
+		ppos++;
+		BUG_ON(ppos > depth);
+		/* store bh before validating so the error path releases it */
+		path[ppos].p_bh = bh;
+		path[ppos].p_hdr = eh;
+		i--;
+
+		if (ext4_ext_check_header(__FUNCTION__, inode, eh))
+			goto err;
+	}
+
+	/* ppos now addresses the leaf level (i == 0) */
+	path[ppos].p_depth = i;
+	path[ppos].p_hdr = eh;
+	path[ppos].p_ext = NULL;
+	path[ppos].p_idx = NULL;
+
+	if (ext4_ext_check_header(__FUNCTION__, inode, eh))
+		goto err;
+
+	/* find extent */
+	ext4_ext_binsearch(inode, path + ppos, block);
+
+	ext4_ext_show_path(inode, path);
+
+	return path;
+
+err:
+	ext4_ext_drop_refs(path);
+	if (alloc)
+		kfree(path);
+	return ERR_PTR(-EIO);
+}
+
+/*
+ * Insert a new index entry [logical; ptr] into the index block at curp.
+ * The entry goes right after curp->p_idx when 'logical' is greater
+ * than that entry's ei_block, otherwise right before it; entries to
+ * the right are shifted by one slot to make room.
+ *
+ * 'logical' must differ from the ei_block of curp->p_idx (BUG_ON).
+ * Gets journal access to the block first, bumps eh_entries and marks
+ * the block dirty.  Returns 0 or the error from the access/dirty
+ * helpers.
+ */
+static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
+				struct ext4_ext_path *curp,
+				int logical, int ptr)
+{
+	struct ext4_extent_idx *ix;
+	int len, err;
+
+	if ((err = ext4_ext_get_access(handle, inode, curp)))
+		return err;
+
+	BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
+	/* number of slots between p_idx and EXT_MAX_INDEX() */
+	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
+	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
+		/* insert after */
+		if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
+			/* open a hole at p_idx + 1 */
+			len = (len - 1) * sizeof(struct ext4_extent_idx);
+			len = len < 0 ? 0 : len;
+			ext_debug("insert new index %d after: %d. "
+					"move %d from 0x%p to 0x%p\n",
+					logical, ptr, len,
+					(curp->p_idx + 1), (curp->p_idx + 2));
+			memmove(curp->p_idx + 2, curp->p_idx + 1, len);
+		}
+		ix = curp->p_idx + 1;
+	} else {
+		/* insert before: shift p_idx and everything after it */
+		len = len * sizeof(struct ext4_extent_idx);
+		len = len < 0 ? 0 : len;
+		ext_debug("insert new index %d before: %d. "
+				"move %d from 0x%p to 0x%p\n",
+				logical, ptr, len,
+				curp->p_idx, (curp->p_idx + 1));
+		memmove(curp->p_idx + 1, curp->p_idx, len);
+		ix = curp->p_idx;
+	}
+
+	ix->ei_block = cpu_to_le32(logical);
+	ix->ei_leaf = cpu_to_le32(ptr);
+	curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);
+
+	BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
+	                     > le16_to_cpu(curp->p_hdr->eh_max));
+	BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));
+
+	err = ext4_ext_dirty(handle, inode, curp);
+	ext4_std_error(inode->i_sb, err);
+
+	return err;
+}
+
+/*
+ * Insert a new subtree into the path, using the free index entry at
+ * depth 'at':
+ *  - allocates all needed blocks (the new leaf and all intermediate
+ *    index blocks), depth - at in total
+ *  - decides where to split (currently: at the current extent)
+ *  - moves the remaining extents and index entries (those to the right
+ *    of the split point) into the newly allocated blocks
+ *  - initializes the subtree and finally links it in through
+ *    ext4_ext_insert_index() at path[at]
+ *
+ * Returns 0 on success.  On error a negative errno is returned and
+ * every block recorded in the allocation array is freed again.
+ */
+static int ext4_ext_split(handle_t *handle, struct inode *inode,
+				struct ext4_ext_path *path,
+				struct ext4_extent *newext, int at)
+{
+	struct buffer_head *bh = NULL;
+	int depth = ext_depth(inode);
+	struct ext4_extent_header *neh;
+	struct ext4_extent_idx *fidx;
+	struct ext4_extent *ex;
+	int i = at, k, m, a;
+	unsigned long newblock, oldblock;
+	__le32 border;
+	int *ablocks = NULL; /* array of allocated blocks */
+	int err = 0;
+
+	/* make decision: where to split? */
+	/* FIXME: for now the decision is the simplest one: at the current extent */
+
+	/* if the current leaf is going to be split, then we should use
+	 * the border from the split point */
+	BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
+	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
+		border = path[depth].p_ext[1].ee_block;
+		ext_debug("leaf will be splitted."
+				" next leaf starts at %d\n",
+			          le32_to_cpu(border));
+	} else {
+		border = newext->ee_block;
+		ext_debug("leaf will be added."
+				" next leaf starts at %d\n",
+			        le32_to_cpu(border));
+	}
+
+	/*
+	 * if an error occurs, then we stop processing
+	 * and turn the filesystem read-only. so, the index won't
+	 * be inserted and the tree will be in a consistent
+	 * state. the next mount will repair buffers too
+	 */
+
+	/*
+	 * get an array to track all allocated blocks
+	 * we need this to handle errors and free blocks
+	 * upon them
+	 */
+	/*
+	 * NOTE(review): the array is declared as int * but sized with
+	 * sizeof(unsigned long), so it is over-allocated where the two
+	 * sizes differ.
+	 */
+	ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS);
+	if (!ablocks)
+		return -ENOMEM;
+	memset(ablocks, 0, sizeof(unsigned long) * depth);
+
+	/* allocate all needed blocks */
+	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
+	for (a = 0; a < depth - at; a++) {
+		newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+		if (newblock == 0)
+			goto cleanup;
+		ablocks[a] = newblock;
+	}
+
+	/* initialize new leaf; blocks are consumed from the end of ablocks */
+	newblock = ablocks[--a];
+	BUG_ON(newblock == 0);
+	bh = sb_getblk(inode->i_sb, newblock);
+	if (!bh) {
+		err = -EIO;
+		goto cleanup;
+	}
+	lock_buffer(bh);
+
+	/* on failure the cleanup code unlocks and releases bh */
+	if ((err = ext4_journal_get_create_access(handle, bh)))
+		goto cleanup;
+
+	neh = ext_block_hdr(bh);
+	neh->eh_entries = 0;
+	neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode));
+	neh->eh_magic = EXT4_EXT_MAGIC;
+	neh->eh_depth = 0;
+	ex = EXT_FIRST_EXTENT(neh);
+
+	/* move the remainder of path[depth] to the new leaf */
+	BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
+	/* start copying from the next extent */
+	/* TODO: we could do it by single memmove */
+	/* count the extents to the right of the split point in m */
+	m = 0;
+	path[depth].p_ext++;
+	while (path[depth].p_ext <=
+			EXT_MAX_EXTENT(path[depth].p_hdr)) {
+		ext_debug("move %d:%d:%d in new leaf %lu\n",
+			        le32_to_cpu(path[depth].p_ext->ee_block),
+			        le32_to_cpu(path[depth].p_ext->ee_start),
+			        le16_to_cpu(path[depth].p_ext->ee_len),
+				newblock);
+		/*memmove(ex++, path[depth].p_ext++,
+				sizeof(struct ext4_extent));
+		neh->eh_entries++;*/
+		path[depth].p_ext++;
+		m++;
+	}
+	if (m) {
+		/* p_ext now points past the last extent; copy the m counted ones */
+		memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
+		neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m);
+	}
+
+	set_buffer_uptodate(bh);
+	unlock_buffer(bh);
+
+	if ((err = ext4_journal_dirty_metadata(handle, bh)))
+		goto cleanup;
+	brelse(bh);
+	bh = NULL;
+
+	/* correct old leaf: it no longer owns the m moved extents */
+	if (m) {
+		if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+			goto cleanup;
+		path[depth].p_hdr->eh_entries =
+		     cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
+		if ((err = ext4_ext_dirty(handle, inode, path + depth)))
+			goto cleanup;
+
+	}
+
+	/* create intermediate indexes */
+	k = depth - at - 1;
+	BUG_ON(k < 0);
+	if (k)
+		ext_debug("create %d intermediate indices\n", k);
+	/* insert new index into current index block */
+	/* current depth stored in i var */
+	i = depth - 1;
+	while (k--) {
+		/* the block built in the previous step becomes our child */
+		oldblock = newblock;
+		newblock = ablocks[--a];
+		bh = sb_getblk(inode->i_sb, newblock);
+		if (!bh) {
+			err = -EIO;
+			goto cleanup;
+		}
+		lock_buffer(bh);
+
+		if ((err = ext4_journal_get_create_access(handle, bh)))
+			goto cleanup;
+
+		neh = ext_block_hdr(bh);
+		neh->eh_entries = cpu_to_le16(1);
+		neh->eh_magic = EXT4_EXT_MAGIC;
+		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode));
+		neh->eh_depth = cpu_to_le16(depth - i);
+		fidx = EXT_FIRST_INDEX(neh);
+		fidx->ei_block = border;
+		fidx->ei_leaf = cpu_to_le32(oldblock);
+
+		ext_debug("int.index at %d (block %lu): %lu -> %lu\n", i,
+				newblock, (unsigned long) le32_to_cpu(border),
+				oldblock);
+		/* copy indexes: count the entries right of the split in m */
+		m = 0;
+		path[i].p_idx++;
+
+		ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
+				EXT_MAX_INDEX(path[i].p_hdr));
+		BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
+				EXT_LAST_INDEX(path[i].p_hdr));
+		while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
+			ext_debug("%d: move %d:%d in new index %lu\n", i,
+				        le32_to_cpu(path[i].p_idx->ei_block),
+				        le32_to_cpu(path[i].p_idx->ei_leaf),
+				        newblock);
+			/*memmove(++fidx, path[i].p_idx++,
+					sizeof(struct ext4_extent_idx));
+			neh->eh_entries++;
+			BUG_ON(neh->eh_entries > neh->eh_max);*/
+			path[i].p_idx++;
+			m++;
+		}
+		if (m) {
+			/* moved entries go after the first (border) entry */
+			memmove(++fidx, path[i].p_idx - m,
+				sizeof(struct ext4_extent_idx) * m);
+			neh->eh_entries =
+				cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
+		}
+		set_buffer_uptodate(bh);
+		unlock_buffer(bh);
+
+		if ((err = ext4_journal_dirty_metadata(handle, bh)))
+			goto cleanup;
+		brelse(bh);
+		bh = NULL;
+
+		/* correct old index: it no longer owns the m moved entries */
+		if (m) {
+			err = ext4_ext_get_access(handle, inode, path + i);
+			if (err)
+				goto cleanup;
+			path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m);
+			err = ext4_ext_dirty(handle, inode, path + i);
+			if (err)
+				goto cleanup;
+		}
+
+		i--;
+	}
+
+	/* insert new index */
+	if (err)
+		goto cleanup;
+
+	/* link the top of the new subtree into the index block at 'at' */
+	err = ext4_ext_insert_index(handle, inode, path + at,
+				    le32_to_cpu(border), newblock);
+
+cleanup:
+	if (bh) {
+		if (buffer_locked(bh))
+			unlock_buffer(bh);
+		brelse(bh);
+	}
+
+	if (err) {
+		/* free all allocated blocks in error case */
+		for (i = 0; i < depth; i++) {
+			if (!ablocks[i])
+				continue;
+			ext4_free_blocks(handle, inode, ablocks[i], 1);
+		}
+	}
+	kfree(ablocks);
+
+	return err;
+}
+
+/*
+ * Tree growing procedure:
+ *  - allocates a new block
+ *  - moves the top-level data (index block or leaf) into the new block
+ *  - initializes the new top level, creating a single index entry that
+ *    points to the just created block
+ *
+ * Returns 0 on success or a negative errno.
+ * NOTE(review): the error paths after a successful allocation do not
+ * free 'newblock'.
+ */
+static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
+					struct ext4_ext_path *path,
+					struct ext4_extent *newext)
+{
+	struct ext4_ext_path *curp = path;
+	struct ext4_extent_header *neh;
+	struct ext4_extent_idx *fidx;
+	struct buffer_head *bh;
+	unsigned long newblock;
+	int err = 0;
+
+	newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+	if (newblock == 0)
+		return err;
+
+	bh = sb_getblk(inode->i_sb, newblock);
+	if (!bh) {
+		err = -EIO;
+		ext4_std_error(inode->i_sb, err);
+		return err;
+	}
+	lock_buffer(bh);
+
+	if ((err = ext4_journal_get_create_access(handle, bh))) {
+		unlock_buffer(bh);
+		goto out;
+	}
+
+	/* move top-level index/leaf into new block */
+	memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data));
+
+	/* set size of new block */
+	neh = ext_block_hdr(bh);
+	/* the old root could hold indexes or leaves,
+	 * so calculate eh_max the right way */
+	if (ext_depth(inode))
+	  neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode));
+	else
+	  neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode));
+	neh->eh_magic = EXT4_EXT_MAGIC;
+	set_buffer_uptodate(bh);
+	unlock_buffer(bh);
+
+	if ((err = ext4_journal_dirty_metadata(handle, bh)))
+		goto out;
+
+	/* create index in new top-level index: num,max,pointer */
+	if ((err = ext4_ext_get_access(handle, inode, curp)))
+		goto out;
+
+	/* the root in the inode now holds exactly one index entry */
+	curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
+	curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode));
+	curp->p_hdr->eh_entries = cpu_to_le16(1);
+	curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
+	/* FIXME: it works, but actually path[0] can be index */
+	curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
+	curp->p_idx->ei_leaf = cpu_to_le32(newblock);
+
+	neh = ext_inode_hdr(inode);
+	fidx = EXT_FIRST_INDEX(neh);
+	ext_debug("new root: num %d(%d), lblock %d, ptr %d\n",
+		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
+		  le32_to_cpu(fidx->ei_block), le32_to_cpu(fidx->ei_leaf));
+
+	/* the tree is one level deeper now */
+	neh->eh_depth = cpu_to_le16(path->p_depth + 1);
+	err = ext4_ext_dirty(handle, inode, curp);
+out:
+	brelse(bh);
+
+	return err;
+}
+
+/*
+ * Find an index block with a free entry and add a new leaf below it.
+ * If no level of the path has a free index entry, the tree is grown
+ * in depth first and, when the leaf found afterwards is still full,
+ * the search is repeated.
+ *
+ * 'path' is refilled in place via ext4_ext_find_extent() after the
+ * tree has been changed.  Returns 0 on success or a negative errno.
+ */
+static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
+					struct ext4_ext_path *path,
+					struct ext4_extent *newext)
+{
+	struct ext4_ext_path *curp;
+	int depth, i, err = 0;
+
+repeat:
+	i = depth = ext_depth(inode);
+
+	/* walk up the tree and look for a free index entry */
+	curp = path + depth;
+	while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
+		i--;
+		curp--;
+	}
+
+	/* we use an already allocated block for the index block,
+	 * so subsequent data blocks should be contiguous */
+	if (EXT_HAS_FREE_INDEX(curp)) {
+		/* if we found an index with a free entry, then use that
+		 * entry: create all needed subtree and add new leaf */
+		err = ext4_ext_split(handle, inode, path, newext, i);
+
+		/* refill path; a split error is kept unless the refill fails too */
+		ext4_ext_drop_refs(path);
+		path = ext4_ext_find_extent(inode,
+					    le32_to_cpu(newext->ee_block),
+					    path);
+		if (IS_ERR(path))
+			err = PTR_ERR(path);
+	} else {
+		/* tree is full, time to grow in depth */
+		err = ext4_ext_grow_indepth(handle, inode, path, newext);
+		if (err)
+			goto out;
+
+		/* refill path */
+		ext4_ext_drop_refs(path);
+		path = ext4_ext_find_extent(inode,
+					    le32_to_cpu(newext->ee_block),
+					    path);
+		if (IS_ERR(path)) {
+			err = PTR_ERR(path);
+			goto out;
+		}
+
+		/*
+		 * only the first growth (depth 0 -> 1) produces free space;
+		 * in all other cases we have to split the grown tree
+		 */
+		depth = ext_depth(inode);
+		if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
+			/* now we need split */
+			goto repeat;
+		}
+	}
+
+out:
+	return err;
+}
+
+/*
+ * Return the logical block at which the next allocated extent after
+ * the one referenced by the path starts, or EXT_MAX_BLOCK when there
+ * is none.
+ * NOTE: the block number stored in an index entry is treated as an
+ * allocated block, so index entries have to be consistent with the
+ * leaves.
+ */
+static unsigned long
+ext4_ext_next_allocated_block(struct ext4_ext_path *path)
+{
+	int lvl;
+
+	BUG_ON(path == NULL);
+
+	/* empty depth-0 tree: nothing is allocated at all */
+	if (path->p_depth == 0 && path->p_ext == NULL)
+		return EXT_MAX_BLOCK;
+
+	/* go from the leaf towards the root looking for a right neighbour */
+	for (lvl = path->p_depth; lvl >= 0; lvl--) {
+		if (lvl == path->p_depth) {
+			/* leaf */
+			if (path[lvl].p_ext == EXT_LAST_EXTENT(path[lvl].p_hdr))
+				continue;
+			return le32_to_cpu(path[lvl].p_ext[1].ee_block);
+		}
+
+		/* index */
+		if (path[lvl].p_idx == EXT_LAST_INDEX(path[lvl].p_hdr))
+			continue;
+		return le32_to_cpu(path[lvl].p_idx[1].ei_block);
+	}
+
+	return EXT_MAX_BLOCK;
+}
+
+/*
+ * Return the first logical block covered by the leaf following the
+ * one referenced by the path, or EXT_MAX_BLOCK when there is no
+ * such leaf.
+ */
+static unsigned ext4_ext_next_leaf_block(struct inode *inode,
+                                               struct ext4_ext_path *path)
+{
+	int lvl;
+
+	BUG_ON(path == NULL);
+
+	/* a depth-0 tree has no leaf blocks at all */
+	if (path->p_depth == 0)
+		return EXT_MAX_BLOCK;
+
+	/* start at the index level right above the leaf and go up */
+	for (lvl = path->p_depth - 1; lvl >= 0; lvl--) {
+		if (path[lvl].p_idx == EXT_LAST_INDEX(path[lvl].p_hdr))
+			continue;
+		return le32_to_cpu(path[lvl].p_idx[1].ei_block);
+	}
+
+	return EXT_MAX_BLOCK;
+}
+
+/*
+ * When the first extent of a leaf has been modified, the index entries
+ * above the leaf must carry the new starting block.  The index right
+ * above the leaf is always updated; higher levels are updated only as
+ * long as the level below was reached through its first index entry.
+ * TODO: do we need to correct the tree in all cases?
+ *
+ * Returns 0 on success or the error from the access/dirty helpers.
+ */
+int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
+				struct ext4_ext_path *path)
+{
+	int depth = ext_depth(inode);
+	struct ext4_extent_header *leaf_hdr = path[depth].p_hdr;
+	struct ext4_extent *leaf_ex = path[depth].p_ext;
+	__le32 border;
+	int lvl, err;
+
+	BUG_ON(leaf_ex == NULL);
+	BUG_ON(leaf_hdr == NULL);
+
+	/*
+	 * nothing to do when there is no tree at all, or when the
+	 * modified extent is not the first one in its leaf
+	 */
+	if (depth == 0 || leaf_ex != EXT_FIRST_EXTENT(leaf_hdr))
+		return 0;
+
+	/*
+	 * TODO: we need correction if border is smaller than current one
+	 */
+	border = leaf_ex->ee_block;
+	lvl = depth - 1;
+
+	err = ext4_ext_get_access(handle, inode, path + lvl);
+	if (err)
+		return err;
+	path[lvl].p_idx->ei_block = border;
+	err = ext4_ext_dirty(handle, inode, path + lvl);
+	if (err)
+		return err;
+
+	/* change all left-side indexes */
+	for (lvl = depth - 2; lvl >= 0; lvl--) {
+		if (path[lvl + 1].p_idx != EXT_FIRST_INDEX(path[lvl + 1].p_hdr))
+			break;
+		err = ext4_ext_get_access(handle, inode, path + lvl);
+		if (err)
+			break;
+		path[lvl].p_idx->ei_block = border;
+		err = ext4_ext_dirty(handle, inode, path + lvl);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/*
+ * Return 1 when ex2 directly continues ex1 both logically and
+ * physically, so the two can be merged into one extent; 0 otherwise.
+ */
+static int inline
+ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
+				struct ext4_extent *ex2)
+{
+	/* FIXME: 48bit support */
+
+	/* logical ranges must be adjacent */
+	if (le32_to_cpu(ex2->ee_block) !=
+	    le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len))
+		return 0;
+
+#ifdef AGRESSIVE_TEST
+	/* refuse to merge into extents of 4+ blocks under AGRESSIVE_TEST */
+	if (le16_to_cpu(ex1->ee_len) >= 4)
+		return 0;
+#endif
+
+	/* ... and so must the physical ranges */
+	return le32_to_cpu(ex2->ee_start) ==
+	       le32_to_cpu(ex1->ee_start) + le16_to_cpu(ex1->ee_len);
+}
+
+/*
+ * Try to merge the requested extent into the extent found by the path;
+ * otherwise insert it as a new extent into the tree, creating a new
+ * leaf when there is no space left.
+ *
+ * After the insertion the new extent is merged with its right-hand
+ * neighbours where possible, the index entries above are corrected and
+ * the leaf is marked dirty.  Returns 0 on success or a negative errno.
+ */
+int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
+				struct ext4_ext_path *path,
+				struct ext4_extent *newext)
+{
+	struct ext4_extent_header * eh;
+	struct ext4_extent *ex, *fex;
+	struct ext4_extent *nearex; /* nearest extent */
+	struct ext4_ext_path *npath = NULL;
+	int depth, len, err, next;
+
+	BUG_ON(newext->ee_len == 0);
+	depth = ext_depth(inode);
+	ex = path[depth].p_ext;
+	BUG_ON(path[depth].p_hdr == NULL);
+
+	/* try to insert block into found extent and return */
+	if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
+		ext_debug("append %d block to %d:%d (from %d)\n",
+				le16_to_cpu(newext->ee_len),
+				le32_to_cpu(ex->ee_block),
+				le16_to_cpu(ex->ee_len),
+				le32_to_cpu(ex->ee_start));
+		if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+			return err;
+		ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len)
+					 + le16_to_cpu(newext->ee_len));
+		eh = path[depth].p_hdr;
+		nearex = ex;
+		goto merge;
+	}
+
+repeat:
+	depth = ext_depth(inode);
+	eh = path[depth].p_hdr;
+	if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
+		goto has_space;
+
+	/* probably next leaf has space for us? */
+	fex = EXT_LAST_EXTENT(eh);
+	next = ext4_ext_next_leaf_block(inode, path);
+	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
+	    && next != EXT_MAX_BLOCK) {
+		ext_debug("next leaf block - %d\n", next);
+		BUG_ON(npath != NULL);
+		npath = ext4_ext_find_extent(inode, next, NULL);
+		if (IS_ERR(npath))
+			return PTR_ERR(npath);
+		BUG_ON(npath->p_depth != path->p_depth);
+		eh = npath[depth].p_hdr;
+		if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
+			ext_debug("next leaf isnt full(%d)\n",
+				  le16_to_cpu(eh->eh_entries));
+			/* continue with the next leaf; npath is freed at cleanup */
+			path = npath;
+			goto repeat;
+		}
+		ext_debug("next leaf has no free space(%d,%d)\n",
+			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
+	}
+
+	/*
+	 * there is no free space in the found leaf,
+	 * so we're going to add a new leaf to the tree
+	 */
+	err = ext4_ext_create_new_leaf(handle, inode, path, newext);
+	if (err)
+		goto cleanup;
+	depth = ext_depth(inode);
+	eh = path[depth].p_hdr;
+
+has_space:
+	nearex = path[depth].p_ext;
+
+	if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+		goto cleanup;
+
+	if (!nearex) {
+		/* there is no extent in this leaf, create first one */
+		ext_debug("first extent in the leaf: %d:%d:%d\n",
+			        le32_to_cpu(newext->ee_block),
+			        le32_to_cpu(newext->ee_start),
+			        le16_to_cpu(newext->ee_len));
+		path[depth].p_ext = EXT_FIRST_EXTENT(eh);
+	} else if (le32_to_cpu(newext->ee_block)
+		           > le32_to_cpu(nearex->ee_block)) {
+/*		BUG_ON(newext->ee_block == nearex->ee_block); */
+		/* insert after nearex: open a hole at nearex + 1 if needed */
+		if (nearex != EXT_LAST_EXTENT(eh)) {
+			len = EXT_MAX_EXTENT(eh) - nearex;
+			len = (len - 1) * sizeof(struct ext4_extent);
+			len = len < 0 ? 0 : len;
+			ext_debug("insert %d:%d:%d after: nearest 0x%p, "
+					"move %d from 0x%p to 0x%p\n",
+				        le32_to_cpu(newext->ee_block),
+				        le32_to_cpu(newext->ee_start),
+				        le16_to_cpu(newext->ee_len),
+					nearex, len, nearex + 1, nearex + 2);
+			memmove(nearex + 2, nearex + 1, len);
+		}
+		path[depth].p_ext = nearex + 1;
+	} else {
+		/* insert before nearex: shift nearex and everything after it */
+		BUG_ON(newext->ee_block == nearex->ee_block);
+		len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
+		len = len < 0 ? 0 : len;
+		/*
+		 * NOTE(review): the debug message prints nearex + 1 and
+		 * nearex + 2, while the memmove below goes from nearex
+		 * to nearex + 1.
+		 */
+		ext_debug("insert %d:%d:%d before: nearest 0x%p, "
+				"move %d from 0x%p to 0x%p\n",
+				le32_to_cpu(newext->ee_block),
+				le32_to_cpu(newext->ee_start),
+				le16_to_cpu(newext->ee_len),
+				nearex, len, nearex + 1, nearex + 2);
+		memmove(nearex + 1, nearex, len);
+		path[depth].p_ext = nearex;
+	}
+
+	/* fill in the slot chosen above */
+	eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
+	nearex = path[depth].p_ext;
+	nearex->ee_block = newext->ee_block;
+	nearex->ee_start = newext->ee_start;
+	nearex->ee_len = newext->ee_len;
+	/* FIXME: support for large fs */
+	nearex->ee_start_hi = 0;
+
+merge:
+	/* try to merge extents to the right */
+	while (nearex < EXT_LAST_EXTENT(eh)) {
+		if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1))
+			break;
+		/* merge with next extent! */
+		nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len)
+					     + le16_to_cpu(nearex[1].ee_len));
+		/* close the hole left by the absorbed extent */
+		if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
+			len = (EXT_LAST_EXTENT(eh) - nearex - 1)
+					* sizeof(struct ext4_extent);
+			memmove(nearex + 1, nearex + 2, len);
+		}
+		eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
+		BUG_ON(eh->eh_entries == 0);
+	}
+
+	/* try to merge extents to the left */
+
+	/* time to correct all indexes above */
+	err = ext4_ext_correct_indexes(handle, inode, path);
+	if (err)
+		goto cleanup;
+
+	err = ext4_ext_dirty(handle, inode, path + depth);
+
+cleanup:
+	if (npath) {
+		ext4_ext_drop_refs(npath);
+		kfree(npath);
+	}
+	ext4_ext_tree_changed(inode);
+	ext4_ext_invalidate_cache(inode);
+	return err;
+}
+
+/*
+ * Walk the logical range [block, block + num) of 'inode' and call
+ * 'func' once for every piece of it: either an existing extent
+ * (EXT4_EXT_CACHE_EXTENT) or a gap between extents
+ * (EXT4_EXT_CACHE_GAP), described by a struct ext4_ext_cache.
+ *
+ * The callback controls the walk through its return value:
+ *   < 0         - stop and return that error
+ *   EXT_REPEAT  - look up the same block again
+ *   EXT_BREAK   - stop and return 0
+ *   otherwise   - continue right after the reported piece
+ *
+ * Returns 0 or a negative errno.
+ */
+int ext4_ext_walk_space(struct inode *inode, unsigned long block,
+			unsigned long num, ext_prepare_callback func,
+			void *cbdata)
+{
+	struct ext4_ext_path *path = NULL;
+	struct ext4_ext_path *found;
+	struct ext4_ext_cache cbex;
+	struct ext4_extent *ex;
+	unsigned long next, start = 0, end = 0;
+	unsigned long last = block + num;
+	int depth, exists, err = 0;
+
+	BUG_ON(func == NULL);
+	BUG_ON(inode == NULL);
+
+	while (block < last && block != EXT_MAX_BLOCK) {
+		num = last - block;
+		/* find extent for this block */
+		found = ext4_ext_find_extent(inode, block, path);
+		if (IS_ERR(found)) {
+			/*
+			 * ext4_ext_find_extent() frees only a path it has
+			 * allocated itself.  A path reused from an earlier
+			 * iteration is still ours and holds no buffer
+			 * references at this point, so free it here
+			 * instead of leaking it.
+			 */
+			err = PTR_ERR(found);
+			kfree(path);
+			path = NULL;
+			break;
+		}
+		path = found;
+
+		depth = ext_depth(inode);
+		BUG_ON(path[depth].p_hdr == NULL);
+		ex = path[depth].p_ext;
+		next = ext4_ext_next_allocated_block(path);
+
+		exists = 0;
+		if (!ex) {
+			/* there is no extent yet, so try to allocate
+			 * all requested space */
+			start = block;
+			end = block + num;
+		} else if (le32_to_cpu(ex->ee_block) > block) {
+			/* need to allocate space before found extent */
+			start = block;
+			end = le32_to_cpu(ex->ee_block);
+			if (block + num < end)
+				end = block + num;
+		} else if (block >=
+			     le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) {
+			/* need to allocate space after found extent */
+			start = block;
+			end = block + num;
+			if (end >= next)
+				end = next;
+		} else if (block >= le32_to_cpu(ex->ee_block)) {
+			/*
+			 * some part of requested space is covered
+			 * by found extent
+			 */
+			start = block;
+			end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len);
+			if (block + num < end)
+				end = block + num;
+			exists = 1;
+		} else {
+			BUG();
+		}
+		BUG_ON(end <= start);
+
+		if (!exists) {
+			/* report the gap [start, end) */
+			cbex.ec_block = start;
+			cbex.ec_len = end - start;
+			cbex.ec_start = 0;
+			cbex.ec_type = EXT4_EXT_CACHE_GAP;
+		} else {
+			/* report the whole extent, not only the requested part */
+			cbex.ec_block = le32_to_cpu(ex->ee_block);
+			cbex.ec_len = le16_to_cpu(ex->ee_len);
+			cbex.ec_start = le32_to_cpu(ex->ee_start);
+			cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
+		}
+
+		BUG_ON(cbex.ec_len == 0);
+		err = func(inode, path, &cbex, cbdata);
+		ext4_ext_drop_refs(path);
+
+		if (err < 0)
+			break;
+		if (err == EXT_REPEAT)
+			continue;
+		else if (err == EXT_BREAK) {
+			err = 0;
+			break;
+		}
+
+		if (ext_depth(inode) != depth) {
+			/* depth was changed. we have to realloc path */
+			kfree(path);
+			path = NULL;
+		}
+
+		block = cbex.ec_block + cbex.ec_len;
+	}
+
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+
+	return err;
+}
+
+/*
+ * Remember one extent or gap in the inode's single-entry extent cache.
+ * 'len' must not be zero.
+ */
+static inline void
+ext4_ext_put_in_cache(struct inode *inode, __u32 block,
+			__u32 len, __u32 start, int type)
+{
+	struct ext4_ext_cache *cache = &EXT4_I(inode)->i_cached_extent;
+
+	BUG_ON(len == 0);
+
+	cache->ec_block = block;
+	cache->ec_len = len;
+	cache->ec_start = start;
+	cache->ec_type = type;
+}
+
+/*
+ * Calculate the boundaries of the gap that the requested block falls
+ * into and store that gap in the extent cache.
+ *
+ * 'path' must be the result of a lookup for 'block' that did not find
+ * a covering extent; a block inside the found extent triggers BUG().
+ */
+static inline void
+ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
+				unsigned long block)
+{
+	int depth = ext_depth(inode);
+	unsigned long lblock, len;
+	struct ext4_extent *ex;
+
+	ex = path[depth].p_ext;
+	if (ex == NULL) {
+		/* there is no extent yet, so gap is [0;-] */
+		lblock = 0;
+		len = EXT_MAX_BLOCK;
+		ext_debug("cache gap(whole file):");
+	} else if (block < le32_to_cpu(ex->ee_block)) {
+		/* gap runs from 'block' up to the start of the found extent */
+		lblock = block;
+		len = le32_to_cpu(ex->ee_block) - block;
+		ext_debug("cache gap(before): %lu [%lu:%lu]",
+				(unsigned long) block,
+			        (unsigned long) le32_to_cpu(ex->ee_block),
+			        (unsigned long) le16_to_cpu(ex->ee_len));
+	} else if (block >= le32_to_cpu(ex->ee_block)
+		            + le16_to_cpu(ex->ee_len)) {
+		/* gap runs from the end of the found extent to the next one */
+	        lblock = le32_to_cpu(ex->ee_block)
+		         + le16_to_cpu(ex->ee_len);
+		/* len temporarily holds the start of the next allocated block */
+		len = ext4_ext_next_allocated_block(path);
+		ext_debug("cache gap(after): [%lu:%lu] %lu",
+			        (unsigned long) le32_to_cpu(ex->ee_block),
+			        (unsigned long) le16_to_cpu(ex->ee_len),
+				(unsigned long) block);
+		BUG_ON(len == lblock);
+		len = len - lblock;
+	} else {
+		/* block lies inside the found extent: not a gap at all */
+		lblock = len = 0;
+		BUG();
+	}
+
+	ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len);
+	ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
+}
+
+/*
+ * Look 'block' up in the inode's extent cache.
+ *
+ * Returns EXT4_EXT_CACHE_NO when the cache is empty or does not cover
+ * the block.  Otherwise copies the cached range into 'ex' and returns
+ * the cached type (EXT4_EXT_CACHE_GAP or EXT4_EXT_CACHE_EXTENT).
+ */
+static inline int
+ext4_ext_in_cache(struct inode *inode, unsigned long block,
+			struct ext4_extent *ex)
+{
+	struct ext4_ext_cache *cache = &EXT4_I(inode)->i_cached_extent;
+
+	/* has cache valid data? */
+	if (cache->ec_type == EXT4_EXT_CACHE_NO)
+		return EXT4_EXT_CACHE_NO;
+
+	BUG_ON(cache->ec_type != EXT4_EXT_CACHE_GAP &&
+			cache->ec_type != EXT4_EXT_CACHE_EXTENT);
+
+	/* not in cache */
+	if (block < cache->ec_block ||
+	    block >= cache->ec_block + cache->ec_len)
+		return EXT4_EXT_CACHE_NO;
+
+	ex->ee_block = cpu_to_le32(cache->ec_block);
+	ex->ee_start = cpu_to_le32(cache->ec_start);
+	ex->ee_len = cpu_to_le16(cache->ec_len);
+	ext_debug("%lu cached by %lu:%lu:%lu\n",
+			(unsigned long) block,
+			(unsigned long) cache->ec_block,
+			(unsigned long) cache->ec_len,
+			(unsigned long) cache->ec_start);
+
+	return cache->ec_type;
+}
+
+/*
+ * Remove an index entry from its index block and free the block the
+ * entry pointed to.  'path' addresses the level being removed; the
+ * index entry itself lives one level above it (path - 1).
+ * This is used in the truncate case only, so every request is for the
+ * last index in the block and dropping the entry count is enough.
+ *
+ * Returns 0 or the error from the access/dirty helpers.
+ */
+int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
+			struct ext4_ext_path *path)
+{
+	struct ext4_ext_path *parent = path - 1;
+	struct buffer_head *bh;
+	unsigned long leaf;
+	int err;
+
+	/* free index block */
+	leaf = le32_to_cpu(parent->p_idx->ei_leaf);
+	BUG_ON(parent->p_hdr->eh_entries == 0);
+
+	err = ext4_ext_get_access(handle, inode, parent);
+	if (err)
+		return err;
+
+	parent->p_hdr->eh_entries =
+		cpu_to_le16(le16_to_cpu(parent->p_hdr->eh_entries) - 1);
+
+	err = ext4_ext_dirty(handle, inode, parent);
+	if (err)
+		return err;
+
+	ext_debug("index is empty, remove it, free block %lu\n", leaf);
+	bh = sb_find_get_block(inode->i_sb, leaf);
+	ext4_forget(handle, 1, inode, bh, leaf);
+	ext4_free_blocks(handle, inode, leaf, 1);
+
+	return err;
+}
+
+/*
+ * Returns the maximum number of journal credits one insertion into the
+ * extent tree can consume. With @path == NULL the worst case is returned,
+ * which should be OK for low-performance paths like ->writepage().
+ * To allow many writing processes to fit into a single transaction, the
+ * caller should calculate credits under truncate_mutex and pass actual path.
+ */
+int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
+						struct ext4_ext_path *path)
+{
+	int depth, needed;
+
+	if (path) {
+		/* if the leaf still has a free slot, one credit is enough */
+		depth = ext_depth(inode);
+		if (le16_to_cpu(path[depth].p_hdr->eh_entries)
+				< le16_to_cpu(path[depth].p_hdr->eh_max))
+			return 1;
+	}
+
+	/*
+	 * given 32bit logical block (4294967296 blocks), max. tree
+	 * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
+	 * let's also add one more level for imbalance.
+	 */
+	depth = 5;
+
+	/* allocation of new data block(s) */
+	needed = 2;
+
+	/*
+	 * tree can be full, so it'd need to grow in depth:
+	 * allocation + old root + new root
+	 */
+	needed += 2 + 1 + 1;
+
+	/*
+	 * Index split can happen, we'd need:
+	 *    allocate intermediate indexes (bitmap + group)
+	 *  + change two blocks at each level, but root (already included)
+	 */
+	needed += (depth * 2) + (depth * 2);	/* accumulate: plain '=' dropped the credits counted above */
+
+	/* any allocation modifies superblock */
+	needed += 1;
+
+	return needed;
+}
+
+static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
+				struct ext4_extent *ex,
+				unsigned long from, unsigned long to)
+{
+	struct buffer_head *bh;
+	int i;
+
+#ifdef EXTENTS_STATS
+	{	/* account this extent in the per-superblock statistics */
+		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+		unsigned short ee_len =  le16_to_cpu(ex->ee_len);
+		spin_lock(&sbi->s_ext_stats_lock);
+		sbi->s_ext_blocks += ee_len;
+		sbi->s_ext_extents++;
+		if (ee_len < sbi->s_ext_min)
+			sbi->s_ext_min = ee_len;
+		if (ee_len > sbi->s_ext_max)
+			sbi->s_ext_max = ee_len;
+		if (ext_depth(inode) > sbi->s_depth_max)
+			sbi->s_depth_max = ext_depth(inode);
+		spin_unlock(&sbi->s_ext_stats_lock);
+	}
+#endif
+	if (from >= le32_to_cpu(ex->ee_block)
+	    && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) {
+		/* tail removal: the range ends exactly on the extent's last block */
+		unsigned long num, start;
+		num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from;	/* blocks to free */
+		start = le32_to_cpu(ex->ee_start) + le16_to_cpu(ex->ee_len) - num;	/* first physical block to free */
+		ext_debug("free last %lu blocks starting %lu\n", num, start);
+		for (i = 0; i < num; i++) {	/* NOTE(review): int i is compared with unsigned long num */
+			bh = sb_find_get_block(inode->i_sb, start + i);
+			ext4_forget(handle, 0, inode, bh, start + i);
+		}
+		ext4_free_blocks(handle, inode, start, num);
+	} else if (from == le32_to_cpu(ex->ee_block)	/* head removal: only logged, nothing is freed */
+		   && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) {
+		printk("strange request: removal %lu-%lu from %u:%u\n",
+		       from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len));
+	} else {	/* any other range: only logged, nothing is freed */
+		printk("strange request: removal(2) %lu-%lu from %u:%u\n",
+		       from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len));
+	}
+	return 0;	/* unconditionally 0, also for the "strange request" cases */
+}
+
+static int
+ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
+		struct ext4_ext_path *path, unsigned long start)
+{
+	int err = 0, correct_index = 0;
+	int depth = ext_depth(inode), credits;
+	struct ext4_extent_header *eh;
+	unsigned a, b, block, num;	/* a..b: range being dropped; block/num: what the extent keeps */
+	unsigned long ex_ee_block;
+	unsigned short ex_ee_len;
+	struct ext4_extent *ex;
+
+	ext_debug("truncate since %lu in leaf\n", start);
+	if (!path[depth].p_hdr)
+		path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
+	eh = path[depth].p_hdr;
+	BUG_ON(eh == NULL);
+	BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max));
+	BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC);
+
+	/* start at the last extent of the leaf and walk towards the first */
+	ex = EXT_LAST_EXTENT(eh);
+
+	ex_ee_block = le32_to_cpu(ex->ee_block);
+	ex_ee_len = le16_to_cpu(ex->ee_len);
+
+	while (ex >= EXT_FIRST_EXTENT(eh) &&
+			ex_ee_block + ex_ee_len > start) {	/* extent reaches past 'start' */
+		ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
+		path[depth].p_ext = ex;
+
+		a = ex_ee_block > start ? ex_ee_block : start;	/* max(extent start, start) */
+		b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ?
+			ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK;	/* min(extent end, EXT_MAX_BLOCK) */
+
+		ext_debug("  border %u:%u\n", a, b);
+
+		if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) {	/* hole in the middle: unsupported */
+			block = 0;
+			num = 0;
+			BUG();
+		} else if (a != ex_ee_block) {
+			/* remove tail of the extent: keep [ex_ee_block, a) */
+			block = ex_ee_block;
+			num = a - block;
+		} else if (b != ex_ee_block + ex_ee_len - 1) {
+			/* remove head of the extent */
+			block = a;
+			num = b - a;
+			/* there is no "make a hole" API yet */
+			BUG();
+		} else {
+			/* remove whole extent: excellent! */
+			block = ex_ee_block;
+			num = 0;
+			BUG_ON(a != ex_ee_block);
+			BUG_ON(b != ex_ee_block + ex_ee_len - 1);
+		}
+
+		/* at present, extent can't cross block group */
+		/* leaf + bitmap + group desc + sb + inode */
+		credits = 5;
+		if (ex == EXT_FIRST_EXTENT(eh)) {	/* first extent changes: indexes above need fixing */
+			correct_index = 1;
+			credits += (ext_depth(inode)) + 1;
+		}
+#ifdef CONFIG_QUOTA
+		credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+#endif
+
+		handle = ext4_ext_journal_restart(handle, credits);	/* only the local copy of 'handle' is updated */
+		if (IS_ERR(handle)) {
+			err = PTR_ERR(handle);
+			goto out;
+		}
+
+		err = ext4_ext_get_access(handle, inode, path + depth);
+		if (err)
+			goto out;
+
+		err = ext4_remove_blocks(handle, inode, ex, a, b);
+		if (err)
+			goto out;
+
+		if (num == 0) {
+			/* this extent is removed entirely: mark the slot unused */
+			ex->ee_start = 0;
+			eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
+		}
+
+		ex->ee_block = cpu_to_le32(block);	/* store what is left of the extent */
+		ex->ee_len = cpu_to_le16(num);
+
+		err = ext4_ext_dirty(handle, inode, path + depth);
+		if (err)
+			goto out;
+
+		ext_debug("new extent: %u:%u:%u\n", block, num,
+				le32_to_cpu(ex->ee_start));
+		ex--;	/* NOTE(review): the two loads below run before the loop re-checks ex >= EXT_FIRST_EXTENT -- confirm reading in front of the first extent is harmless */
+		ex_ee_block = le32_to_cpu(ex->ee_block);
+		ex_ee_len = le16_to_cpu(ex->ee_len);
+	}
+
+	if (correct_index && eh->eh_entries)	/* leaf not empty, but its first extent changed */
+		err = ext4_ext_correct_indexes(handle, inode, path);
+
+	/* if this leaf is now empty, then we should
+	 * remove it from the index block above (p_bh == NULL skips this) */
+	if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
+		err = ext4_ext_rm_idx(handle, inode, path + depth);
+
+out:
+	return err;
+}
+
+/*
+ * returns 1 if the current index still has to be processed (even partially)
+ */
+static int inline
+ext4_ext_more_to_rm(struct ext4_ext_path *path)
+{
+	BUG_ON(path->p_idx == NULL);
+
+	/*
+	 * Done when p_idx has stepped in front of the first index, or when
+	 * the entry count still matches the one remembered in p_block, i.e.
+	 * the deeper level did not remove an entry from this block. If an
+	 * entry did go away, the current index must be considered as well.
+	 */
+	if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr) ||
+	    le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
+		return 0;
+	return 1;
+}
+
+int ext4_ext_remove_space(struct inode *inode, unsigned long start)
+{
+	struct super_block *sb = inode->i_sb;
+	int depth = ext_depth(inode);
+	struct ext4_ext_path *path;
+	handle_t *handle;
+	int i = 0, err = 0;	/* i: tree level being visited, 0 is the root */
+
+	ext_debug("truncate since %lu\n", start);
+
+	/* probably the first extent we are going to free is the last in its block */
+	handle = ext4_journal_start(inode, depth + 1);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	ext4_ext_invalidate_cache(inode);
+
+	/*
+	 * we start scanning from the right side, freeing all the blocks
+	 * from logical block 'start' onwards and walking down into the tree
+	 */
+	path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
+	if (path == NULL) {
+		ext4_journal_stop(handle);
+		return -ENOMEM;
+	}
+	memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
+	path[0].p_hdr = ext_inode_hdr(inode);
+	if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) {
+		err = -EIO;
+		goto out;
+	}
+	path[0].p_depth = depth;
+
+	while (i >= 0 && err == 0) {	/* ends once the walk has climbed back above the root, or on error */
+		if (i == depth) {
+			/* this is a leaf block */
+			err = ext4_ext_rm_leaf(handle, inode, path, start);
+			/* root level has p_bh == NULL; brelse() tolerates that */
+			brelse(path[i].p_bh);
+			path[i].p_bh = NULL;
+			i--;
+			continue;
+		}
+
+		/* this is an index block */
+		if (!path[i].p_hdr) {
+			ext_debug("initialize header\n");
+			path[i].p_hdr = ext_block_hdr(path[i].p_bh);
+			if (ext4_ext_check_header(__FUNCTION__, inode,
+							path[i].p_hdr)) {
+				err = -EIO;
+				goto out;
+			}
+		}
+
+		BUG_ON(le16_to_cpu(path[i].p_hdr->eh_entries)
+			   > le16_to_cpu(path[i].p_hdr->eh_max));
+		BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC);
+
+		if (!path[i].p_idx) {
+			/* this level hasn't been touched yet: start at its last index */
+			path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
+			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;	/* entries+1 can never equal the entry count */
+			ext_debug("init index ptr: hdr 0x%p, num %d\n",
+				  path[i].p_hdr,
+				  le16_to_cpu(path[i].p_hdr->eh_entries));
+		} else {
+			/* we have already been here, look at the next index */
+			path[i].p_idx--;
+		}
+
+		ext_debug("level %d - index, first 0x%p, cur 0x%p\n",
+				i, EXT_FIRST_INDEX(path[i].p_hdr),
+				path[i].p_idx);
+		if (ext4_ext_more_to_rm(path + i)) {
+			/* go down to the next level */
+			ext_debug("move to level %d (block %d)\n",
+				  i + 1, le32_to_cpu(path[i].p_idx->ei_leaf));
+			memset(path + i + 1, 0, sizeof(*path));
+			path[i+1].p_bh =
+				sb_bread(sb, le32_to_cpu(path[i].p_idx->ei_leaf));
+			if (!path[i+1].p_bh) {
+				/* should we reset i_size? */
+				err = -EIO;
+				break;
+			}
+
+			/* remember the current number of indexes, to see at the
+			 * next iteration whether this number has changed */
+			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
+			i++;
+		} else {
+			/* we have finished processing this index, go up */
+			if (path[i].p_hdr->eh_entries == 0 && i > 0) {
+				/* index is empty, remove it;
+				 * the handle must already have been prepared
+				 * (presumably by ext4_ext_rm_leaf() -- confirm) */
+				err = ext4_ext_rm_idx(handle, inode, path + i);
+			}
+			/* root level has p_bh == NULL; brelse() tolerates that */
+			brelse(path[i].p_bh);
+			path[i].p_bh = NULL;
+			i--;
+			ext_debug("return to level %d\n", i);
+		}
+	}
+
+	/* TODO: flexible tree reduction should be here */
+	if (path->p_hdr->eh_entries == 0) {	/* NOTE(review): also reached after a failed walk; 'err' from the loop is overwritten below -- confirm */
+		/*
+		 * truncate to zero freed the whole tree,
+		 * so we need to correct eh_depth
+		 */
+		err = ext4_ext_get_access(handle, inode, path);
+		if (err == 0) {
+			ext_inode_hdr(inode)->eh_depth = 0;
+			ext_inode_hdr(inode)->eh_max =
+				cpu_to_le16(ext4_ext_space_root(inode));
+			err = ext4_ext_dirty(handle, inode, path);
+		}
+	}
+out:
+	ext4_ext_tree_changed(inode);
+	ext4_ext_drop_refs(path);
+	kfree(path);
+	ext4_journal_stop(handle);
+
+	return err;
+}
+
+/*
+ * called at mount time; only acts when the EXTENTS mount option is set
+ */
+void ext4_ext_init(struct super_block *sb)
+{
+	/*
+	 * possible initialization would be here
+	 */
+
+	if (test_opt(sb, EXTENTS)) {
+		printk("EXT4-fs: file extents enabled");	/* one log line, extended by the compile-time options below */
+#ifdef AGRESSIVE_TEST
+		printk(", agressive tests");
+#endif
+#ifdef CHECK_BINSEARCH
+		printk(", check binsearch");
+#endif
+#ifdef EXTENTS_STATS
+		printk(", stats");
+#endif
+		printk("\n");
+#ifdef EXTENTS_STATS
+		spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
+		EXT4_SB(sb)->s_ext_min = 1 << 30;	/* large start value so the first extent seen lowers it */
+		EXT4_SB(sb)->s_ext_max = 0;
+#endif
+	}
+}
+
+/*
+ * called at umount time; prints the extent statistics if EXTENTS_STATS is set
+ */
+void ext4_ext_release(struct super_block *sb)
+{
+	if (!test_opt(sb, EXTENTS))
+		return;
+
+#ifdef EXTENTS_STATS
+	if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {	/* non-zero extent count also guards the division below */
+		struct ext4_sb_info *sbi = EXT4_SB(sb);
+		printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
+			sbi->s_ext_blocks, sbi->s_ext_extents,
+			sbi->s_ext_blocks / sbi->s_ext_extents);
+		printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
+			sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
+	}
+#endif
+}
+
+int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock,
+			unsigned long max_blocks, struct buffer_head *bh_result,
+			int create, int extend_disksize)
+{
+	struct ext4_ext_path *path = NULL;
+	struct ext4_extent newex, *ex;
+	int goal, newblock, err = 0, depth;
+	unsigned long allocated = 0;	/* number of blocks mapped; part of the return value */
+
+	__clear_bit(BH_New, &bh_result->b_state);
+	ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock,
+			max_blocks, (unsigned) inode->i_ino);
+	mutex_lock(&EXT4_I(inode)->truncate_mutex);
+
+	/* check in cache ('goal' temporarily holds the cache entry type) */
+	if ((goal = ext4_ext_in_cache(inode, iblock, &newex))) {
+		if (goal == EXT4_EXT_CACHE_GAP) {
+			if (!create) {
+				/* block isn't allocated yet and
+				 * the user doesn't want to allocate it */
+				goto out2;
+			}
+			/* we should allocate requested block */
+		} else if (goal == EXT4_EXT_CACHE_EXTENT) {
+			/* block is already allocated */
+		        newblock = iblock
+		                   - le32_to_cpu(newex.ee_block)
+			           + le32_to_cpu(newex.ee_start);
+			/* number of blocks remaining in the extent */
+			allocated = le16_to_cpu(newex.ee_len) -
+					(iblock - le32_to_cpu(newex.ee_block));
+			goto out;
+		} else {
+			BUG();
+		}
+	}
+
+	/* find extent for this block */
+	path = ext4_ext_find_extent(inode, iblock, NULL);
+	if (IS_ERR(path)) {
+		err = PTR_ERR(path);
+		path = NULL;	/* so that out2 does not try to free an error pointer */
+		goto out2;
+	}
+
+	depth = ext_depth(inode);
+
+	/*
+	 * a consistent leaf must not be empty;
+	 * this situation is possible, though, _during_ tree modification;
+	 * this is why the assert can't be put in ext4_ext_find_extent()
+	 */
+	BUG_ON(path[depth].p_ext == NULL && depth != 0);
+
+	if ((ex = path[depth].p_ext)) {
+	        unsigned long ee_block = le32_to_cpu(ex->ee_block);
+		unsigned long ee_start = le32_to_cpu(ex->ee_start);
+		unsigned short ee_len  = le16_to_cpu(ex->ee_len);
+		/* if the found extent covers the block, simply return it */
+	        if (iblock >= ee_block && iblock < ee_block + ee_len) {
+			newblock = iblock - ee_block + ee_start;
+			/* number of blocks remaining in the extent */
+			allocated = ee_len - (iblock - ee_block);
+			ext_debug("%d fit into %lu:%d -> %d\n", (int) iblock,
+					ee_block, ee_len, newblock);
+			ext4_ext_put_in_cache(inode, ee_block, ee_len,
+						ee_start, EXT4_EXT_CACHE_EXTENT);
+			goto out;
+		}
+	}
+
+	/*
+	 * requested block isn't allocated yet;
+	 * we must not try to create the block if the create flag is zero
+	 */
+	if (!create) {
+		/* put the gap just found into the cache to speed up subsequent requests */
+		ext4_ext_put_gap_in_cache(inode, path, iblock);
+		goto out2;
+	}
+	/*
+	 * Okay, we need to do block allocation.  Lazily initialize the block
+	 * allocation info here if necessary
+	 */
+	if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
+		ext4_init_block_alloc_info(inode);
+
+	/* allocate new block */
+	goal = ext4_ext_find_goal(inode, path, iblock);
+	allocated = max_blocks;	/* passed by address to ext4_new_blocks() below */
+	newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
+	if (!newblock)
+		goto out2;
+	ext_debug("allocate new block: goal %d, found %d/%lu\n",
+			goal, newblock, allocated);
+
+	/* try to insert new extent into found leaf and return */
+	newex.ee_block = cpu_to_le32(iblock);
+	newex.ee_start = cpu_to_le32(newblock);
+	newex.ee_len = cpu_to_le16(allocated);
+	err = ext4_ext_insert_extent(handle, inode, path, &newex);
+	if (err)	/* NOTE(review): the blocks obtained from ext4_new_blocks() are not freed on this path -- confirm */
+		goto out2;
+
+	if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize)
+		EXT4_I(inode)->i_disksize = inode->i_size;
+
+	/* previous routine could use block we allocated */
+	newblock = le32_to_cpu(newex.ee_start);
+	__set_bit(BH_New, &bh_result->b_state);
+
+	ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
+				EXT4_EXT_CACHE_EXTENT);
+out:	/* a mapping exists: clamp it to the request and fill in bh_result */
+	if (allocated > max_blocks)
+		allocated = max_blocks;
+	ext4_ext_show_leaf(inode, path);
+	__set_bit(BH_Mapped, &bh_result->b_state);
+	bh_result->b_bdev = inode->i_sb->s_bdev;
+	bh_result->b_blocknr = newblock;
+out2:	/* common exit: drop the path and the mutex */
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+	mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+
+	return err ? err : allocated;	/* err if set, otherwise the block count in 'allocated' */
+}
+
+void ext4_ext_truncate(struct inode * inode, struct page *page)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct super_block *sb = inode->i_sb;
+	unsigned long last_block;
+	handle_t *handle;
+	int err = 0;
+
+	/*
+	 * probably the first extent we are going to free is the last in its block
+	 */
+	err = ext4_writepage_trans_blocks(inode) + 3;	/* 'err' doubles as the credit count here */
+	handle = ext4_journal_start(inode, err);
+	if (IS_ERR(handle)) {
+		if (page) {	/* no transaction: zero the page, then unlock and release it */
+			clear_highpage(page);
+			flush_dcache_page(page);
+			unlock_page(page);
+			page_cache_release(page);
+		}
+		return;
+	}
+
+	if (page)
+		ext4_block_truncate_page(handle, page, mapping, inode->i_size);
+
+	mutex_lock(&EXT4_I(inode)->truncate_mutex);
+	ext4_ext_invalidate_cache(inode);
+
+	/*
+	 * TODO: optimization is possible here;
+	 * probably we need not scan at all,
+	 * because page truncation is enough
+	 */
+	if (ext4_orphan_add(handle, inode))
+		goto out_stop;
+
+	/* we have to know where to truncate from in the crash case */
+	EXT4_I(inode)->i_disksize = inode->i_size;
+	ext4_mark_inode_dirty(handle, inode);
+
+	last_block = (inode->i_size + sb->s_blocksize - 1)
+			>> EXT4_BLOCK_SIZE_BITS(sb);	/* i_size rounded up to whole blocks */
+	err = ext4_ext_remove_space(inode, last_block);	/* NOTE(review): result is stored but never checked or reported */
+
+	/* In a multi-transaction truncate, we only make the final
+	 * transaction synchronous */
+	if (IS_SYNC(inode))
+		handle->h_sync = 1;
+
+out_stop:
+	/*
+	 * If this was a simple ftruncate(), and the file will remain alive
+	 * then we need to clear up the orphan record which we created above.
+	 * However, if this was a real unlink then we were called by
+	 * ext4_delete_inode(), and we allow that function to clean up the
+	 * orphan info for us.
+	 */
+	if (inode->i_nlink)
+		ext4_orphan_del(handle, inode);
+
+	mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+	ext4_journal_stop(handle);
+}
+
+/*
+ * Worst-case number of blocks we could modify in order to
+ * allocate @num new blocks for an inode.
+ */
+int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
+{
+	int per_insert, total;
+
+	per_insert = ext4_ext_calc_credits_for_insert(inode, NULL);
+
+	/* every insert's figure includes the superblock; count it once */
+	total = per_insert * num - (num - 1);
+
+#ifdef CONFIG_QUOTA
+	total += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+#endif
+
+	return total;
+}
+
+EXPORT_SYMBOL(ext4_mark_inode_dirty);	/* symbols below are made available to other kernel modules */
+EXPORT_SYMBOL(ext4_ext_invalidate_cache);
+EXPORT_SYMBOL(ext4_ext_insert_extent);
+EXPORT_SYMBOL(ext4_ext_walk_space);
+EXPORT_SYMBOL(ext4_ext_find_goal);
+EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert);
+
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 34d39ae966f7..e17a6c918d72 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -615,6 +615,17 @@ got:
 		ext4_std_error(sb, err);
 		goto fail_free_drop;
 	}
+	if (test_opt(sb, EXTENTS)) {
+		EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
+		ext4_ext_tree_init(handle, inode);
+		if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+			err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+			if (err) goto fail;
+			EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS);
+			BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "call ext4_journal_dirty_metadata");
+			err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+		}
+	}
 
 	ext4_debug("allocating inode %lu\n", inode->i_ino);
 	goto really_out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0d96c7d3bb5b..2b81b1324a6f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -40,8 +40,6 @@
 #include "xattr.h"
 #include "acl.h"
 
-static int ext4_writepage_trans_blocks(struct inode *inode);
-
 /*
  * Test whether an inode is a fast symlink.
  */
@@ -804,6 +802,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 	ext4_fsblk_t first_block = 0;
 
 
+	J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
 	J_ASSERT(handle != NULL || create == 0);
 	depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
 
@@ -984,7 +983,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
 
 get_block:
 	if (ret == 0) {
-		ret = ext4_get_blocks_handle(handle, inode, iblock,
+		ret = ext4_get_blocks_wrap(handle, inode, iblock,
 					max_blocks, bh_result, create, 0);
 		if (ret > 0) {
 			bh_result->b_size = (ret << inode->i_blkbits);
@@ -1008,7 +1007,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 	dummy.b_state = 0;
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
-	err = ext4_get_blocks_handle(handle, inode, block, 1,
+	err = ext4_get_blocks_wrap(handle, inode, block, 1,
 					&dummy, create, 1);
 	/*
 	 * ext4_get_blocks_handle() returns number of blocks
@@ -1759,7 +1758,7 @@ void ext4_set_aops(struct inode *inode)
  * This required during truncate. We need to physically zero the tail end
  * of that block so it doesn't yield old data if the file is later grown.
  */
-static int ext4_block_truncate_page(handle_t *handle, struct page *page,
+int ext4_block_truncate_page(handle_t *handle, struct page *page,
 		struct address_space *mapping, loff_t from)
 {
 	ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
@@ -2263,6 +2262,9 @@ void ext4_truncate(struct inode *inode)
 			return;
 	}
 
+	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+		return ext4_ext_truncate(inode, page);
+
 	handle = start_transaction(inode);
 	if (IS_ERR(handle)) {
 		if (page) {
@@ -3003,12 +3005,15 @@ err_out:
  * block and work out the exact number of indirects which are touched.  Pah.
  */
 
-static int ext4_writepage_trans_blocks(struct inode *inode)
+int ext4_writepage_trans_blocks(struct inode *inode)
 {
 	int bpp = ext4_journal_blocks_per_page(inode);
 	int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
 	int ret;
 
+	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+		return ext4_ext_writepage_trans_blocks(inode, bpp);
+
 	if (ext4_should_journal_data(inode))
 		ret = 3 * (bpp + indirects) + 2;
 	else
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a63dce2117b8..22a737c306c7 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -248,7 +248,6 @@ flags_err:
 		return err;
 	}
 
-
 	default:
 		return -ENOTTY;
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f131bb69b62e..69f875250500 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -390,6 +390,7 @@ static void ext4_put_super (struct super_block * sb)
 	struct ext4_super_block *es = sbi->s_es;
 	int i;
 
+	ext4_ext_release(sb);
 	ext4_xattr_put_super(sb);
 	jbd2_journal_destroy(sbi->s_journal);
 	if (!(sb->s_flags & MS_RDONLY)) {
@@ -454,6 +455,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 #endif
 	ei->i_block_alloc_info = NULL;
 	ei->vfs_inode.i_version = 1;
+	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 	return &ei->vfs_inode;
 }
 
@@ -677,7 +679,7 @@ enum {
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
-	Opt_grpquota
+	Opt_grpquota, Opt_extents,
 };
 
 static match_table_t tokens = {
@@ -727,6 +729,7 @@ static match_table_t tokens = {
 	{Opt_quota, "quota"},
 	{Opt_usrquota, "usrquota"},
 	{Opt_barrier, "barrier=%u"},
+	{Opt_extents, "extents"},
 	{Opt_err, NULL},
 	{Opt_resize, "resize"},
 };
@@ -1059,6 +1062,9 @@ clear_qf_name:
 		case Opt_bh:
 			clear_opt(sbi->s_mount_opt, NOBH);
 			break;
+		case Opt_extents:
+			set_opt (sbi->s_mount_opt, EXTENTS);
+			break;
 		default:
 			printk (KERN_ERR
 				"EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1787,6 +1793,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
 		"writeback");
 
+	ext4_ext_init(sb);
+
 	lock_kernel();
 	return 0;
 
-- 
cgit v1.2.3


From 3a5b2ecdd1fa63a8f25bd769223bc1c2564ce45d Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 11 Oct 2006 01:21:05 -0700
Subject: [PATCH] ext4: switch fsblk to sector_t

Redefine ext3 in-kernel filesystem block type (ext3_fsblk_t) from unsigned
long to sector_t, to allow kernel to handle  >32 bit ext3 blocks.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 23 +++++++++--------------
 fs/ext4/ialloc.c | 11 +++++++----
 fs/ext4/resize.c | 13 ++++++-------
 fs/ext4/super.c  |  8 ++++----
 4 files changed, 26 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e9e98449137b..aa33ff271fa9 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -147,7 +147,7 @@ restart:
 		rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node);
 		if (verbose)
 			printk("reservation window 0x%p "
-			       "start:  %lu, end:  %lu\n",
+			       "start:  "E3FSBLK", end:  "E3FSBLK"\n",
 			       rsv, rsv->rsv_start, rsv->rsv_end);
 		if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
 			printk("Bad reservation %p (start >= end)\n",
@@ -443,10 +443,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
 
 do_more:
 	overflow = 0;
-	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
-		      EXT4_BLOCKS_PER_GROUP(sb);
-	bit = (block - le32_to_cpu(es->s_first_data_block)) %
-		      EXT4_BLOCKS_PER_GROUP(sb);
+	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
 	/*
 	 * Check to see if we are freeing blocks across a group
 	 * boundary.
@@ -1404,7 +1401,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
 {
 	struct buffer_head *bitmap_bh = NULL;
 	struct buffer_head *gdp_bh;
-	int group_no;
+	unsigned long group_no;
 	int goal_group;
 	ext4_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
 	ext4_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block*/
@@ -1467,8 +1464,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
 	if (goal < le32_to_cpu(es->s_first_data_block) ||
 	    goal >= le32_to_cpu(es->s_blocks_count))
 		goal = le32_to_cpu(es->s_first_data_block);
-	group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
-			EXT4_BLOCKS_PER_GROUP(sb);
+	ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
 	goal_group = group_no;
 retry_alloc:
 	gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
@@ -1485,8 +1481,6 @@ retry_alloc:
 		my_rsv = NULL;
 
 	if (free_blocks > 0) {
-		grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
-				EXT4_BLOCKS_PER_GROUP(sb));
 		bitmap_bh = read_block_bitmap(sb, group_no);
 		if (!bitmap_bh)
 			goto io_error;
@@ -1613,7 +1607,7 @@ allocated:
 	if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
 		ext4_error(sb, "ext4_new_block",
 			    "block("E3FSBLK") >= blocks count(%d) - "
-			    "block_group = %d, es == %p ", ret_block,
+			    "block_group = %lu, es == %p ", ret_block,
 			le32_to_cpu(es->s_blocks_count), group_no, es);
 		goto out;
 	}
@@ -1733,9 +1727,10 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 static inline int
 block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map)
 {
-	return ext4_test_bit ((block -
-		le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) %
-			 EXT4_BLOCKS_PER_GROUP(sb), map);
+	ext4_grpblk_t offset;
+
+	ext4_get_group_no_and_offset(sb, block, NULL, &offset);
+	return ext4_test_bit (offset, map);
 }
 
 static inline int test_root(int a, int b)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index e17a6c918d72..94e1bb4abe31 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -23,7 +23,7 @@
 #include <linux/buffer_head.h>
 #include <linux/random.h>
 #include <linux/bitops.h>
-
+#include <linux/blkdev.h>
 #include <asm/byteorder.h>
 
 #include "xattr.h"
@@ -274,7 +274,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
 	avefreei = freei / ngroups;
 	freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-	avefreeb = freeb / ngroups;
+	avefreeb = freeb;
+	sector_div(avefreeb, ngroups);
 	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
 
 	if ((parent == sb->s_root->d_inode) ||
@@ -303,13 +304,15 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 		goto fallback;
 	}
 
-	blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs;
+	blocks_per_dir = le32_to_cpu(es->s_blocks_count) - freeb;
+	sector_div(blocks_per_dir, ndirs);
 
 	max_dirs = ndirs / ngroups + inodes_per_group / 16;
 	min_inodes = avefreei - inodes_per_group / 4;
 	min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4;
 
-	max_debt = EXT4_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext4_fsblk_t)BLOCK_COST);
+	max_debt = EXT4_BLOCKS_PER_GROUP(sb);
+	sector_div(max_debt, max(blocks_per_dir, (ext4_fsblk_t)BLOCK_COST));
 	if (max_debt * INODE_COST > inodes_per_group)
 		max_debt = inodes_per_group / INODE_COST;
 	if (max_debt > 255)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 5b2828d21180..c60bfed5f5e7 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -36,7 +36,7 @@ static int verify_group_input(struct super_block *sb,
 		 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
 	ext4_fsblk_t metaend = start + overhead;
 	struct buffer_head *bh = NULL;
-	ext4_grpblk_t free_blocks_count;
+	ext4_grpblk_t free_blocks_count, offset;
 	int err = -EINVAL;
 
 	input->free_blocks_count = free_blocks_count =
@@ -49,13 +49,13 @@ static int verify_group_input(struct super_block *sb,
 		       "no-super", input->group, input->blocks_count,
 		       free_blocks_count, input->reserved_blocks);
 
+	ext4_get_group_no_and_offset(sb, start, NULL, &offset);
 	if (group != sbi->s_groups_count)
 		ext4_warning(sb, __FUNCTION__,
 			     "Cannot add at group %u (only %lu groups)",
 			     input->group, sbi->s_groups_count);
-	else if ((start - le32_to_cpu(es->s_first_data_block)) %
-		 EXT4_BLOCKS_PER_GROUP(sb))
-		ext4_warning(sb, __FUNCTION__, "Last group not full");
+	else if (offset != 0)
+			ext4_warning(sb, __FUNCTION__, "Last group not full");
 	else if (input->reserved_blocks > input->blocks_count / 5)
 		ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)",
 			     input->reserved_blocks);
@@ -945,7 +945,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 
 	if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
 		printk(KERN_ERR "EXT4-fs: filesystem on %s:"
-			" too large to resize to %lu blocks safely\n",
+			" too large to resize to "E3FSBLK" blocks safely\n",
 			sb->s_id, n_blocks_count);
 		if (sizeof(sector_t) < 8)
 			ext4_warning(sb, __FUNCTION__,
@@ -960,8 +960,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 	}
 
 	/* Handle the remaining blocks in the last group only. */
-	last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
-		EXT4_BLOCKS_PER_GROUP(sb);
+	ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
 
 	if (last == 0) {
 		ext4_warning(sb, __FUNCTION__,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 69f875250500..1d12e4f7d69f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1433,8 +1433,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	 * block sizes.  We need to calculate the offset from buffer start.
 	 */
 	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
-		logic_sb_block = (sb_block * EXT4_MIN_BLOCK_SIZE) / blocksize;
-		offset = (sb_block * EXT4_MIN_BLOCK_SIZE) % blocksize;
+		logic_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
+		offset = sector_div(logic_sb_block, blocksize);
 	} else {
 		logic_sb_block = sb_block;
 	}
@@ -1539,8 +1539,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 
 		brelse (bh);
 		sb_set_blocksize(sb, blocksize);
-		logic_sb_block = (sb_block * EXT4_MIN_BLOCK_SIZE) / blocksize;
-		offset = (sb_block * EXT4_MIN_BLOCK_SIZE) % blocksize;
+		logic_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
+		offset = sector_div(logic_sb_block, blocksize);
 		bh = sb_bread(sb, logic_sb_block);
 		if (!bh) {
 			printk(KERN_ERR
-- 
cgit v1.2.3


From f65e6fba163dfd0f962efb7d8f5528b6872e2b15 Mon Sep 17 00:00:00 2001
From: Alex Tomas <alex@clusterfs.com>
Date: Wed, 11 Oct 2006 01:21:05 -0700
Subject: [PATCH] ext4: 48bit physical block number support in extents

Signed-off-by: Alex Tomas <alex@clusterfs.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 187 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 111 insertions(+), 76 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f67b2ef6a71f..4a13b56e1540 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,6 +44,44 @@
 #include <asm/uaccess.h>
 
 
+/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */
+static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
+{
+	ext4_fsblk_t block;
+
+	block = le32_to_cpu(ex->ee_start);
+	if (sizeof(ext4_fsblk_t) > 4)
+		block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
+	return block;
+}
+
+/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */
+static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
+{
+	ext4_fsblk_t block;
+
+	block = le32_to_cpu(ix->ei_leaf);
+	if (sizeof(ext4_fsblk_t) > 4)
+		block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
+	return block;
+}
+
+/* the routine stores large phys. blocknr into extent breaking it into parts */
+static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
+{
+	ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+	if (sizeof(ext4_fsblk_t) > 4)
+		ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
+}
+
+/* the routine stores large phys. blocknr into index breaking it into parts */
+static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
+{
+	ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+	if (sizeof(ext4_fsblk_t) > 4)
+		ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
+}
+
 static int ext4_ext_check_header(const char *function, struct inode *inode,
 				struct ext4_extent_header *eh)
 {
@@ -124,13 +162,13 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
 	return err;
 }
 
-static int ext4_ext_find_goal(struct inode *inode,
+static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 			      struct ext4_ext_path *path,
-			      unsigned long block)
+			      ext4_fsblk_t block)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned long bg_start;
-	unsigned long colour;
+	ext4_fsblk_t bg_start;
+	ext4_grpblk_t colour;
 	int depth;
 
 	if (path) {
@@ -139,8 +177,7 @@ static int ext4_ext_find_goal(struct inode *inode,
 
 		/* try to predict block placement */
 		if ((ex = path[depth].p_ext))
-			return le32_to_cpu(ex->ee_start)
-					+ (block - le32_to_cpu(ex->ee_block));
+			return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
 
 		/* it looks index is empty
 		 * try to find starting from index itself */
@@ -156,12 +193,12 @@ static int ext4_ext_find_goal(struct inode *inode,
 	return bg_start + colour + block;
 }
 
-static int
+static ext4_fsblk_t
 ext4_ext_new_block(handle_t *handle, struct inode *inode,
 			struct ext4_ext_path *path,
 			struct ext4_extent *ex, int *err)
 {
-	int goal, newblock;
+	ext4_fsblk_t goal, newblock;
 
 	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
 	newblock = ext4_new_block(handle, inode, goal, err);
@@ -230,13 +267,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
 	ext_debug("path:");
 	for (k = 0; k <= l; k++, path++) {
 		if (path->p_idx) {
-		  ext_debug("  %d->%d", le32_to_cpu(path->p_idx->ei_block),
-			    le32_to_cpu(path->p_idx->ei_leaf));
+		  ext_debug("  %d->"E3FSBLK, le32_to_cpu(path->p_idx->ei_block),
+			    idx_pblock(path->p_idx));
 		} else if (path->p_ext) {
-			ext_debug("  %d:%d:%d",
+			ext_debug("  %d:%d:"E3FSBLK" ",
 				  le32_to_cpu(path->p_ext->ee_block),
 				  le16_to_cpu(path->p_ext->ee_len),
-				  le32_to_cpu(path->p_ext->ee_start));
+				  ext_pblock(path->p_ext));
 		} else
 			ext_debug("  []");
 	}
@@ -257,9 +294,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
 	ex = EXT_FIRST_EXTENT(eh);
 
 	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
-		ext_debug("%d:%d:%d ", le32_to_cpu(ex->ee_block),
-			  le16_to_cpu(ex->ee_len),
-			  le32_to_cpu(ex->ee_start));
+		ext_debug("%d:%d:"E3FSBLK" ", le32_to_cpu(ex->ee_block),
+			  le16_to_cpu(ex->ee_len), ext_pblock(ex));
 	}
 	ext_debug("\n");
 }
@@ -308,8 +344,8 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc
 	}
 
 	path->p_idx = l - 1;
-	ext_debug("  -> %d->%d ", le32_to_cpu(path->p_idx->ei_block),
-		  le32_to_cpu(path->p_idx->ei_leaf));
+	ext_debug("  -> %d->"E3FSBLK" ", le32_to_cpu(path->p_idx->ei_block),
+		  idx_pblock(path->p_idx));
 
 #ifdef CHECK_BINSEARCH
 	{
@@ -374,10 +410,10 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
 	}
 
 	path->p_ext = l - 1;
-	ext_debug("  -> %d:%d:%d ",
+	ext_debug("  -> %d:"E3FSBLK":%d ",
 		        le32_to_cpu(path->p_ext->ee_block),
-		        le32_to_cpu(path->p_ext->ee_start),
-		        le16_to_cpu(path->p_ext->ee_len));
+		        ext_pblock(path->p_ext),
+			le16_to_cpu(path->p_ext->ee_len));
 
 #ifdef CHECK_BINSEARCH
 	{
@@ -442,7 +478,7 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path)
 		ext_debug("depth %d: num %d, max %d\n",
 			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
 		ext4_ext_binsearch_idx(inode, path + ppos, block);
-		path[ppos].p_block = le32_to_cpu(path[ppos].p_idx->ei_leaf);
+		path[ppos].p_block = idx_pblock(path[ppos].p_idx);
 		path[ppos].p_depth = i;
 		path[ppos].p_ext = NULL;
 
@@ -489,7 +525,7 @@ err:
  */
 static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *curp,
-				int logical, int ptr)
+				int logical, ext4_fsblk_t ptr)
 {
 	struct ext4_extent_idx *ix;
 	int len, err;
@@ -524,7 +560,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 	}
 
 	ix->ei_block = cpu_to_le32(logical);
-	ix->ei_leaf = cpu_to_le32(ptr);
+	ext4_idx_store_pblock(ix, ptr);
 	curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);
 
 	BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
@@ -556,9 +592,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	struct ext4_extent_idx *fidx;
 	struct ext4_extent *ex;
 	int i = at, k, m, a;
-	unsigned long newblock, oldblock;
+	ext4_fsblk_t newblock, oldblock;
 	__le32 border;
-	int *ablocks = NULL; /* array of allocated blocks */
+	ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
 	int err = 0;
 
 	/* make decision: where to split? */
@@ -591,10 +627,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	 * we need this to handle errors and free blocks
 	 * upon them
 	 */
-	ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS);
+	ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
 	if (!ablocks)
 		return -ENOMEM;
-	memset(ablocks, 0, sizeof(unsigned long) * depth);
+	memset(ablocks, 0, sizeof(ext4_fsblk_t) * depth);
 
 	/* allocate all needed blocks */
 	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
@@ -633,9 +669,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	path[depth].p_ext++;
 	while (path[depth].p_ext <=
 			EXT_MAX_EXTENT(path[depth].p_hdr)) {
-		ext_debug("move %d:%d:%d in new leaf %lu\n",
+		ext_debug("move %d:"E3FSBLK":%d in new leaf "E3FSBLK"\n",
 			        le32_to_cpu(path[depth].p_ext->ee_block),
-			        le32_to_cpu(path[depth].p_ext->ee_start),
+			        ext_pblock(path[depth].p_ext),
 			        le16_to_cpu(path[depth].p_ext->ee_len),
 				newblock);
 		/*memmove(ex++, path[depth].p_ext++,
@@ -679,7 +715,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	while (k--) {
 		oldblock = newblock;
 		newblock = ablocks[--a];
-		bh = sb_getblk(inode->i_sb, newblock);
+		bh = sb_getblk(inode->i_sb, (ext4_fsblk_t)newblock);
 		if (!bh) {
 			err = -EIO;
 			goto cleanup;
@@ -696,9 +732,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		neh->eh_depth = cpu_to_le16(depth - i);
 		fidx = EXT_FIRST_INDEX(neh);
 		fidx->ei_block = border;
-		fidx->ei_leaf = cpu_to_le32(oldblock);
+		ext4_idx_store_pblock(fidx, oldblock);
 
-		ext_debug("int.index at %d (block %lu): %lu -> %lu\n", i,
+		ext_debug("int.index at %d (block "E3FSBLK"): %lu -> "E3FSBLK"\n", i,
 				newblock, (unsigned long) le32_to_cpu(border),
 				oldblock);
 		/* copy indexes */
@@ -710,9 +746,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
 				EXT_LAST_INDEX(path[i].p_hdr));
 		while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
-			ext_debug("%d: move %d:%d in new index %lu\n", i,
+			ext_debug("%d: move %d:%d in new index "E3FSBLK"\n", i,
 				        le32_to_cpu(path[i].p_idx->ei_block),
-				        le32_to_cpu(path[i].p_idx->ei_leaf),
+				        idx_pblock(path[i].p_idx),
 				        newblock);
 			/*memmove(++fidx, path[i].p_idx++,
 					sizeof(struct ext4_extent_idx));
@@ -791,7 +827,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	struct ext4_extent_header *neh;
 	struct ext4_extent_idx *fidx;
 	struct buffer_head *bh;
-	unsigned long newblock;
+	ext4_fsblk_t newblock;
 	int err = 0;
 
 	newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
@@ -839,13 +875,13 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
 	/* FIXME: it works, but actually path[0] can be index */
 	curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
-	curp->p_idx->ei_leaf = cpu_to_le32(newblock);
+	ext4_idx_store_pblock(curp->p_idx, newblock);
 
 	neh = ext_inode_hdr(inode);
 	fidx = EXT_FIRST_INDEX(neh);
-	ext_debug("new root: num %d(%d), lblock %d, ptr %d\n",
+	ext_debug("new root: num %d(%d), lblock %d, ptr "E3FSBLK"\n",
 		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
-		  le32_to_cpu(fidx->ei_block), le32_to_cpu(fidx->ei_leaf));
+		  le32_to_cpu(fidx->ei_block), idx_pblock(fidx));
 
 	neh->eh_depth = cpu_to_le16(path->p_depth + 1);
 	err = ext4_ext_dirty(handle, inode, curp);
@@ -1042,7 +1078,6 @@ static int inline
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 				struct ext4_extent *ex2)
 {
-	/* FIXME: 48bit support */
         if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len)
 	    != le32_to_cpu(ex2->ee_block))
 		return 0;
@@ -1052,8 +1087,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 		return 0;
 #endif
 
-        if (le32_to_cpu(ex1->ee_start) + le16_to_cpu(ex1->ee_len)
-			== le32_to_cpu(ex2->ee_start))
+        if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2))
 		return 1;
 	return 0;
 }
@@ -1080,11 +1114,10 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 
 	/* try to insert block into found extent and return */
 	if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
-		ext_debug("append %d block to %d:%d (from %d)\n",
+		ext_debug("append %d block to %d:%d (from "E3FSBLK")\n",
 				le16_to_cpu(newext->ee_len),
 				le32_to_cpu(ex->ee_block),
-				le16_to_cpu(ex->ee_len),
-				le32_to_cpu(ex->ee_start));
+				le16_to_cpu(ex->ee_len), ext_pblock(ex));
 		if ((err = ext4_ext_get_access(handle, inode, path + depth)))
 			return err;
 		ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len)
@@ -1140,9 +1173,9 @@ has_space:
 
 	if (!nearex) {
 		/* there is no extent in this leaf, create first one */
-		ext_debug("first extent in the leaf: %d:%d:%d\n",
+		ext_debug("first extent in the leaf: %d:"E3FSBLK":%d\n",
 			        le32_to_cpu(newext->ee_block),
-			        le32_to_cpu(newext->ee_start),
+			        ext_pblock(newext),
 			        le16_to_cpu(newext->ee_len));
 		path[depth].p_ext = EXT_FIRST_EXTENT(eh);
 	} else if (le32_to_cpu(newext->ee_block)
@@ -1152,10 +1185,10 @@ has_space:
 			len = EXT_MAX_EXTENT(eh) - nearex;
 			len = (len - 1) * sizeof(struct ext4_extent);
 			len = len < 0 ? 0 : len;
-			ext_debug("insert %d:%d:%d after: nearest 0x%p, "
+			ext_debug("insert %d:"E3FSBLK":%d after: nearest 0x%p, "
 					"move %d from 0x%p to 0x%p\n",
 				        le32_to_cpu(newext->ee_block),
-				        le32_to_cpu(newext->ee_start),
+				        ext_pblock(newext),
 				        le16_to_cpu(newext->ee_len),
 					nearex, len, nearex + 1, nearex + 2);
 			memmove(nearex + 2, nearex + 1, len);
@@ -1165,10 +1198,10 @@ has_space:
 		BUG_ON(newext->ee_block == nearex->ee_block);
 		len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
 		len = len < 0 ? 0 : len;
-		ext_debug("insert %d:%d:%d before: nearest 0x%p, "
+		ext_debug("insert %d:"E3FSBLK":%d before: nearest 0x%p, "
 				"move %d from 0x%p to 0x%p\n",
 				le32_to_cpu(newext->ee_block),
-				le32_to_cpu(newext->ee_start),
+				ext_pblock(newext),
 				le16_to_cpu(newext->ee_len),
 				nearex, len, nearex + 1, nearex + 2);
 		memmove(nearex + 1, nearex, len);
@@ -1179,9 +1212,8 @@ has_space:
 	nearex = path[depth].p_ext;
 	nearex->ee_block = newext->ee_block;
 	nearex->ee_start = newext->ee_start;
+	nearex->ee_start_hi = newext->ee_start_hi;
 	nearex->ee_len = newext->ee_len;
-	/* FIXME: support for large fs */
-	nearex->ee_start_hi = 0;
 
 merge:
 	/* try to merge extents to the right */
@@ -1290,7 +1322,7 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
 		} else {
 		        cbex.ec_block = le32_to_cpu(ex->ee_block);
 		        cbex.ec_len = le16_to_cpu(ex->ee_len);
-		        cbex.ec_start = le32_to_cpu(ex->ee_start);
+		        cbex.ec_start = ext_pblock(ex);
 			cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
 		}
 
@@ -1398,13 +1430,13 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block,
 			cex->ec_type != EXT4_EXT_CACHE_EXTENT);
 	if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
 	        ex->ee_block = cpu_to_le32(cex->ec_block);
-	        ex->ee_start = cpu_to_le32(cex->ec_start);
+		ext4_ext_store_pblock(ex, cex->ec_start);
 	        ex->ee_len = cpu_to_le16(cex->ec_len);
-		ext_debug("%lu cached by %lu:%lu:%lu\n",
+		ext_debug("%lu cached by %lu:%lu:"E3FSBLK"\n",
 				(unsigned long) block,
 				(unsigned long) cex->ec_block,
 				(unsigned long) cex->ec_len,
-				(unsigned long) cex->ec_start);
+				cex->ec_start);
 		return cex->ec_type;
 	}
 
@@ -1422,18 +1454,18 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 {
 	struct buffer_head *bh;
 	int err;
-	unsigned long leaf;
+	ext4_fsblk_t leaf;
 
 	/* free index block */
 	path--;
-	leaf = le32_to_cpu(path->p_idx->ei_leaf);
+	leaf = idx_pblock(path->p_idx);
 	BUG_ON(path->p_hdr->eh_entries == 0);
 	if ((err = ext4_ext_get_access(handle, inode, path)))
 		return err;
 	path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1);
 	if ((err = ext4_ext_dirty(handle, inode, path)))
 		return err;
-	ext_debug("index is empty, remove it, free block %lu\n", leaf);
+	ext_debug("index is empty, remove it, free block "E3FSBLK"\n", leaf);
 	bh = sb_find_get_block(inode->i_sb, leaf);
 	ext4_forget(handle, 1, inode, bh, leaf);
 	ext4_free_blocks(handle, inode, leaf, 1);
@@ -1515,10 +1547,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	if (from >= le32_to_cpu(ex->ee_block)
 	    && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) {
 		/* tail removal */
-		unsigned long num, start;
+		unsigned long num;
+		ext4_fsblk_t start;
 		num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from;
-		start = le32_to_cpu(ex->ee_start) + le16_to_cpu(ex->ee_len) - num;
-		ext_debug("free last %lu blocks starting %lu\n", num, start);
+		start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num;
+		ext_debug("free last %lu blocks starting "E3FSBLK"\n", num, start);
 		for (i = 0; i < num; i++) {
 			bh = sb_find_get_block(inode->i_sb, start + i);
 			ext4_forget(handle, 0, inode, bh, start + i);
@@ -1621,7 +1654,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 
 		if (num == 0) {
 			/* this extent is removed entirely mark slot unused */
-			ex->ee_start = 0;
+			ext4_ext_store_pblock(ex, 0);
 			eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
 		}
 
@@ -1632,8 +1665,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		if (err)
 			goto out;
 
-		ext_debug("new extent: %u:%u:%u\n", block, num,
-				le32_to_cpu(ex->ee_start));
+		ext_debug("new extent: %u:%u:"E3FSBLK"\n", block, num,
+				ext_pblock(ex));
 		ex--;
 		ex_ee_block = le32_to_cpu(ex->ee_block);
 		ex_ee_len = le16_to_cpu(ex->ee_len);
@@ -1748,11 +1781,11 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
 				path[i].p_idx);
 		if (ext4_ext_more_to_rm(path + i)) {
 			/* go to the next level */
-			ext_debug("move to level %d (block %d)\n",
-				  i + 1, le32_to_cpu(path[i].p_idx->ei_leaf));
+			ext_debug("move to level %d (block "E3FSBLK")\n",
+				  i + 1, idx_pblock(path[i].p_idx));
 			memset(path + i + 1, 0, sizeof(*path));
 			path[i+1].p_bh =
-				sb_bread(sb, le32_to_cpu(path[i].p_idx->ei_leaf));
+				sb_bread(sb, idx_pblock(path[i].p_idx));
 			if (!path[i+1].p_bh) {
 				/* should we reset i_size? */
 				err = -EIO;
@@ -1851,13 +1884,15 @@ void ext4_ext_release(struct super_block *sb)
 #endif
 }
 
-int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock,
+int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
+			ext4_fsblk_t iblock,
 			unsigned long max_blocks, struct buffer_head *bh_result,
 			int create, int extend_disksize)
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent newex, *ex;
-	int goal, newblock, err = 0, depth;
+	ext4_fsblk_t goal, newblock;
+	int err = 0, depth;
 	unsigned long allocated = 0;
 
 	__clear_bit(BH_New, &bh_result->b_state);
@@ -1878,7 +1913,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock,
 			/* block is already allocated */
 		        newblock = iblock
 		                   - le32_to_cpu(newex.ee_block)
-			           + le32_to_cpu(newex.ee_start);
+			           + ext_pblock(&newex);
 			/* number of remain blocks in the extent */
 			allocated = le16_to_cpu(newex.ee_len) -
 					(iblock - le32_to_cpu(newex.ee_block));
@@ -1907,14 +1942,14 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock,
 
 	if ((ex = path[depth].p_ext)) {
 	        unsigned long ee_block = le32_to_cpu(ex->ee_block);
-		unsigned long ee_start = le32_to_cpu(ex->ee_start);
+		ext4_fsblk_t ee_start = ext_pblock(ex);
 		unsigned short ee_len  = le16_to_cpu(ex->ee_len);
 		/* if found exent covers block, simple return it */
 	        if (iblock >= ee_block && iblock < ee_block + ee_len) {
 			newblock = iblock - ee_block + ee_start;
 			/* number of remain blocks in the extent */
 			allocated = ee_len - (iblock - ee_block);
-			ext_debug("%d fit into %lu:%d -> %d\n", (int) iblock,
+			ext_debug("%d fit into %lu:%d -> "E3FSBLK"\n", (int) iblock,
 					ee_block, ee_len, newblock);
 			ext4_ext_put_in_cache(inode, ee_block, ee_len,
 						ee_start, EXT4_EXT_CACHE_EXTENT);
@@ -1944,12 +1979,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock,
 	newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
 	if (!newblock)
 		goto out2;
-	ext_debug("allocate new block: goal %d, found %d/%lu\n",
+	ext_debug("allocate new block: goal "E3FSBLK", found "E3FSBLK"/%lu\n",
 			goal, newblock, allocated);
 
 	/* try to insert new extent into found leaf and return */
 	newex.ee_block = cpu_to_le32(iblock);
-	newex.ee_start = cpu_to_le32(newblock);
+	ext4_ext_store_pblock(&newex, newblock);
 	newex.ee_len = cpu_to_le16(allocated);
 	err = ext4_ext_insert_extent(handle, inode, path, &newex);
 	if (err)
@@ -1959,7 +1994,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock,
 		EXT4_I(inode)->i_disksize = inode->i_size;
 
 	/* previous routine could use block we allocated */
-	newblock = le32_to_cpu(newex.ee_start);
+	newblock = ext_pblock(&newex);
 	__set_bit(BH_New, &bh_result->b_state);
 
 	ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
-- 
cgit v1.2.3


From 471d4011a9862efff02094388b8fe8cd67683c38 Mon Sep 17 00:00:00 2001
From: Suparna Bhattacharya <suparna@in.ibm.com>
Date: Wed, 11 Oct 2006 01:21:06 -0700
Subject: [PATCH] ext4: uninitialised extent handling

Make it possible to add file preallocation support in future as an RO_COMPAT
feature by recognizing uninitialized extents as holes and limiting extent
length to keep the top bit of ee_len free for marking uninitialized extents.

Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4a13b56e1540..32526061a17d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1082,6 +1082,13 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	    != le32_to_cpu(ex2->ee_block))
 		return 0;
 
+	/*
+	 * To allow future support for preallocated extents to be added
+	 * as an RO_COMPAT feature, refuse to merge to extents if
+	 * can result in the top bit of ee_len being set
+	 */
+	if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN)
+		return 0;
 #ifdef AGRESSIVE_TEST
 	if (le16_to_cpu(ex1->ee_len) >= 4)
 		return 0;
@@ -1944,6 +1951,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	        unsigned long ee_block = le32_to_cpu(ex->ee_block);
 		ext4_fsblk_t ee_start = ext_pblock(ex);
 		unsigned short ee_len  = le16_to_cpu(ex->ee_len);
+
+		/*
+		 * Allow future support for preallocated extents to be added
+		 * as an RO_COMPAT feature:
+		 * Uninitialized extents are treated as holes, except that
+		 * we avoid (fail) allocating new blocks during a write.
+		 */
+		if (ee_len > EXT_MAX_LEN)
+			goto out2;
 		/* if found exent covers block, simple return it */
 	        if (iblock >= ee_block && iblock < ee_block + ee_len) {
 			newblock = iblock - ee_block + ee_start;
-- 
cgit v1.2.3


From d0d856e8bd6e697cb44b2b4dd038f3bec576a70e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 11 Oct 2006 01:21:07 -0700
Subject: [PATCH] ext4: clean up comments in ext4-extents patch

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 226 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 128 insertions(+), 98 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 32526061a17d..e06e937a52b8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,7 +44,10 @@
 #include <asm/uaccess.h>
 
 
-/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */
+/*
+ * ext_pblock:
+ * combine low and high parts of physical block number into ext4_fsblk_t
+ */
 static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
 {
 	ext4_fsblk_t block;
@@ -55,7 +58,10 @@ static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
 	return block;
 }
 
-/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */
+/*
+ * idx_pblock:
+ * combine low and high parts of a leaf physical block number into ext4_fsblk_t
+ */
 static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 {
 	ext4_fsblk_t block;
@@ -66,7 +72,11 @@ static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 	return block;
 }
 
-/* the routine stores large phys. blocknr into extent breaking it into parts */
+/*
+ * ext4_ext_store_pblock:
+ * stores a large physical block number into an extent struct,
+ * breaking it into parts
+ */
 static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
 {
 	ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
@@ -74,7 +84,11 @@ static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb
 		ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }
 
-/* the routine stores large phys. blocknr into index breaking it into parts */
+/*
+ * ext4_idx_store_pblock:
+ * stores a large physical block number into an index struct,
+ * breaking it into parts
+ */
 static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
 {
 	ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
@@ -179,8 +193,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 		if ((ex = path[depth].p_ext))
 			return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
 
-		/* it looks index is empty
-		 * try to find starting from index itself */
+		/* it looks like index is empty;
+		 * try to find starting block from index itself */
 		if (path[depth].p_bh)
 			return path[depth].p_bh->b_blocknr;
 	}
@@ -317,7 +331,8 @@ static void ext4_ext_drop_refs(struct ext4_ext_path *path)
 }
 
 /*
- * binary search for closest index by given block
+ * ext4_ext_binsearch_idx:
+ * binary search for the closest index of the given block
  */
 static void
 ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block)
@@ -375,7 +390,8 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc
 }
 
 /*
- * binary search for closest extent by given block
+ * ext4_ext_binsearch:
+ * binary search for closest extent of the given block
  */
 static void
 ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
@@ -388,8 +404,8 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
 
 	if (eh->eh_entries == 0) {
 		/*
-		 * this leaf is empty yet:
-		 *  we get such a leaf in split/add case
+		 * this leaf is empty:
+		 * we get such a leaf in split/add case
 		 */
 		return;
 	}
@@ -520,8 +536,9 @@ err:
 }
 
 /*
- * insert new index [logical;ptr] into the block at cupr
- * it check where to insert: before curp or after curp
+ * ext4_ext_insert_index:
+ * insert new index [@logical;@ptr] into the block at @curp;
+ * check where to insert: before @curp or after @curp
  */
 static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *curp,
@@ -574,13 +591,14 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 }
 
 /*
- * routine inserts new subtree into the path, using free index entry
- * at depth 'at:
- *  - allocates all needed blocks (new leaf and all intermediate index blocks)
- *  - makes decision where to split
- *  - moves remaining extens and index entries (right to the split point)
- *    into the newly allocated blocks
- *  - initialize subtree
+ * ext4_ext_split:
+ * inserts new subtree into the path, using free index entry
+ * at depth @at:
+ * - allocates all needed blocks (new leaf and all intermediate index blocks)
+ * - makes decision where to split
+ * - moves remaining extents and index entries (right to the split point)
+ *   into the newly allocated blocks
+ * - initializes subtree
  */
 static int ext4_ext_split(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path,
@@ -598,14 +616,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	int err = 0;
 
 	/* make decision: where to split? */
-	/* FIXME: now desicion is simplest: at current extent */
+	/* FIXME: now decision is simplest: at current extent */
 
-	/* if current leaf will be splitted, then we should use
+	/* if current leaf will be split, then we should use
 	 * border from split point */
 	BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
 	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
 		border = path[depth].p_ext[1].ee_block;
-		ext_debug("leaf will be splitted."
+		ext_debug("leaf will be split."
 				" next leaf starts at %d\n",
 			          le32_to_cpu(border));
 	} else {
@@ -616,16 +634,16 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	}
 
 	/*
-	 * if error occurs, then we break processing
-	 * and turn filesystem read-only. so, index won't
+	 * If error occurs, then we break processing
+	 * and mark filesystem read-only. index won't
 	 * be inserted and tree will be in consistent
-	 * state. next mount will repair buffers too
+	 * state. Next mount will repair buffers too.
 	 */
 
 	/*
-	 * get array to track all allocated blocks
-	 * we need this to handle errors and free blocks
-	 * upon them
+	 * Get array to track all allocated blocks.
+	 * We need this to handle errors and free blocks
+	 * upon them.
 	 */
 	ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
 	if (!ablocks)
@@ -661,7 +679,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	neh->eh_depth = 0;
 	ex = EXT_FIRST_EXTENT(neh);
 
-	/* move remain of path[depth] to the new leaf */
+	/* move remainder of path[depth] to the new leaf */
 	BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
 	/* start copy from next extent */
 	/* TODO: we could do it by single memmove */
@@ -813,11 +831,12 @@ cleanup:
 }
 
 /*
- * routine implements tree growing procedure:
- *  - allocates new block
- *  - moves top-level data (index block or leaf) into the new block
- *  - initialize new top-level, creating index that points to the
- *    just created block
+ * ext4_ext_grow_indepth:
+ * implements tree growing procedure:
+ * - allocates new block
+ * - moves top-level data (index block or leaf) into the new block
+ * - initializes new top-level, creating index that points to the
+ *   just created block
  */
 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 					struct ext4_ext_path *path,
@@ -892,8 +911,9 @@ out:
 }
 
 /*
- * routine finds empty index and adds new leaf. if no free index found
- * then it requests in-depth growing
+ * ext4_ext_create_new_leaf:
+ * finds empty index and adds new leaf.
+ * if no free index is found, then it requests in-depth growing.
  */
 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
 					struct ext4_ext_path *path,
@@ -912,8 +932,8 @@ repeat:
 		curp--;
 	}
 
-	/* we use already allocated block for index block
-	 * so, subsequent data blocks should be contigoues */
+	/* we use already allocated block for index block,
+	 * so subsequent data blocks should be contiguous */
 	if (EXT_HAS_FREE_INDEX(curp)) {
 		/* if we found index with free entry, then use that
 		 * entry: create all needed subtree and add new leaf */
@@ -943,12 +963,12 @@ repeat:
 		}
 
 		/*
-		 * only first (depth 0 -> 1) produces free space
-		 * in all other cases we have to split growed tree
+		 * only first (depth 0 -> 1) produces free space;
+		 * in all other cases we have to split the grown tree
 		 */
 		depth = ext_depth(inode);
 		if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
-			/* now we need split */
+			/* now we need to split */
 			goto repeat;
 		}
 	}
@@ -958,10 +978,11 @@ out:
 }
 
 /*
- * returns allocated block in subsequent extent or EXT_MAX_BLOCK
- * NOTE: it consider block number from index entry as
- * allocated block. thus, index entries have to be consistent
- * with leafs
+ * ext4_ext_next_allocated_block:
+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
+ * NOTE: it considers block number from index entry as
+ * allocated block. Thus, index entries have to be consistent
+ * with leaves.
  */
 static unsigned long
 ext4_ext_next_allocated_block(struct ext4_ext_path *path)
@@ -993,6 +1014,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
 }
 
 /*
+ * ext4_ext_next_leaf_block:
  * returns first allocated block from next leaf or EXT_MAX_BLOCK
  */
 static unsigned ext4_ext_next_leaf_block(struct inode *inode,
@@ -1021,8 +1043,9 @@ static unsigned ext4_ext_next_leaf_block(struct inode *inode,
 }
 
 /*
- * if leaf gets modified and modified extent is first in the leaf
- * then we have to correct all indexes above
+ * ext4_ext_correct_indexes:
+ * if leaf gets modified and modified extent is first in the leaf,
+ * then we have to correct all indexes above.
  * TODO: do we need to correct tree in all cases?
  */
 int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
@@ -1050,7 +1073,7 @@ int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
 	}
 
 	/*
-	 * TODO: we need correction if border is smaller then current one
+	 * TODO: we need correction if border is smaller than current one
 	 */
 	k = depth - 1;
 	border = path[depth].p_ext->ee_block;
@@ -1085,7 +1108,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	/*
 	 * To allow future support for preallocated extents to be added
 	 * as an RO_COMPAT feature, refuse to merge to extents if
-	 * can result in the top bit of ee_len being set
+	 * this can result in the top bit of ee_len being set.
 	 */
 	if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN)
 		return 0;
@@ -1100,9 +1123,10 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 }
 
 /*
- * this routine tries to merge requsted extent into the existing
- * extent or inserts requested extent as new one into the tree,
- * creating new leaf in no-space case
+ * ext4_ext_insert_extent:
+ * tries to merge requested extent into the existing extent or
+ * inserts requested extent as new one into the tree,
+ * creating new leaf in the no-space case.
  */
 int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path,
@@ -1163,8 +1187,8 @@ repeat:
 	}
 
 	/*
-	 * there is no free space in found leaf
-	 * we're gonna add new leaf in the tree
+	 * There is no free space in the found leaf.
+	 * We're gonna add a new leaf in the tree.
 	 */
 	err = ext4_ext_create_new_leaf(handle, inode, path, newext);
 	if (err)
@@ -1377,7 +1401,8 @@ ext4_ext_put_in_cache(struct inode *inode, __u32 block,
 }
 
 /*
- * this routine calculate boundaries of the gap requested block fits into
+ * ext4_ext_put_gap_in_cache:
+ * calculate boundaries of the gap that the requested block fits into
  * and cache this gap
  */
 static inline void
@@ -1452,9 +1477,10 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block,
 }
 
 /*
- * routine removes index from the index block
- * it's used in truncate case only. thus all requests are for
- * last index in the block only
+ * ext4_ext_rm_idx:
+ * removes index from the index block.
+ * It's used in truncate case only, thus all requests are for
+ * last index in the block only.
  */
 int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 			struct ext4_ext_path *path)
@@ -1480,11 +1506,12 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 }
 
 /*
- * This routine returns max. credits extent tree can consume.
+ * ext4_ext_calc_credits_for_insert:
+ * This routine returns max. credits that the extent tree can consume.
  * It should be OK for low-performance paths like ->writepage()
- * To allow many writing process to fit a single transaction,
- * caller should calculate credits under truncate_mutex and
- * pass actual path.
+ * To allow many writing processes to fit into a single transaction,
+ * the caller should calculate credits under truncate_mutex and
+ * pass the actual path.
  */
 int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
 						struct ext4_ext_path *path)
@@ -1500,9 +1527,9 @@ int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
 	}
 
 	/*
-	 * given 32bit logical block (4294967296 blocks), max. tree
+	 * given 32-bit logical block (4294967296 blocks), max. tree
 	 * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
-	 * let's also add one more level for imbalance.
+	 * Let's also add one more level for imbalance.
 	 */
 	depth = 5;
 
@@ -1510,13 +1537,13 @@ int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
 	needed = 2;
 
 	/*
-	 * tree can be full, so it'd need to grow in depth:
+	 * tree can be full, so it would need to grow in depth:
 	 * allocation + old root + new root
 	 */
 	needed += 2 + 1 + 1;
 
 	/*
-	 * Index split can happen, we'd need:
+	 * Index split can happen, we would need:
 	 *    allocate intermediate indexes (bitmap + group)
 	 *  + change two blocks at each level, but root (already included)
 	 */
@@ -1634,7 +1661,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			BUG_ON(b != ex_ee_block + ex_ee_len - 1);
 		}
 
-		/* at present, extent can't cross block group */
+		/* at present, extent can't cross block group: */
 		/* leaf + bitmap + group desc + sb + inode */
 		credits = 5;
 		if (ex == EXT_FIRST_EXTENT(eh)) {
@@ -1660,7 +1687,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			goto out;
 
 		if (num == 0) {
-			/* this extent is removed entirely mark slot unused */
+			/* this extent is removed entirely; mark slot unused */
 			ext4_ext_store_pblock(ex, 0);
 			eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
 		}
@@ -1692,7 +1719,8 @@ out:
 }
 
 /*
- * returns 1 if current index have to be freed (even partial)
+ * ext4_ext_more_to_rm:
+ * returns 1 if current index has to be freed (even partial)
  */
 static int inline
 ext4_ext_more_to_rm(struct ext4_ext_path *path)
@@ -1703,7 +1731,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
 		return 0;
 
 	/*
-	 * if truncate on deeper level happened it it wasn't partial
+	 * if truncate on deeper level happened, it wasn't partial,
 	 * so we have to consider current index for truncation
 	 */
 	if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
@@ -1729,8 +1757,8 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
 	ext4_ext_invalidate_cache(inode);
 
 	/*
-	 * we start scanning from right side freeing all the blocks
-	 * after i_size and walking into the deep
+	 * We start scanning from right side, freeing all the blocks
+	 * after i_size and walking into the tree depth-wise.
 	 */
 	path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
 	if (path == NULL) {
@@ -1749,7 +1777,7 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
 		if (i == depth) {
 			/* this is leaf block */
 			err = ext4_ext_rm_leaf(handle, inode, path, start);
-			/* root level have p_bh == NULL, brelse() eats this */
+			/* root level has p_bh == NULL, brelse() eats this */
 			brelse(path[i].p_bh);
 			path[i].p_bh = NULL;
 			i--;
@@ -1772,14 +1800,14 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
 		BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC);
 
 		if (!path[i].p_idx) {
-			/* this level hasn't touched yet */
+			/* this level hasn't been touched yet */
 			path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
 			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
 			ext_debug("init index ptr: hdr 0x%p, num %d\n",
 				  path[i].p_hdr,
 				  le16_to_cpu(path[i].p_hdr->eh_entries));
 		} else {
-			/* we've already was here, see at next index */
+			/* we were already here, see at next index */
 			path[i].p_idx--;
 		}
 
@@ -1799,19 +1827,19 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
 				break;
 			}
 
-			/* put actual number of indexes to know is this
-			 * number got changed at the next iteration */
+			/* save actual number of indexes so we can tell
+			 * whether it changed at the next iteration */
 			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
 			i++;
 		} else {
-			/* we finish processing this index, go up */
+			/* we finished processing this index, go up */
 			if (path[i].p_hdr->eh_entries == 0 && i > 0) {
-				/* index is empty, remove it
+				/* index is empty, remove it;
 				 * handle must be already prepared by the
 				 * truncatei_leaf() */
 				err = ext4_ext_rm_idx(handle, inode, path + i);
 			}
-			/* root level have p_bh == NULL, brelse() eats this */
+			/* root level has p_bh == NULL, brelse() eats this */
 			brelse(path[i].p_bh);
 			path[i].p_bh = NULL;
 			i--;
@@ -1822,8 +1850,8 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
 	/* TODO: flexible tree reduction should be here */
 	if (path->p_hdr->eh_entries == 0) {
 		/*
-		 * truncate to zero freed all the tree
-		 * so, we need to correct eh_depth
+		 * truncate to zero freed all the tree,
+		 * so we need to correct eh_depth
 		 */
 		err = ext4_ext_get_access(handle, inode, path);
 		if (err == 0) {
@@ -1912,7 +1940,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 		if (goal == EXT4_EXT_CACHE_GAP) {
 			if (!create) {
 				/* block isn't allocated yet and
-				 * user don't want to allocate it */
+				 * user doesn't want to allocate it */
 				goto out2;
 			}
 			/* we should allocate requested block */
@@ -1921,7 +1949,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 		        newblock = iblock
 		                   - le32_to_cpu(newex.ee_block)
 			           + ext_pblock(&newex);
-			/* number of remain blocks in the extent */
+			/* number of remaining blocks in the extent */
 			allocated = le16_to_cpu(newex.ee_len) -
 					(iblock - le32_to_cpu(newex.ee_block));
 			goto out;
@@ -1941,8 +1969,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	depth = ext_depth(inode);
 
 	/*
-	 * consistent leaf must not be empty
-	 * this situations is possible, though, _during_ tree modification
+	 * consistent leaf must not be empty;
+	 * this situation is possible, though, _during_ tree modification;
 	 * this is why assert can't be put in ext4_ext_find_extent()
 	 */
 	BUG_ON(path[depth].p_ext == NULL && depth != 0);
@@ -1960,10 +1988,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 		 */
 		if (ee_len > EXT_MAX_LEN)
 			goto out2;
-		/* if found exent covers block, simple return it */
+		/* if found extent covers block, simply return it */
 	        if (iblock >= ee_block && iblock < ee_block + ee_len) {
 			newblock = iblock - ee_block + ee_start;
-			/* number of remain blocks in the extent */
+			/* number of remaining blocks in the extent */
 			allocated = ee_len - (iblock - ee_block);
 			ext_debug("%d fit into %lu:%d -> "E3FSBLK"\n", (int) iblock,
 					ee_block, ee_len, newblock);
@@ -1974,17 +2002,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	}
 
 	/*
-	 * requested block isn't allocated yet
+	 * requested block isn't allocated yet;
 	 * we couldn't try to create block if create flag is zero
 	 */
 	if (!create) {
-		/* put just found gap into cache to speedup subsequest reqs */
+		/* put just found gap into cache to speed up
+		 * subsequent requests */
 		ext4_ext_put_gap_in_cache(inode, path, iblock);
 		goto out2;
 	}
 	/*
          * Okay, we need to do block allocation.  Lazily initialize the block
-         * allocation info here if necessary
+         * allocation info here if necessary.
         */
 	if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
 		ext4_init_block_alloc_info(inode);
@@ -2062,9 +2091,9 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
 	ext4_ext_invalidate_cache(inode);
 
 	/*
-	 * TODO: optimization is possible here
-	 * probably we need not scaning at all,
-	 * because page truncation is enough
+	 * TODO: optimization is possible here.
+	 * Probably we need not scan at all,
+	 * because page truncation is enough.
 	 */
 	if (ext4_orphan_add(handle, inode))
 		goto out_stop;
@@ -2078,13 +2107,13 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
 	err = ext4_ext_remove_space(inode, last_block);
 
 	/* In a multi-transaction truncate, we only make the final
-	 * transaction synchronous */
+	 * transaction synchronous. */
 	if (IS_SYNC(inode))
 		handle->h_sync = 1;
 
 out_stop:
 	/*
-	 * If this was a simple ftruncate(), and the file will remain alive
+	 * If this was a simple ftruncate() and the file will remain alive,
 	 * then we need to clear up the orphan record which we created above.
 	 * However, if this was a real unlink then we were called by
 	 * ext4_delete_inode(), and we allow that function to clean up the
@@ -2098,7 +2127,8 @@ out_stop:
 }
 
 /*
- * this routine calculate max number of blocks we could modify
+ * ext4_ext_writepage_trans_blocks:
+ * calculate max number of blocks we could modify
  * in order to allocate new block for an inode
  */
 int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
@@ -2107,7 +2137,7 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
 
 	needed = ext4_ext_calc_credits_for_insert(inode, NULL);
 
-	/* caller want to allocate num blocks, but note it includes sb */
+	/* caller wants to allocate num blocks, but note it includes sb */
 	needed = needed * num - (num - 1);
 
 #ifdef CONFIG_QUOTA
-- 
cgit v1.2.3


From b517bea1c74e4773482b3f41b3f493522a8c8e30 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Wed, 11 Oct 2006 01:21:08 -0700
Subject: [PATCH] 64-bit jbd2 core

Here is the patch to JBD to handle 64 bit block numbers, originally from Zach
Brown.  This patch is useful only after adding support for 64-bit block
numbers in the filesystem.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/commit.c   | 17 +++++++++++++----
 fs/jbd2/journal.c  | 11 +++++++++++
 fs/jbd2/recovery.c | 43 ++++++++++++++++++++++++++++++-------------
 fs/jbd2/revoke.c   | 14 +++++++++++---
 4 files changed, 65 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b1a4eafc1541..44d68a113c73 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -271,6 +271,14 @@ write_out_data:
 	journal_do_submit_data(wbuf, bufs);
 }
 
+static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
+				   sector_t block)
+{
+	tag->t_blocknr = cpu_to_be32(block & (u32)~0);
+	if (tag_bytes > JBD_TAG_SIZE32)
+		tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
+}
+
 /*
  * jbd2_journal_commit_transaction
  *
@@ -293,6 +301,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	int first_tag = 0;
 	int tag_flag;
 	int i;
+	int tag_bytes = journal_tag_bytes(journal);
 
 	/*
 	 * First job: lock down the current transaction and wait for
@@ -597,10 +606,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 			tag_flag |= JBD2_FLAG_SAME_UUID;
 
 		tag = (journal_block_tag_t *) tagp;
-		tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
+		write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
 		tag->t_flags = cpu_to_be32(tag_flag);
-		tagp += sizeof(journal_block_tag_t);
-		space_left -= sizeof(journal_block_tag_t);
+		tagp += tag_bytes;
+		space_left -= tag_bytes;
 
 		if (first_tag) {
 			memcpy (tagp, journal->j_uuid, 16);
@@ -614,7 +623,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
 		if (bufs == journal->j_wbufsize ||
 		    commit_transaction->t_buffers == NULL ||
-		    space_left < sizeof(journal_block_tag_t) + 16) {
+		    space_left < tag_bytes + 16) {
 
 			jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8d0f71e562fe..926ebcbf8a7a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1609,6 +1609,17 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
 	return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 }
 
+/*
+ * helper functions to deal with 32 or 64bit block numbers.
+ */
+size_t journal_tag_bytes(journal_t *journal)
+{
+	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
+		return JBD_TAG_SIZE64;
+	else
+		return JBD_TAG_SIZE32;
+}
+
 /*
  * Simple support for retrying memory allocations.  Introduced to help to
  * debug different VM deadlock avoidance strategies.
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index b2012d112432..2486843adda0 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -178,19 +178,20 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
  * Count the number of in-use tags in a journal descriptor block.
  */
 
-static int count_tags(struct buffer_head *bh, int size)
+static int count_tags(journal_t *journal, struct buffer_head *bh)
 {
 	char *			tagp;
 	journal_block_tag_t *	tag;
-	int			nr = 0;
+	int			nr = 0, size = journal->j_blocksize;
+	int			tag_bytes = journal_tag_bytes(journal);
 
 	tagp = &bh->b_data[sizeof(journal_header_t)];
 
-	while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
+	while ((tagp - bh->b_data + tag_bytes) <= size) {
 		tag = (journal_block_tag_t *) tagp;
 
 		nr++;
-		tagp += sizeof(journal_block_tag_t);
+		tagp += tag_bytes;
 		if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
 			tagp += 16;
 
@@ -307,6 +308,14 @@ int jbd2_journal_skip_recovery(journal_t *journal)
 	return err;
 }
 
+static inline sector_t read_tag_block(int tag_bytes, journal_block_tag_t *tag)
+{
+	sector_t block = be32_to_cpu(tag->t_blocknr);
+	if (tag_bytes > JBD_TAG_SIZE32)
+		block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
+	return block;
+}
+
 static int do_one_pass(journal_t *journal,
 			struct recovery_info *info, enum passtype pass)
 {
@@ -318,11 +327,12 @@ static int do_one_pass(journal_t *journal,
 	struct buffer_head *	bh;
 	unsigned int		sequence;
 	int			blocktype;
+	int			tag_bytes = journal_tag_bytes(journal);
 
 	/* Precompute the maximum metadata descriptors in a descriptor block */
 	int			MAX_BLOCKS_PER_DESC;
 	MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
-			       / sizeof(journal_block_tag_t));
+			       / tag_bytes);
 
 	/*
 	 * First thing is to establish what we expect to find in the log
@@ -412,8 +422,7 @@ static int do_one_pass(journal_t *journal,
 			 * in pass REPLAY; otherwise, just skip over the
 			 * blocks it describes. */
 			if (pass != PASS_REPLAY) {
-				next_log_block +=
-					count_tags(bh, journal->j_blocksize);
+				next_log_block += count_tags(journal, bh);
 				wrap(journal, next_log_block);
 				brelse(bh);
 				continue;
@@ -424,7 +433,7 @@ static int do_one_pass(journal_t *journal,
 			 * getting done here! */
 
 			tagp = &bh->b_data[sizeof(journal_header_t)];
-			while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
+			while ((tagp - bh->b_data + tag_bytes)
 			       <= journal->j_blocksize) {
 				unsigned long io_block;
 
@@ -446,7 +455,8 @@ static int do_one_pass(journal_t *journal,
 					unsigned long blocknr;
 
 					J_ASSERT(obh != NULL);
-					blocknr = be32_to_cpu(tag->t_blocknr);
+					blocknr = read_tag_block(tag_bytes,
+								 tag);
 
 					/* If the block has been
 					 * revoked, then we're all done
@@ -494,7 +504,7 @@ static int do_one_pass(journal_t *journal,
 				}
 
 			skip_write:
-				tagp += sizeof(journal_block_tag_t);
+				tagp += tag_bytes;
 				if (!(flags & JBD2_FLAG_SAME_UUID))
 					tagp += 16;
 
@@ -572,17 +582,24 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 {
 	jbd2_journal_revoke_header_t *header;
 	int offset, max;
+	int record_len = 4;
 
 	header = (jbd2_journal_revoke_header_t *) bh->b_data;
 	offset = sizeof(jbd2_journal_revoke_header_t);
 	max = be32_to_cpu(header->r_count);
 
-	while (offset < max) {
+	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
+		record_len = 8;
+
+	while (offset + record_len <= max) {
 		unsigned long blocknr;
 		int err;
 
-		blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
-		offset += 4;
+		if (record_len == 4)
+			blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
+		else
+			blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
+		offset += record_len;
 		err = jbd2_journal_set_revoke(journal, blocknr, sequence);
 		if (err)
 			return err;
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 5820a0c5ad26..8aac875bd301 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -584,9 +584,17 @@ static void write_one_revoke_record(journal_t *journal,
 		*descriptorp = descriptor;
 	}
 
-	* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
-		cpu_to_be32(record->blocknr);
-	offset += 4;
+	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) {
+		* ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) =
+			cpu_to_be64(record->blocknr);
+		offset += 8;
+
+	} else {
+		* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
+			cpu_to_be32(record->blocknr);
+		offset += 4;
+	}
+
 	*offsetp = offset;
 }
 
-- 
cgit v1.2.3


From 299717696d48531d70aeb4614c3939e4a28456c1 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 11 Oct 2006 01:21:09 -0700
Subject: [PATCH] jbd2: sector_t conversion

JBD layer in-kernel block variable type fixes to support >32-bit block numbers
and convert them to the sector_t type.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/commit.c   |  2 +-
 fs/jbd2/journal.c  | 18 +++++++++---------
 fs/jbd2/recovery.c |  8 ++++----
 fs/jbd2/revoke.c   | 23 ++++++++++++-----------
 4 files changed, 26 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 44d68a113c73..1a9ce8885220 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -293,7 +293,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	int bufs;
 	int flags;
 	int err;
-	unsigned long blocknr;
+	sector_t blocknr;
 	char *tagp = NULL;
 	journal_header_t *header;
 	journal_block_tag_t *tag = NULL;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 926ebcbf8a7a..259e8365ea15 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -271,7 +271,7 @@ static void journal_kill_thread(journal_t *journal)
 int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 				  struct journal_head  *jh_in,
 				  struct journal_head **jh_out,
-				  unsigned long blocknr)
+				  sector_t blocknr)
 {
 	int need_copy_out = 0;
 	int done_copy_out = 0;
@@ -555,7 +555,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
  * Log buffer allocation routines:
  */
 
-int jbd2_journal_next_log_block(journal_t *journal, unsigned long *retp)
+int jbd2_journal_next_log_block(journal_t *journal, sector_t *retp)
 {
 	unsigned long blocknr;
 
@@ -579,10 +579,10 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long *retp)
  * ready.
  */
 int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
-		 unsigned long *retp)
+		 sector_t *retp)
 {
 	int err = 0;
-	unsigned long ret;
+	sector_t ret;
 
 	if (journal->j_inode) {
 		ret = bmap(journal->j_inode, blocknr);
@@ -618,7 +618,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
 struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
 {
 	struct buffer_head *bh;
-	unsigned long blocknr;
+	sector_t blocknr;
 	int err;
 
 	err = jbd2_journal_next_log_block(journal, &blocknr);
@@ -706,7 +706,7 @@ fail:
  */
 journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 			struct block_device *fs_dev,
-			int start, int len, int blocksize)
+			sector_t start, int len, int blocksize)
 {
 	journal_t *journal = journal_init_common();
 	struct buffer_head *bh;
@@ -753,7 +753,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
 	journal_t *journal = journal_init_common();
 	int err;
 	int n;
-	unsigned long blocknr;
+	sector_t blocknr;
 
 	if (!journal)
 		return NULL;
@@ -819,7 +819,7 @@ static void journal_fail_superblock (journal_t *journal)
 static int journal_reset(journal_t *journal)
 {
 	journal_superblock_t *sb = journal->j_superblock;
-	unsigned long first, last;
+	sector_t first, last;
 
 	first = be32_to_cpu(sb->s_first);
 	last = be32_to_cpu(sb->s_maxlen);
@@ -853,7 +853,7 @@ static int journal_reset(journal_t *journal)
  **/
 int jbd2_journal_create(journal_t *journal)
 {
-	unsigned long blocknr;
+	sector_t blocknr;
 	struct buffer_head *bh;
 	journal_superblock_t *sb;
 	int i, err;
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 2486843adda0..52054a83e717 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -70,7 +70,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
 {
 	int err;
 	unsigned int max, nbufs, next;
-	unsigned long blocknr;
+	sector_t blocknr;
 	struct buffer_head *bh;
 
 	struct buffer_head * bufs[MAXBUF];
@@ -132,7 +132,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
 		 unsigned int offset)
 {
 	int err;
-	unsigned long blocknr;
+	sector_t blocknr;
 	struct buffer_head *bh;
 
 	*bhp = NULL;
@@ -452,7 +452,7 @@ static int do_one_pass(journal_t *journal,
 						"block %ld in log\n",
 						err, io_block);
 				} else {
-					unsigned long blocknr;
+					sector_t blocknr;
 
 					J_ASSERT(obh != NULL);
 					blocknr = read_tag_block(tag_bytes,
@@ -592,7 +592,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 		record_len = 8;
 
 	while (offset + record_len <= max) {
-		unsigned long blocknr;
+		sector_t blocknr;
 		int err;
 
 		if (record_len == 4)
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 8aac875bd301..3310a1d7ace9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -81,7 +81,7 @@ struct jbd2_revoke_record_s
 {
 	struct list_head  hash;
 	tid_t		  sequence;	/* Used for recovery only */
-	unsigned long	  blocknr;
+	sector_t	  blocknr;
 };
 
 
@@ -106,17 +106,18 @@ static void flush_descriptor(journal_t *, struct journal_head *, int);
 /* Utility functions to maintain the revoke table */
 
 /* Borrowed from buffer.c: this is a tried and tested block hash function */
-static inline int hash(journal_t *journal, unsigned long block)
+static inline int hash(journal_t *journal, sector_t block)
 {
 	struct jbd2_revoke_table_s *table = journal->j_revoke;
 	int hash_shift = table->hash_shift;
+	int hash = (int)block ^ (int)((block >> 31) >> 1);
 
-	return ((block << (hash_shift - 6)) ^
-		(block >> 13) ^
-		(block << (hash_shift - 12))) & (table->hash_size - 1);
+	return ((hash << (hash_shift - 6)) ^
+		(hash >> 13) ^
+		(hash << (hash_shift - 12))) & (table->hash_size - 1);
 }
 
-static int insert_revoke_hash(journal_t *journal, unsigned long blocknr,
+static int insert_revoke_hash(journal_t *journal, sector_t blocknr,
 			      tid_t seq)
 {
 	struct list_head *hash_list;
@@ -146,7 +147,7 @@ oom:
 /* Find a revoke record in the journal's hash table. */
 
 static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
-						      unsigned long blocknr)
+						      sector_t blocknr)
 {
 	struct list_head *hash_list;
 	struct jbd2_revoke_record_s *record;
@@ -325,7 +326,7 @@ void jbd2_journal_destroy_revoke(journal_t *journal)
  * by one.
  */
 
-int jbd2_journal_revoke(handle_t *handle, unsigned long blocknr,
+int jbd2_journal_revoke(handle_t *handle, sector_t blocknr,
 		   struct buffer_head *bh_in)
 {
 	struct buffer_head *bh = NULL;
@@ -394,7 +395,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long blocknr,
 		}
 	}
 
-	jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in);
+	jbd_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in);
 	err = insert_revoke_hash(journal, blocknr,
 				handle->h_transaction->t_tid);
 	BUFFER_TRACE(bh_in, "exit");
@@ -649,7 +650,7 @@ static void flush_descriptor(journal_t *journal,
  */
 
 int jbd2_journal_set_revoke(journal_t *journal,
-		       unsigned long blocknr,
+		       sector_t blocknr,
 		       tid_t sequence)
 {
 	struct jbd2_revoke_record_s *record;
@@ -673,7 +674,7 @@ int jbd2_journal_set_revoke(journal_t *journal,
  */
 
 int jbd2_journal_test_revoke(journal_t *journal,
-			unsigned long blocknr,
+			sector_t blocknr,
 			tid_t sequence)
 {
 	struct jbd2_revoke_record_s *record;
-- 
cgit v1.2.3


From a1ddeb7eaecea6a924e3a79aa386797020cb436f Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Wed, 11 Oct 2006 01:21:09 -0700
Subject: [PATCH] ext4: 48bit i_file_acl

As we are planning to support 48-bit block numbers for ext4, we need to
support 48-bit block numbers for extended attributes.  In the short term, we
can do this by reusing the (on-disk) 16-bit padding (linux2.i_pad1, currently used
only by "hurd") as high order bits for xattr.  This patch basically does that.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/inode.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2b81b1324a6f..9db8cff3baa4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2643,6 +2643,11 @@ void ext4_read_inode(struct inode * inode)
 	ei->i_frag_size = raw_inode->i_fsize;
 #endif
 	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
+	if ((sizeof(sector_t) > 4) &&
+	    (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+	     cpu_to_le32(EXT4_OS_HURD)))
+		ei->i_file_acl |=
+			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
 	if (!S_ISREG(inode->i_mode)) {
 		ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
 	} else {
@@ -2776,6 +2781,11 @@ static int ext4_do_update_inode(handle_t *handle,
 	raw_inode->i_frag = ei->i_frag_no;
 	raw_inode->i_fsize = ei->i_frag_size;
 #endif
+	if ((sizeof(sector_t) > 4) &&
+	    (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+	     cpu_to_le32(EXT4_OS_HURD)))
+		raw_inode->i_file_acl_high =
+			cpu_to_le16(ei->i_file_acl >> 32);
 	raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
 	if (!S_ISREG(inode->i_mode)) {
 		raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
-- 
cgit v1.2.3


From bd81d8eec043094d3ff729a8ff6d5b3a06d3c4b1 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <Laurent.Vivier@bull.net>
Date: Wed, 11 Oct 2006 01:21:10 -0700
Subject: [PATCH] ext4: 64bit metadata

In-kernel super block changes to support >32 bit free blocks numbers.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Alexandre Ratchov <alexandre.ratchov@bull.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 50 ++++++++++++++---------------
 fs/ext4/ialloc.c |  8 ++---
 fs/ext4/inode.c  |  6 ++--
 fs/ext4/resize.c | 52 ++++++++++++++++--------------
 fs/ext4/super.c  | 96 +++++++++++++++++++++++++++++++++++++++-----------------
 5 files changed, 128 insertions(+), 84 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index aa33ff271fa9..6887151ccc47 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -99,12 +99,13 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
 	desc = ext4_get_group_desc (sb, block_group, NULL);
 	if (!desc)
 		goto error_out;
-	bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
+	bh = sb_bread(sb, ext4_block_bitmap(desc));
 	if (!bh)
 		ext4_error (sb, "read_block_bitmap",
 			    "Cannot read block bitmap - "
-			    "block_group = %d, block_bitmap = %u",
-			    block_group, le32_to_cpu(desc->bg_block_bitmap));
+			    "block_group = %d, block_bitmap = "E3FSBLK,
+			    block_group,
+			    ext4_block_bitmap(desc));
 error_out:
 	return bh;
 }
@@ -432,14 +433,14 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
 	es = sbi->s_es;
 	if (block < le32_to_cpu(es->s_first_data_block) ||
 	    block + count < block ||
-	    block + count > le32_to_cpu(es->s_blocks_count)) {
+	    block + count > ext4_blocks_count(es)) {
 		ext4_error (sb, "ext4_free_blocks",
 			    "Freeing blocks not in datazone - "
 			    "block = "E3FSBLK", count = %lu", block, count);
 		goto error_return;
 	}
 
-	ext4_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
+	ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1);
 
 do_more:
 	overflow = 0;
@@ -460,12 +461,11 @@ do_more:
 	if (!desc)
 		goto error_return;
 
-	if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
-	    in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
-	    in_range (block, le32_to_cpu(desc->bg_inode_table),
-		      sbi->s_itb_per_group) ||
-	    in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
-		      sbi->s_itb_per_group))
+	if (in_range(ext4_block_bitmap(desc), block, count) ||
+	    in_range(ext4_inode_bitmap(desc), block, count) ||
+	    in_range(block, ext4_inode_table(desc), sbi->s_itb_per_group) ||
+	    in_range(block + count - 1, ext4_inode_table(desc),
+		     sbi->s_itb_per_group))
 		ext4_error (sb, "ext4_free_blocks",
 			    "Freeing blocks in system zones - "
 			    "Block = "E3FSBLK", count = %lu",
@@ -552,8 +552,8 @@ do_more:
 						bit + i, bitmap_bh->b_data)) {
 			jbd_unlock_bh_state(bitmap_bh);
 			ext4_error(sb, __FUNCTION__,
-				"bit already cleared for block "E3FSBLK,
-				 block + i);
+				   "bit already cleared for block "E3FSBLK,
+				   (ext4_fsblk_t)(block + i));
 			jbd_lock_bh_state(bitmap_bh);
 			BUFFER_TRACE(bitmap_bh, "bit already cleared");
 		} else {
@@ -1351,7 +1351,7 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi)
 	ext4_fsblk_t free_blocks, root_blocks;
 
 	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-	root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
+	root_blocks = ext4_r_blocks_count(sbi->s_es);
 	if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
 		sbi->s_resuid != current->fsuid &&
 		(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
@@ -1462,7 +1462,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
 	 * First, test whether the goal block is free.
 	 */
 	if (goal < le32_to_cpu(es->s_first_data_block) ||
-	    goal >= le32_to_cpu(es->s_blocks_count))
+	    goal >= ext4_blocks_count(es))
 		goal = le32_to_cpu(es->s_first_data_block);
 	ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
 	goal_group = group_no;
@@ -1561,12 +1561,12 @@ allocated:
 
 	ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
 
-	if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||
-	    in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||
-	    in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),
-		      EXT4_SB(sb)->s_itb_per_group) ||
-	    in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
-		      EXT4_SB(sb)->s_itb_per_group))
+	if (in_range(ext4_block_bitmap(gdp), ret_block, num) ||
+	    in_range(ext4_inode_bitmap(gdp), ret_block, num) ||
+	    in_range(ret_block, ext4_inode_table(gdp),
+		     EXT4_SB(sb)->s_itb_per_group) ||
+	    in_range(ret_block + num - 1, ext4_inode_table(gdp),
+		     EXT4_SB(sb)->s_itb_per_group))
 		ext4_error(sb, "ext4_new_block",
 			    "Allocating block in system zone - "
 			    "blocks from "E3FSBLK", length %lu",
@@ -1604,11 +1604,11 @@ allocated:
 	jbd_unlock_bh_state(bitmap_bh);
 #endif
 
-	if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
+	if (ret_block + num - 1 >= ext4_blocks_count(es)) {
 		ext4_error(sb, "ext4_new_block",
-			    "block("E3FSBLK") >= blocks count(%d) - "
+			    "block("E3FSBLK") >= blocks count("E3FSBLK") - "
 			    "block_group = %lu, es == %p ", ret_block,
-			le32_to_cpu(es->s_blocks_count), group_no, es);
+			ext4_blocks_count(es), group_no, es);
 		goto out;
 	}
 
@@ -1707,7 +1707,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 	brelse(bitmap_bh);
 	printk("ext4_count_free_blocks: stored = "E3FSBLK
 		", computed = "E3FSBLK", "E3FSBLK"\n",
-	       le32_to_cpu(es->s_free_blocks_count),
+	       EXT4_FREE_BLOCKS_COUNT(es),
 		desc_count, bitmap_count);
 	return bitmap_count;
 #else
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 94e1bb4abe31..959b7fa8f5db 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -60,12 +60,12 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group)
 	if (!desc)
 		goto error_out;
 
-	bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
+	bh = sb_bread(sb, ext4_inode_bitmap(desc));
 	if (!bh)
 		ext4_error(sb, "read_inode_bitmap",
 			    "Cannot read inode bitmap - "
-			    "block_group = %lu, inode_bitmap = %u",
-			    block_group, le32_to_cpu(desc->bg_inode_bitmap));
+			    "block_group = %lu, inode_bitmap = %llu",
+			    block_group, ext4_inode_bitmap(desc));
 error_out:
 	return bh;
 }
@@ -304,7 +304,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 		goto fallback;
 	}
 
-	blocks_per_dir = le32_to_cpu(es->s_blocks_count) - freeb;
+	blocks_per_dir = ext4_blocks_count(es) - freeb;
 	sector_div(blocks_per_dir, ndirs);
 
 	max_dirs = ndirs / ngroups + inodes_per_group / 16;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9db8cff3baa4..effc38afebe3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2438,8 +2438,8 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
 	 */
 	offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
 		EXT4_INODE_SIZE(sb);
-	block = le32_to_cpu(gdp[desc].bg_inode_table) +
-		(offset >> EXT4_BLOCK_SIZE_BITS(sb));
+	block = ext4_inode_table(gdp + desc) +
+			(offset >> EXT4_BLOCK_SIZE_BITS(sb));
 
 	iloc->block_group = block_group;
 	iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
@@ -2506,7 +2506,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
 				goto make_io;
 
 			bitmap_bh = sb_getblk(inode->i_sb,
-					le32_to_cpu(desc->bg_inode_bitmap));
+				ext4_inode_bitmap(desc));
 			if (!bitmap_bh)
 				goto make_io;
 
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c60bfed5f5e7..3dbf91b82202 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -27,7 +27,7 @@ static int verify_group_input(struct super_block *sb,
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_super_block *es = sbi->s_es;
-	ext4_fsblk_t start = le32_to_cpu(es->s_blocks_count);
+	ext4_fsblk_t start = ext4_blocks_count(es);
 	ext4_fsblk_t end = start + input->blocks_count;
 	unsigned group = input->group;
 	ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
@@ -68,43 +68,43 @@ static int verify_group_input(struct super_block *sb,
 			     end - 1);
 	else if (outside(input->block_bitmap, start, end))
 		ext4_warning(sb, __FUNCTION__,
-			     "Block bitmap not in group (block %u)",
+			     "Block bitmap not in group (block %llu)",
 			     input->block_bitmap);
 	else if (outside(input->inode_bitmap, start, end))
 		ext4_warning(sb, __FUNCTION__,
-			     "Inode bitmap not in group (block %u)",
+			     "Inode bitmap not in group (block %llu)",
 			     input->inode_bitmap);
 	else if (outside(input->inode_table, start, end) ||
 	         outside(itend - 1, start, end))
 		ext4_warning(sb, __FUNCTION__,
-			     "Inode table not in group (blocks %u-"E3FSBLK")",
+			     "Inode table not in group (blocks %llu-%llu)",
 			     input->inode_table, itend - 1);
 	else if (input->inode_bitmap == input->block_bitmap)
 		ext4_warning(sb, __FUNCTION__,
-			     "Block bitmap same as inode bitmap (%u)",
+			     "Block bitmap same as inode bitmap (%llu)",
 			     input->block_bitmap);
 	else if (inside(input->block_bitmap, input->inode_table, itend))
 		ext4_warning(sb, __FUNCTION__,
-			     "Block bitmap (%u) in inode table (%u-"E3FSBLK")",
+			     "Block bitmap (%llu) in inode table (%llu-%llu)",
 			     input->block_bitmap, input->inode_table, itend-1);
 	else if (inside(input->inode_bitmap, input->inode_table, itend))
 		ext4_warning(sb, __FUNCTION__,
-			     "Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
+			     "Inode bitmap (%llu) in inode table (%llu-%llu)",
 			     input->inode_bitmap, input->inode_table, itend-1);
 	else if (inside(input->block_bitmap, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
-			     "Block bitmap (%u) in GDT table"
+			     "Block bitmap (%llu) in GDT table"
 			     " ("E3FSBLK"-"E3FSBLK")",
 			     input->block_bitmap, start, metaend - 1);
 	else if (inside(input->inode_bitmap, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
-			     "Inode bitmap (%u) in GDT table"
+			     "Inode bitmap (%llu) in GDT table"
 			     " ("E3FSBLK"-"E3FSBLK")",
 			     input->inode_bitmap, start, metaend - 1);
 	else if (inside(input->inode_table, start, metaend) ||
 	         inside(itend - 1, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
-			     "Inode table (%u-"E3FSBLK") overlaps"
+			     "Inode table ("E3FSBLK"-"E3FSBLK") overlaps"
 			     "GDT table ("E3FSBLK"-"E3FSBLK")",
 			     input->inode_table, itend - 1, start, metaend - 1);
 	else
@@ -286,6 +286,7 @@ exit_journal:
 	return err;
 }
 
+
 /*
  * Iterate through the groups which hold BACKUP superblock/GDT copies in an
  * ext4 filesystem.  The counters should be initialized to 1, 5, and 7 before
@@ -340,12 +341,15 @@ static int verify_reserved_gdb(struct super_block *sb,
 	int gdbackups = 0;
 
 	while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
-		if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
+		if (le32_to_cpu(*p++) !=
+		    grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
 			ext4_warning(sb, __FUNCTION__,
 				     "reserved GDT "E3FSBLK
 				     " missing grp %d ("E3FSBLK")",
 				     blk, grp,
-				     grp * EXT4_BLOCKS_PER_GROUP(sb) + blk);
+				     grp *
+				     (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
+				     blk);
 			return -EINVAL;
 		}
 		if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb))
@@ -731,8 +735,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 		return -EPERM;
 	}
 
-	if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
-	    le32_to_cpu(es->s_blocks_count)) {
+	if (ext4_blocks_count(es) + input->blocks_count <
+	    ext4_blocks_count(es)) {
 		ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n");
 		return -EINVAL;
 	}
@@ -830,9 +834,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	/* Update group descriptor block for new group */
 	gdp = (struct ext4_group_desc *)primary->b_data + gdb_off;
 
-	gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap);
-	gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap);
-	gdp->bg_inode_table = cpu_to_le32(input->inode_table);
+	ext4_block_bitmap_set(gdp, input->block_bitmap); /* LV FIXME */
+	ext4_inode_bitmap_set(gdp, input->inode_bitmap); /* LV FIXME */
+	ext4_inode_table_set(gdp, input->inode_table); /* LV FIXME */
 	gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
 	gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
 
@@ -846,7 +850,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	 * blocks/inodes before the group is live won't actually let us
 	 * allocate the new space yet.
 	 */
-	es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) +
+	ext4_blocks_count_set(es, ext4_blocks_count(es) +
 		input->blocks_count);
 	es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) +
 		EXT4_INODES_PER_GROUP(sb));
@@ -882,7 +886,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 
 	/* Update the reserved block counts only once the new group is
 	 * active. */
-	es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) +
+	ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
 		input->reserved_blocks);
 
 	/* Update the free space counts */
@@ -933,7 +937,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 	/* We don't need to worry about locking wrt other resizers just
 	 * yet: we're going to revalidate es->s_blocks_count after
 	 * taking lock_super() below. */
-	o_blocks_count = le32_to_cpu(es->s_blocks_count);
+	o_blocks_count = ext4_blocks_count(es);
 	o_groups_count = EXT4_SB(sb)->s_groups_count;
 
 	if (test_opt(sb, DEBUG))
@@ -1004,7 +1008,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 	}
 
 	lock_super(sb);
-	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
+	if (o_blocks_count != ext4_blocks_count(es)) {
 		ext4_warning(sb, __FUNCTION__,
 			     "multiple resizers run on filesystem!");
 		unlock_super(sb);
@@ -1020,7 +1024,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 		ext4_journal_stop(handle);
 		goto exit_put;
 	}
-	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
+	ext4_blocks_count_set(es, o_blocks_count + add);
 	ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
 	sb->s_dirt = 1;
 	unlock_super(sb);
@@ -1032,8 +1036,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 	if ((err = ext4_journal_stop(handle)))
 		goto exit_put;
 	if (test_opt(sb, DEBUG))
-		printk(KERN_DEBUG "EXT4-fs: extended group to %u blocks\n",
-		       le32_to_cpu(es->s_blocks_count));
+		printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
+		       ext4_blocks_count(es));
 	update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
 		       sizeof(struct ext4_super_block));
 exit_put:
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1d12e4f7d69f..b91dffd7a031 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -62,6 +62,43 @@ static void ext4_unlockfs(struct super_block *sb);
 static void ext4_write_super (struct super_block * sb);
 static void ext4_write_super_lockfs(struct super_block *sb);
 
+
+ext4_fsblk_t ext4_block_bitmap(struct ext4_group_desc *bg)
+{
+	return le32_to_cpu(bg->bg_block_bitmap) |
+		((ext4_fsblk_t)le16_to_cpu(bg->bg_block_bitmap_hi) << 32);
+}
+
+ext4_fsblk_t ext4_inode_bitmap(struct ext4_group_desc *bg)
+{
+	return le32_to_cpu(bg->bg_inode_bitmap) |
+		((ext4_fsblk_t)le16_to_cpu(bg->bg_inode_bitmap_hi) << 32);
+}
+
+ext4_fsblk_t ext4_inode_table(struct ext4_group_desc *bg)
+{
+	return le32_to_cpu(bg->bg_inode_table) |
+		((ext4_fsblk_t)le16_to_cpu(bg->bg_inode_table_hi) << 32);
+}
+
+void ext4_block_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk)
+{
+	bg->bg_block_bitmap = cpu_to_le32((u32)blk);
+	bg->bg_block_bitmap_hi = cpu_to_le16(blk >> 32);
+}
+
+void ext4_inode_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk)
+{
+	bg->bg_inode_bitmap  = cpu_to_le32((u32)blk);
+	bg->bg_inode_bitmap_hi = cpu_to_le16(blk >> 32);
+}
+
+void ext4_inode_table_set(struct ext4_group_desc *bg, ext4_fsblk_t blk)
+{
+	bg->bg_inode_table = cpu_to_le32((u32)blk);
+	bg->bg_inode_table_hi = cpu_to_le16(blk >> 32);
+}
+
 /*
  * Wrappers for jbd2_journal_start/end.
  *
@@ -1182,6 +1219,9 @@ static int ext4_check_descriptors (struct super_block * sb)
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
 	ext4_fsblk_t last_block;
+	ext4_fsblk_t block_bitmap;
+	ext4_fsblk_t inode_bitmap;
+	ext4_fsblk_t inode_table;
 	struct ext4_group_desc * gdp = NULL;
 	int desc_block = 0;
 	int i;
@@ -1191,7 +1231,7 @@ static int ext4_check_descriptors (struct super_block * sb)
 	for (i = 0; i < sbi->s_groups_count; i++)
 	{
 		if (i == sbi->s_groups_count - 1)
-			last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
+			last_block = ext4_blocks_count(sbi->s_es) - 1;
 		else
 			last_block = first_block +
 				(EXT4_BLOCKS_PER_GROUP(sb) - 1);
@@ -1199,42 +1239,39 @@ static int ext4_check_descriptors (struct super_block * sb)
 		if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0)
 			gdp = (struct ext4_group_desc *)
 					sbi->s_group_desc[desc_block++]->b_data;
-		if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
-		    le32_to_cpu(gdp->bg_block_bitmap) > last_block)
+		block_bitmap = ext4_block_bitmap(gdp);
+		if (block_bitmap < first_block || block_bitmap > last_block)
 		{
 			ext4_error (sb, "ext4_check_descriptors",
 				    "Block bitmap for group %d"
-				    " not in group (block %lu)!",
-				    i, (unsigned long)
-					le32_to_cpu(gdp->bg_block_bitmap));
+				    " not in group (block "E3FSBLK")!",
+				    i, block_bitmap);
 			return 0;
 		}
-		if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
-		    le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
+		inode_bitmap = ext4_inode_bitmap(gdp);
+		if (inode_bitmap < first_block || inode_bitmap > last_block)
 		{
 			ext4_error (sb, "ext4_check_descriptors",
 				    "Inode bitmap for group %d"
-				    " not in group (block %lu)!",
-				    i, (unsigned long)
-					le32_to_cpu(gdp->bg_inode_bitmap));
+				    " not in group (block "E3FSBLK")!",
+				    i, inode_bitmap);
 			return 0;
 		}
-		if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
-		    le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
-		    last_block)
+		inode_table = ext4_inode_table(gdp);
+		if (inode_table < first_block ||
+		    inode_table + sbi->s_itb_per_group > last_block)
 		{
 			ext4_error (sb, "ext4_check_descriptors",
 				    "Inode table for group %d"
-				    " not in group (block %lu)!",
-				    i, (unsigned long)
-					le32_to_cpu(gdp->bg_inode_table));
+				    " not in group (block "E3FSBLK")!",
+				    i, inode_table);
 			return 0;
 		}
 		first_block += EXT4_BLOCKS_PER_GROUP(sb);
 		gdp++;
 	}
 
-	sbi->s_es->s_free_blocks_count=cpu_to_le32(ext4_count_free_blocks(sb));
+	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
 	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb));
 	return 1;
 }
@@ -1411,6 +1448,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	int i;
 	int needs_recovery;
 	__le32 features;
+	__u64 blocks_count;
 
 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
@@ -1620,7 +1658,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 	}
 
-	if (le32_to_cpu(es->s_blocks_count) >
+	if (ext4_blocks_count(es) >
 		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
 		printk(KERN_ERR "EXT4-fs: filesystem on %s:"
 			" too large to mount safely\n", sb->s_id);
@@ -1632,9 +1670,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 
 	if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
 		goto cantfind_ext4;
-	sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
-			       le32_to_cpu(es->s_first_data_block) - 1)
-				       / EXT4_BLOCKS_PER_GROUP(sb)) + 1;
+	blocks_count = (ext4_blocks_count(es) -
+			le32_to_cpu(es->s_first_data_block) +
+			EXT4_BLOCKS_PER_GROUP(sb) - 1);
+	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
+	sbi->s_groups_count = blocks_count;
 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
 		   EXT4_DESC_PER_BLOCK(sb);
 	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
@@ -1949,7 +1989,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 		goto out_bdev;
 	}
 
-	len = le32_to_cpu(es->s_blocks_count);
+	len = ext4_blocks_count(es);
 	start = sb_block + 1;
 	brelse(bh);	/* we're done with the superblock */
 
@@ -2119,7 +2159,7 @@ static void ext4_commit_super (struct super_block * sb,
 	if (!sbh)
 		return;
 	es->s_wtime = cpu_to_le32(get_seconds());
-	es->s_free_blocks_count = cpu_to_le32(ext4_count_free_blocks(sb));
+	ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
 	es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
 	BUFFER_TRACE(sbh, "marking dirty");
 	mark_buffer_dirty(sbh);
@@ -2312,7 +2352,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
 	ext4_init_journal_params(sb, sbi->s_journal);
 
 	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
-		n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
+		n_blocks_count > ext4_blocks_count(es)) {
 		if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
 			err = -EROFS;
 			goto restore_opts;
@@ -2431,10 +2471,10 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 
 	buf->f_type = EXT4_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
+	buf->f_blocks = ext4_blocks_count(es) - overhead;
 	buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
-	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
-	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
+	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
+	if (buf->f_bfree < ext4_r_blocks_count(es))
 		buf->f_bavail = 0;
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
-- 
cgit v1.2.3


From 2ae0210760aed9d626eaede5b63db95e198f7c8e Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 11 Oct 2006 01:21:11 -0700
Subject: [PATCH] ext4: blk_type from sector_t to unsigned long long

Change the ext4 in-kernel block type (ext4_fsblk_t) from sector_t to unsigned
long long.  Remove the ext4 block type format-string macro E3FSBLK, replacing
it with "%llu".

[akpm@osdl.org: build fix]
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c  | 18 +++++++++---------
 fs/ext4/extents.c | 38 +++++++++++++++++++-------------------
 fs/ext4/inode.c   |  6 +++---
 fs/ext4/resize.c  | 28 ++++++++++++++--------------
 fs/ext4/super.c   |  6 +++---
 fs/ext4/xattr.c   | 12 ++++++------
 6 files changed, 54 insertions(+), 54 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6887151ccc47..df77ea891f29 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -103,7 +103,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
 	if (!bh)
 		ext4_error (sb, "read_block_bitmap",
 			    "Cannot read block bitmap - "
-			    "block_group = %d, block_bitmap = "E3FSBLK,
+			    "block_group = %d, block_bitmap = %llu",
 			    block_group,
 			    ext4_block_bitmap(desc));
 error_out:
@@ -148,7 +148,7 @@ restart:
 		rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node);
 		if (verbose)
 			printk("reservation window 0x%p "
-			       "start:  "E3FSBLK", end:  "E3FSBLK"\n",
+			       "start:  %llu, end:  %llu\n",
 			       rsv, rsv->rsv_start, rsv->rsv_end);
 		if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
 			printk("Bad reservation %p (start >= end)\n",
@@ -436,7 +436,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
 	    block + count > ext4_blocks_count(es)) {
 		ext4_error (sb, "ext4_free_blocks",
 			    "Freeing blocks not in datazone - "
-			    "block = "E3FSBLK", count = %lu", block, count);
+			    "block = %llu, count = %lu", block, count);
 		goto error_return;
 	}
 
@@ -468,7 +468,7 @@ do_more:
 		     sbi->s_itb_per_group))
 		ext4_error (sb, "ext4_free_blocks",
 			    "Freeing blocks in system zones - "
-			    "Block = "E3FSBLK", count = %lu",
+			    "Block = %llu, count = %lu",
 			    block, count);
 
 	/*
@@ -552,7 +552,7 @@ do_more:
 						bit + i, bitmap_bh->b_data)) {
 			jbd_unlock_bh_state(bitmap_bh);
 			ext4_error(sb, __FUNCTION__,
-				   "bit already cleared for block "E3FSBLK,
+				   "bit already cleared for block %llu",
 				   (ext4_fsblk_t)(block + i));
 			jbd_lock_bh_state(bitmap_bh);
 			BUFFER_TRACE(bitmap_bh, "bit already cleared");
@@ -1569,7 +1569,7 @@ allocated:
 		     EXT4_SB(sb)->s_itb_per_group))
 		ext4_error(sb, "ext4_new_block",
 			    "Allocating block in system zone - "
-			    "blocks from "E3FSBLK", length %lu",
+			    "blocks from %llu, length %lu",
 			     ret_block, num);
 
 	performed_allocation = 1;
@@ -1606,7 +1606,7 @@ allocated:
 
 	if (ret_block + num - 1 >= ext4_blocks_count(es)) {
 		ext4_error(sb, "ext4_new_block",
-			    "block("E3FSBLK") >= blocks count("E3FSBLK") - "
+			    "block(%llu) >= blocks count(%llu) - "
 			    "block_group = %lu, es == %p ", ret_block,
 			ext4_blocks_count(es), group_no, es);
 		goto out;
@@ -1705,8 +1705,8 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 		bitmap_count += x;
 	}
 	brelse(bitmap_bh);
-	printk("ext4_count_free_blocks: stored = "E3FSBLK
-		", computed = "E3FSBLK", "E3FSBLK"\n",
+	printk("ext4_count_free_blocks: stored = %llu"
+		", computed = %llu, %llu\n",
 	       EXT4_FREE_BLOCKS_COUNT(es),
 		desc_count, bitmap_count);
 	return bitmap_count;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e06e937a52b8..f72b7567bfa2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -281,10 +281,10 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
 	ext_debug("path:");
 	for (k = 0; k <= l; k++, path++) {
 		if (path->p_idx) {
-		  ext_debug("  %d->"E3FSBLK, le32_to_cpu(path->p_idx->ei_block),
+		  ext_debug("  %d->%llu", le32_to_cpu(path->p_idx->ei_block),
 			    idx_pblock(path->p_idx));
 		} else if (path->p_ext) {
-			ext_debug("  %d:%d:"E3FSBLK" ",
+			ext_debug("  %d:%d:%llu ",
 				  le32_to_cpu(path->p_ext->ee_block),
 				  le16_to_cpu(path->p_ext->ee_len),
 				  ext_pblock(path->p_ext));
@@ -308,7 +308,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
 	ex = EXT_FIRST_EXTENT(eh);
 
 	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
-		ext_debug("%d:%d:"E3FSBLK" ", le32_to_cpu(ex->ee_block),
+		ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block),
 			  le16_to_cpu(ex->ee_len), ext_pblock(ex));
 	}
 	ext_debug("\n");
@@ -426,7 +426,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
 	}
 
 	path->p_ext = l - 1;
-	ext_debug("  -> %d:"E3FSBLK":%d ",
+	ext_debug("  -> %d:%llu:%d ",
 		        le32_to_cpu(path->p_ext->ee_block),
 		        ext_pblock(path->p_ext),
 			le16_to_cpu(path->p_ext->ee_len));
@@ -687,7 +687,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	path[depth].p_ext++;
 	while (path[depth].p_ext <=
 			EXT_MAX_EXTENT(path[depth].p_hdr)) {
-		ext_debug("move %d:"E3FSBLK":%d in new leaf "E3FSBLK"\n",
+		ext_debug("move %d:%llu:%d in new leaf %llu\n",
 			        le32_to_cpu(path[depth].p_ext->ee_block),
 			        ext_pblock(path[depth].p_ext),
 			        le16_to_cpu(path[depth].p_ext->ee_len),
@@ -752,7 +752,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		fidx->ei_block = border;
 		ext4_idx_store_pblock(fidx, oldblock);
 
-		ext_debug("int.index at %d (block "E3FSBLK"): %lu -> "E3FSBLK"\n", i,
+		ext_debug("int.index at %d (block %llu): %lu -> %llu\n", i,
 				newblock, (unsigned long) le32_to_cpu(border),
 				oldblock);
 		/* copy indexes */
@@ -764,7 +764,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
 				EXT_LAST_INDEX(path[i].p_hdr));
 		while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
-			ext_debug("%d: move %d:%d in new index "E3FSBLK"\n", i,
+			ext_debug("%d: move %d:%d in new index %llu\n", i,
 				        le32_to_cpu(path[i].p_idx->ei_block),
 				        idx_pblock(path[i].p_idx),
 				        newblock);
@@ -898,7 +898,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 
 	neh = ext_inode_hdr(inode);
 	fidx = EXT_FIRST_INDEX(neh);
-	ext_debug("new root: num %d(%d), lblock %d, ptr "E3FSBLK"\n",
+	ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
 		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
 		  le32_to_cpu(fidx->ei_block), idx_pblock(fidx));
 
@@ -1145,7 +1145,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 
 	/* try to insert block into found extent and return */
 	if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
-		ext_debug("append %d block to %d:%d (from "E3FSBLK")\n",
+		ext_debug("append %d block to %d:%d (from %llu)\n",
 				le16_to_cpu(newext->ee_len),
 				le32_to_cpu(ex->ee_block),
 				le16_to_cpu(ex->ee_len), ext_pblock(ex));
@@ -1204,7 +1204,7 @@ has_space:
 
 	if (!nearex) {
 		/* there is no extent in this leaf, create first one */
-		ext_debug("first extent in the leaf: %d:"E3FSBLK":%d\n",
+		ext_debug("first extent in the leaf: %d:%llu:%d\n",
 			        le32_to_cpu(newext->ee_block),
 			        ext_pblock(newext),
 			        le16_to_cpu(newext->ee_len));
@@ -1216,7 +1216,7 @@ has_space:
 			len = EXT_MAX_EXTENT(eh) - nearex;
 			len = (len - 1) * sizeof(struct ext4_extent);
 			len = len < 0 ? 0 : len;
-			ext_debug("insert %d:"E3FSBLK":%d after: nearest 0x%p, "
+			ext_debug("insert %d:%llu:%d after: nearest 0x%p, "
 					"move %d from 0x%p to 0x%p\n",
 				        le32_to_cpu(newext->ee_block),
 				        ext_pblock(newext),
@@ -1229,7 +1229,7 @@ has_space:
 		BUG_ON(newext->ee_block == nearex->ee_block);
 		len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
 		len = len < 0 ? 0 : len;
-		ext_debug("insert %d:"E3FSBLK":%d before: nearest 0x%p, "
+		ext_debug("insert %d:%llu:%d before: nearest 0x%p, "
 				"move %d from 0x%p to 0x%p\n",
 				le32_to_cpu(newext->ee_block),
 				ext_pblock(newext),
@@ -1464,7 +1464,7 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block,
 	        ex->ee_block = cpu_to_le32(cex->ec_block);
 		ext4_ext_store_pblock(ex, cex->ec_start);
 	        ex->ee_len = cpu_to_le16(cex->ec_len);
-		ext_debug("%lu cached by %lu:%lu:"E3FSBLK"\n",
+		ext_debug("%lu cached by %lu:%lu:%llu\n",
 				(unsigned long) block,
 				(unsigned long) cex->ec_block,
 				(unsigned long) cex->ec_len,
@@ -1498,7 +1498,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1);
 	if ((err = ext4_ext_dirty(handle, inode, path)))
 		return err;
-	ext_debug("index is empty, remove it, free block "E3FSBLK"\n", leaf);
+	ext_debug("index is empty, remove it, free block %llu\n", leaf);
 	bh = sb_find_get_block(inode->i_sb, leaf);
 	ext4_forget(handle, 1, inode, bh, leaf);
 	ext4_free_blocks(handle, inode, leaf, 1);
@@ -1585,7 +1585,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		ext4_fsblk_t start;
 		num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from;
 		start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num;
-		ext_debug("free last %lu blocks starting "E3FSBLK"\n", num, start);
+		ext_debug("free last %lu blocks starting %llu\n", num, start);
 		for (i = 0; i < num; i++) {
 			bh = sb_find_get_block(inode->i_sb, start + i);
 			ext4_forget(handle, 0, inode, bh, start + i);
@@ -1699,7 +1699,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		if (err)
 			goto out;
 
-		ext_debug("new extent: %u:%u:"E3FSBLK"\n", block, num,
+		ext_debug("new extent: %u:%u:%llu\n", block, num,
 				ext_pblock(ex));
 		ex--;
 		ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -1816,7 +1816,7 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
 				path[i].p_idx);
 		if (ext4_ext_more_to_rm(path + i)) {
 			/* go to the next level */
-			ext_debug("move to level %d (block "E3FSBLK")\n",
+			ext_debug("move to level %d (block %llu)\n",
 				  i + 1, idx_pblock(path[i].p_idx));
 			memset(path + i + 1, 0, sizeof(*path));
 			path[i+1].p_bh =
@@ -1993,7 +1993,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			newblock = iblock - ee_block + ee_start;
 			/* number of remaining blocks in the extent */
 			allocated = ee_len - (iblock - ee_block);
-			ext_debug("%d fit into %lu:%d -> "E3FSBLK"\n", (int) iblock,
+			ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock,
 					ee_block, ee_len, newblock);
 			ext4_ext_put_in_cache(inode, ee_block, ee_len,
 						ee_start, EXT4_EXT_CACHE_EXTENT);
@@ -2024,7 +2024,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
 	if (!newblock)
 		goto out2;
-	ext_debug("allocate new block: goal "E3FSBLK", found "E3FSBLK"/%lu\n",
+	ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
 			goal, newblock, allocated);
 
 	/* try to insert new extent into found leaf and return */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index effc38afebe3..99b82b52b5f0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2115,7 +2115,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 			 */
 			if (!bh) {
 				ext4_error(inode->i_sb, "ext4_free_branches",
-					   "Read failure, inode=%lu, block="E3FSBLK,
+					   "Read failure, inode=%lu, block=%llu",
 					   inode->i_ino, nr);
 				continue;
 			}
@@ -2466,7 +2466,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
 	if (!bh) {
 		ext4_error (inode->i_sb, "ext4_get_inode_loc",
 				"unable to read inode block - "
-				"inode=%lu, block="E3FSBLK,
+				"inode=%lu, block=%llu",
 				 inode->i_ino, block);
 		return -EIO;
 	}
@@ -2548,7 +2548,7 @@ make_io:
 		if (!buffer_uptodate(bh)) {
 			ext4_error(inode->i_sb, "ext4_get_inode_loc",
 					"unable to read inode block - "
-					"inode=%lu, block="E3FSBLK,
+					"inode=%lu, block=%llu",
 					inode->i_ino, block);
 			brelse(bh);
 			return -EIO;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3dbf91b82202..3e960677c2f2 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -64,7 +64,7 @@ static int verify_group_input(struct super_block *sb,
 			     input->blocks_count);
 	else if (!(bh = sb_bread(sb, end - 1)))
 		ext4_warning(sb, __FUNCTION__,
-			     "Cannot read last block ("E3FSBLK")",
+			     "Cannot read last block (%llu)",
 			     end - 1);
 	else if (outside(input->block_bitmap, start, end))
 		ext4_warning(sb, __FUNCTION__,
@@ -94,18 +94,18 @@ static int verify_group_input(struct super_block *sb,
 	else if (inside(input->block_bitmap, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap (%llu) in GDT table"
-			     " ("E3FSBLK"-"E3FSBLK")",
+			     " (%llu-%llu)",
 			     input->block_bitmap, start, metaend - 1);
 	else if (inside(input->inode_bitmap, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode bitmap (%llu) in GDT table"
-			     " ("E3FSBLK"-"E3FSBLK")",
+			     " (%llu-%llu)",
 			     input->inode_bitmap, start, metaend - 1);
 	else if (inside(input->inode_table, start, metaend) ||
 	         inside(itend - 1, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
-			     "Inode table ("E3FSBLK"-"E3FSBLK") overlaps"
-			     "GDT table ("E3FSBLK"-"E3FSBLK")",
+			     "Inode table (%llu-%llu) overlaps"
+			     "GDT table (%llu-%llu)",
 			     input->inode_table, itend - 1, start, metaend - 1);
 	else
 		err = 0;
@@ -344,8 +344,8 @@ static int verify_reserved_gdb(struct super_block *sb,
 		if (le32_to_cpu(*p++) !=
 		    grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
 			ext4_warning(sb, __FUNCTION__,
-				     "reserved GDT "E3FSBLK
-				     " missing grp %d ("E3FSBLK")",
+				     "reserved GDT %llu"
+				     " missing grp %d (%llu)",
 				     blk, grp,
 				     grp *
 				     (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
@@ -424,7 +424,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	data = (__le32 *)dind->b_data;
 	if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
 		ext4_warning(sb, __FUNCTION__,
-			     "new group %u GDT block "E3FSBLK" not reserved",
+			     "new group %u GDT block %llu not reserved",
 			     input->group, gdblock);
 		err = -EINVAL;
 		goto exit_dind;
@@ -547,7 +547,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	for (res = 0; res < reserved_gdb; res++, blk++) {
 		if (le32_to_cpu(*data) != blk) {
 			ext4_warning(sb, __FUNCTION__,
-				     "reserved block "E3FSBLK
+				     "reserved block %llu"
 				     " not at offset %ld",
 				     blk,
 				     (long)(data - (__le32 *)dind->b_data));
@@ -941,7 +941,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 	o_groups_count = EXT4_SB(sb)->s_groups_count;
 
 	if (test_opt(sb, DEBUG))
-		printk(KERN_DEBUG "EXT4-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n",
+		printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n",
 		       o_blocks_count, n_blocks_count);
 
 	if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
@@ -949,7 +949,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 
 	if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
 		printk(KERN_ERR "EXT4-fs: filesystem on %s:"
-			" too large to resize to "E3FSBLK" blocks safely\n",
+			" too large to resize to %llu blocks safely\n",
 			sb->s_id, n_blocks_count);
 		if (sizeof(sector_t) < 8)
 			ext4_warning(sb, __FUNCTION__,
@@ -984,7 +984,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 
 	if (o_blocks_count + add < n_blocks_count)
 		ext4_warning(sb, __FUNCTION__,
-			     "will only finish group ("E3FSBLK
+			     "will only finish group (%llu"
 			     " blocks, %u new)",
 			     o_blocks_count + add, add);
 
@@ -1028,10 +1028,10 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 	ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
 	sb->s_dirt = 1;
 	unlock_super(sb);
-	ext4_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
+	ext4_debug("freeing blocks %lu through %llu\n", o_blocks_count,
 		   o_blocks_count + add);
 	ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
-	ext4_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count,
+	ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
 		   o_blocks_count + add);
 	if ((err = ext4_journal_stop(handle)))
 		goto exit_put;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b91dffd7a031..d844175e60e8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1244,7 +1244,7 @@ static int ext4_check_descriptors (struct super_block * sb)
 		{
 			ext4_error (sb, "ext4_check_descriptors",
 				    "Block bitmap for group %d"
-				    " not in group (block "E3FSBLK")!",
+				    " not in group (block %llu)!",
 				    i, block_bitmap);
 			return 0;
 		}
@@ -1253,7 +1253,7 @@ static int ext4_check_descriptors (struct super_block * sb)
 		{
 			ext4_error (sb, "ext4_check_descriptors",
 				    "Inode bitmap for group %d"
-				    " not in group (block "E3FSBLK")!",
+				    " not in group (block %llu)!",
 				    i, inode_bitmap);
 			return 0;
 		}
@@ -1263,7 +1263,7 @@ static int ext4_check_descriptors (struct super_block * sb)
 		{
 			ext4_error (sb, "ext4_check_descriptors",
 				    "Inode table for group %d"
-				    " not in group (block "E3FSBLK")!",
+				    " not in group (block %llu)!",
 				    i, inode_table);
 			return 0;
 		}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 90f7d5c0bae4..63233cd946a7 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -233,7 +233,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
 	if (ext4_xattr_check_block(bh)) {
 bad_block:	ext4_error(inode->i_sb, __FUNCTION__,
-			   "inode %lu: bad block "E3FSBLK, inode->i_ino,
+			   "inode %lu: bad block %llu", inode->i_ino,
 			   EXT4_I(inode)->i_file_acl);
 		error = -EIO;
 		goto cleanup;
@@ -375,7 +375,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
 	if (ext4_xattr_check_block(bh)) {
 		ext4_error(inode->i_sb, __FUNCTION__,
-			   "inode %lu: bad block "E3FSBLK, inode->i_ino,
+			   "inode %lu: bad block %llu", inode->i_ino,
 			   EXT4_I(inode)->i_file_acl);
 		error = -EIO;
 		goto cleanup;
@@ -647,7 +647,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
 			le32_to_cpu(BHDR(bs->bh)->h_refcount));
 		if (ext4_xattr_check_block(bs->bh)) {
 			ext4_error(sb, __FUNCTION__,
-				"inode %lu: bad block "E3FSBLK, inode->i_ino,
+				"inode %lu: bad block %llu", inode->i_ino,
 				EXT4_I(inode)->i_file_acl);
 			error = -EIO;
 			goto cleanup;
@@ -848,7 +848,7 @@ cleanup_dquot:
 
 bad_block:
 	ext4_error(inode->i_sb, __FUNCTION__,
-		   "inode %lu: bad block "E3FSBLK, inode->i_ino,
+		   "inode %lu: bad block %llu", inode->i_ino,
 		   EXT4_I(inode)->i_file_acl);
 	goto cleanup;
 
@@ -1077,14 +1077,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
 	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
 	if (!bh) {
 		ext4_error(inode->i_sb, __FUNCTION__,
-			"inode %lu: block "E3FSBLK" read error", inode->i_ino,
+			"inode %lu: block %llu read error", inode->i_ino,
 			EXT4_I(inode)->i_file_acl);
 		goto cleanup;
 	}
 	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
 		ext4_error(inode->i_sb, __FUNCTION__,
-			"inode %lu: bad block "E3FSBLK, inode->i_ino,
+			"inode %lu: bad block %llu", inode->i_ino,
 			EXT4_I(inode)->i_file_acl);
 		goto cleanup;
 	}
-- 
cgit v1.2.3


From 9b8f1f0106ab39ad58765d4e7c57189835f51127 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 11 Oct 2006 01:21:13 -0700
Subject: [PATCH] ext4: remove sector_t bits check

Previously, when the in-kernel ext4 block type was sector_t, it was only 4 bytes
long on some 32-bit architectures (when CONFIG_LBD is not on).  So we needed to
check the size of sector_t before reading 48-bit on-disk block numbers into
in-kernel blocks.

These checks are unnecessary now that we have changed the in-kernel block type
to unsigned long long.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 12 ++++--------
 fs/ext4/inode.c   | 10 ++++------
 2 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f72b7567bfa2..926186a787a8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -53,8 +53,7 @@ static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
 	ext4_fsblk_t block;
 
 	block = le32_to_cpu(ex->ee_start);
-	if (sizeof(ext4_fsblk_t) > 4)
-		block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
+	block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
 	return block;
 }
 
@@ -67,8 +66,7 @@ static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 	ext4_fsblk_t block;
 
 	block = le32_to_cpu(ix->ei_leaf);
-	if (sizeof(ext4_fsblk_t) > 4)
-		block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
+	block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
 	return block;
 }
 
@@ -80,8 +78,7 @@ static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
 {
 	ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
-	if (sizeof(ext4_fsblk_t) > 4)
-		ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
+	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }
 
 /*
@@ -92,8 +89,7 @@ static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb
 static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
 {
 	ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
-	if (sizeof(ext4_fsblk_t) > 4)
-		ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
+	ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }
 
 static int ext4_ext_check_header(const char *function, struct inode *inode,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 99b82b52b5f0..c05dc57148bb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2643,9 +2643,8 @@ void ext4_read_inode(struct inode * inode)
 	ei->i_frag_size = raw_inode->i_fsize;
 #endif
 	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
-	if ((sizeof(sector_t) > 4) &&
-	    (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
-	     cpu_to_le32(EXT4_OS_HURD)))
+	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+	    cpu_to_le32(EXT4_OS_HURD))
 		ei->i_file_acl |=
 			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
 	if (!S_ISREG(inode->i_mode)) {
@@ -2781,9 +2780,8 @@ static int ext4_do_update_inode(handle_t *handle,
 	raw_inode->i_frag = ei->i_frag_no;
 	raw_inode->i_fsize = ei->i_frag_size;
 #endif
-	if ((sizeof(sector_t) > 4) &&
-	    (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
-	     cpu_to_le32(EXT4_OS_HURD)))
+	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+	    cpu_to_le32(EXT4_OS_HURD))
 		raw_inode->i_file_acl_high =
 			cpu_to_le16(ei->i_file_acl >> 32);
 	raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
-- 
cgit v1.2.3


From 18eba7aae080d4a5c0d850ea810e83d11f0a8d77 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 11 Oct 2006 01:21:13 -0700
Subject: [PATCH] jbd2: switch blks_type from sector_t to ull

Similar to ext4, change blocks in JBD2 from sector_t to unsigned long long.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/commit.c   |  4 ++--
 fs/jbd2/journal.c  | 18 +++++++++---------
 fs/jbd2/recovery.c | 12 ++++++------
 fs/jbd2/revoke.c   | 14 +++++++-------
 4 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 1a9ce8885220..70b2ae1ef281 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -272,7 +272,7 @@ write_out_data:
 }
 
 static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
-				   sector_t block)
+				   unsigned long long block)
 {
 	tag->t_blocknr = cpu_to_be32(block & (u32)~0);
 	if (tag_bytes > JBD_TAG_SIZE32)
@@ -293,7 +293,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	int bufs;
 	int flags;
 	int err;
-	sector_t blocknr;
+	unsigned long long blocknr;
 	char *tagp = NULL;
 	journal_header_t *header;
 	journal_block_tag_t *tag = NULL;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 259e8365ea15..10db92ced014 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -271,7 +271,7 @@ static void journal_kill_thread(journal_t *journal)
 int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 				  struct journal_head  *jh_in,
 				  struct journal_head **jh_out,
-				  sector_t blocknr)
+				  unsigned long long blocknr)
 {
 	int need_copy_out = 0;
 	int done_copy_out = 0;
@@ -555,7 +555,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
  * Log buffer allocation routines:
  */
 
-int jbd2_journal_next_log_block(journal_t *journal, sector_t *retp)
+int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
 {
 	unsigned long blocknr;
 
@@ -579,10 +579,10 @@ int jbd2_journal_next_log_block(journal_t *journal, sector_t *retp)
  * ready.
  */
 int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
-		 sector_t *retp)
+		 unsigned long long *retp)
 {
 	int err = 0;
-	sector_t ret;
+	unsigned long long ret;
 
 	if (journal->j_inode) {
 		ret = bmap(journal->j_inode, blocknr);
@@ -618,7 +618,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
 struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
 {
 	struct buffer_head *bh;
-	sector_t blocknr;
+	unsigned long long blocknr;
 	int err;
 
 	err = jbd2_journal_next_log_block(journal, &blocknr);
@@ -706,7 +706,7 @@ fail:
  */
 journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 			struct block_device *fs_dev,
-			sector_t start, int len, int blocksize)
+			unsigned long long start, int len, int blocksize)
 {
 	journal_t *journal = journal_init_common();
 	struct buffer_head *bh;
@@ -753,7 +753,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
 	journal_t *journal = journal_init_common();
 	int err;
 	int n;
-	sector_t blocknr;
+	unsigned long long blocknr;
 
 	if (!journal)
 		return NULL;
@@ -819,7 +819,7 @@ static void journal_fail_superblock (journal_t *journal)
 static int journal_reset(journal_t *journal)
 {
 	journal_superblock_t *sb = journal->j_superblock;
-	sector_t first, last;
+	unsigned long long first, last;
 
 	first = be32_to_cpu(sb->s_first);
 	last = be32_to_cpu(sb->s_maxlen);
@@ -853,7 +853,7 @@ static int journal_reset(journal_t *journal)
  **/
 int jbd2_journal_create(journal_t *journal)
 {
-	sector_t blocknr;
+	unsigned long long blocknr;
 	struct buffer_head *bh;
 	journal_superblock_t *sb;
 	int i, err;
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 52054a83e717..9f10acafaf70 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -70,7 +70,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
 {
 	int err;
 	unsigned int max, nbufs, next;
-	sector_t blocknr;
+	unsigned long long blocknr;
 	struct buffer_head *bh;
 
 	struct buffer_head * bufs[MAXBUF];
@@ -132,7 +132,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
 		 unsigned int offset)
 {
 	int err;
-	sector_t blocknr;
+	unsigned long long blocknr;
 	struct buffer_head *bh;
 
 	*bhp = NULL;
@@ -308,9 +308,9 @@ int jbd2_journal_skip_recovery(journal_t *journal)
 	return err;
 }
 
-static inline sector_t read_tag_block(int tag_bytes, journal_block_tag_t *tag)
+static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
 {
-	sector_t block = be32_to_cpu(tag->t_blocknr);
+	unsigned long long block = be32_to_cpu(tag->t_blocknr);
 	if (tag_bytes > JBD_TAG_SIZE32)
 		block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
 	return block;
@@ -452,7 +452,7 @@ static int do_one_pass(journal_t *journal,
 						"block %ld in log\n",
 						err, io_block);
 				} else {
-					sector_t blocknr;
+					unsigned long long blocknr;
 
 					J_ASSERT(obh != NULL);
 					blocknr = read_tag_block(tag_bytes,
@@ -592,7 +592,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 		record_len = 8;
 
 	while (offset + record_len <= max) {
-		sector_t blocknr;
+		unsigned long long blocknr;
 		int err;
 
 		if (record_len == 4)
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 3310a1d7ace9..380d19917f37 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -81,7 +81,7 @@ struct jbd2_revoke_record_s
 {
 	struct list_head  hash;
 	tid_t		  sequence;	/* Used for recovery only */
-	sector_t	  blocknr;
+	unsigned long long	  blocknr;
 };
 
 
@@ -106,7 +106,7 @@ static void flush_descriptor(journal_t *, struct journal_head *, int);
 /* Utility functions to maintain the revoke table */
 
 /* Borrowed from buffer.c: this is a tried and tested block hash function */
-static inline int hash(journal_t *journal, sector_t block)
+static inline int hash(journal_t *journal, unsigned long long block)
 {
 	struct jbd2_revoke_table_s *table = journal->j_revoke;
 	int hash_shift = table->hash_shift;
@@ -117,7 +117,7 @@ static inline int hash(journal_t *journal, sector_t block)
 		(hash << (hash_shift - 12))) & (table->hash_size - 1);
 }
 
-static int insert_revoke_hash(journal_t *journal, sector_t blocknr,
+static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
 			      tid_t seq)
 {
 	struct list_head *hash_list;
@@ -147,7 +147,7 @@ oom:
 /* Find a revoke record in the journal's hash table. */
 
 static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
-						      sector_t blocknr)
+						      unsigned long long blocknr)
 {
 	struct list_head *hash_list;
 	struct jbd2_revoke_record_s *record;
@@ -326,7 +326,7 @@ void jbd2_journal_destroy_revoke(journal_t *journal)
  * by one.
  */
 
-int jbd2_journal_revoke(handle_t *handle, sector_t blocknr,
+int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
 		   struct buffer_head *bh_in)
 {
 	struct buffer_head *bh = NULL;
@@ -650,7 +650,7 @@ static void flush_descriptor(journal_t *journal,
  */
 
 int jbd2_journal_set_revoke(journal_t *journal,
-		       sector_t blocknr,
+		       unsigned long long blocknr,
 		       tid_t sequence)
 {
 	struct jbd2_revoke_record_s *record;
@@ -674,7 +674,7 @@ int jbd2_journal_set_revoke(journal_t *journal,
  */
 
 int jbd2_journal_test_revoke(journal_t *journal,
-			sector_t blocknr,
+			unsigned long long blocknr,
 			tid_t sequence)
 {
 	struct jbd2_revoke_record_s *record;
-- 
cgit v1.2.3


From 0d1ee42f27d30eed1659f3e85bcbbc7b3711f61f Mon Sep 17 00:00:00 2001
From: Alexandre Ratchov <alexandre.ratchov@bull.net>
Date: Wed, 11 Oct 2006 01:21:14 -0700
Subject: [PATCH] ext4: allow larger descriptor size

make block group descriptor larger.

Signed-off-by: Alexandre Ratchov <alexandre.ratchov@bull.net>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c |  6 ++++--
 fs/ext4/inode.c  |  8 +++++---
 fs/ext4/super.c  | 18 +++++++++++++++---
 3 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index df77ea891f29..3dacb124b8c8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -74,10 +74,12 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 		return NULL;
 	}
 
-	desc = (struct ext4_group_desc *) sbi->s_group_desc[group_desc]->b_data;
+	desc = (struct ext4_group_desc *)(
+		(__u8 *)sbi->s_group_desc[group_desc]->b_data +
+		offset * EXT4_DESC_SIZE(sb));
 	if (bh)
 		*bh = sbi->s_group_desc[group_desc];
-	return desc + offset;
+	return desc;
 }
 
 /**
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c05dc57148bb..d03e7d85a638 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2432,14 +2432,16 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
 		return 0;
 	}
 
-	gdp = (struct ext4_group_desc *)bh->b_data;
+	gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
+		desc * EXT4_DESC_SIZE(sb));
 	/*
 	 * Figure out the offset within the block group inode table
 	 */
 	offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
 		EXT4_INODE_SIZE(sb);
-	block = ext4_inode_table(gdp + desc) +
-			(offset >> EXT4_BLOCK_SIZE_BITS(sb));
+	block = ext4_inode_table(gdp) + (offset >> EXT4_BLOCK_SIZE_BITS(sb));
+
+
 
 	iloc->block_group = block_group;
 	iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d844175e60e8..bc8848bff2f1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1268,7 +1268,8 @@ static int ext4_check_descriptors (struct super_block * sb)
 			return 0;
 		}
 		first_block += EXT4_BLOCKS_PER_GROUP(sb);
-		gdp++;
+		gdp = (struct ext4_group_desc *)
+			((__u8 *)gdp + EXT4_DESC_SIZE(sb));
 	}
 
 	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
@@ -1619,7 +1620,18 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		       sbi->s_frag_size, blocksize);
 		goto failed_mount;
 	}
-	sbi->s_frags_per_block = 1;
+	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
+		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE ||
+		    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
+		    sbi->s_desc_size & (sbi->s_desc_size - 1)) {
+			printk(KERN_ERR
+			       "EXT4-fs: unsupported descriptor size %ld\n",
+			       sbi->s_desc_size);
+			goto failed_mount;
+		}
+	} else
+		sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
 	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
 	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
 	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
@@ -1630,7 +1642,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		goto cantfind_ext4;
 	sbi->s_itb_per_group = sbi->s_inodes_per_group /
 					sbi->s_inodes_per_block;
-	sbi->s_desc_per_block = blocksize / sizeof(struct ext4_group_desc);
+	sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
 	sbi->s_sbh = bh;
 	sbi->s_mount_state = le16_to_cpu(es->s_state);
 	sbi->s_addr_per_block_bits = log2(EXT4_ADDR_PER_BLOCK(sb));
-- 
cgit v1.2.3


From 8fadc14323684c547f74cf2f4d13517c6c264731 Mon Sep 17 00:00:00 2001
From: Alexandre Ratchov <alexandre.ratchov@bull.net>
Date: Wed, 11 Oct 2006 01:21:15 -0700
Subject: [PATCH] ext4: move block number hi bits

Move the '_hi' bits of the block numbers into the larger part of the
block group descriptor structure.

Signed-off-by: Alexandre Ratchov <alexandre.ratchov@bull.net>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 20 ++++++++++----------
 fs/ext4/ialloc.c |  4 ++--
 fs/ext4/inode.c  |  7 +++----
 fs/ext4/resize.c |  6 +++---
 fs/ext4/super.c  | 46 +++++++++++++++++++++++++++++-----------------
 5 files changed, 47 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 3dacb124b8c8..3e85886a6382 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -101,13 +101,13 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
 	desc = ext4_get_group_desc (sb, block_group, NULL);
 	if (!desc)
 		goto error_out;
-	bh = sb_bread(sb, ext4_block_bitmap(desc));
+	bh = sb_bread(sb, ext4_block_bitmap(sb, desc));
 	if (!bh)
 		ext4_error (sb, "read_block_bitmap",
 			    "Cannot read block bitmap - "
 			    "block_group = %d, block_bitmap = %llu",
 			    block_group,
-			    ext4_block_bitmap(desc));
+			    ext4_block_bitmap(sb, desc));
 error_out:
 	return bh;
 }
@@ -463,10 +463,10 @@ do_more:
 	if (!desc)
 		goto error_return;
 
-	if (in_range(ext4_block_bitmap(desc), block, count) ||
-	    in_range(ext4_inode_bitmap(desc), block, count) ||
-	    in_range(block, ext4_inode_table(desc), sbi->s_itb_per_group) ||
-	    in_range(block + count - 1, ext4_inode_table(desc),
+	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
+	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
+	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
+	    in_range(block + count - 1, ext4_inode_table(sb, desc),
 		     sbi->s_itb_per_group))
 		ext4_error (sb, "ext4_free_blocks",
 			    "Freeing blocks in system zones - "
@@ -1563,11 +1563,11 @@ allocated:
 
 	ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
 
-	if (in_range(ext4_block_bitmap(gdp), ret_block, num) ||
-	    in_range(ext4_block_bitmap(gdp), ret_block, num) ||
-	    in_range(ret_block, ext4_inode_table(gdp),
+	if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
+	    in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
+	    in_range(ret_block, ext4_inode_table(sb, gdp),
 		     EXT4_SB(sb)->s_itb_per_group) ||
-	    in_range(ret_block + num - 1, ext4_inode_table(gdp),
+	    in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
 		     EXT4_SB(sb)->s_itb_per_group))
 		ext4_error(sb, "ext4_new_block",
 			    "Allocating block in system zone - "
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 959b7fa8f5db..75608e1e5555 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -60,12 +60,12 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group)
 	if (!desc)
 		goto error_out;
 
-	bh = sb_bread(sb, ext4_inode_bitmap(desc));
+	bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
 	if (!bh)
 		ext4_error(sb, "read_inode_bitmap",
 			    "Cannot read inode bitmap - "
 			    "block_group = %lu, inode_bitmap = %llu",
-			    block_group, ext4_inode_bitmap(desc));
+			    block_group, ext4_inode_bitmap(sb, desc));
 error_out:
 	return bh;
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d03e7d85a638..0a60ec5a16db 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2439,9 +2439,8 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
 	 */
 	offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
 		EXT4_INODE_SIZE(sb);
-	block = ext4_inode_table(gdp) + (offset >> EXT4_BLOCK_SIZE_BITS(sb));
-
-
+	block = ext4_inode_table(sb, gdp) +
+		(offset >> EXT4_BLOCK_SIZE_BITS(sb));
 
 	iloc->block_group = block_group;
 	iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
@@ -2508,7 +2507,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
 				goto make_io;
 
 			bitmap_bh = sb_getblk(inode->i_sb,
-				ext4_inode_bitmap(desc));
+				ext4_inode_bitmap(inode->i_sb, desc));
 			if (!bitmap_bh)
 				goto make_io;
 
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3e960677c2f2..1e9578052cd3 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -834,9 +834,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	/* Update group descriptor block for new group */
 	gdp = (struct ext4_group_desc *)primary->b_data + gdb_off;
 
-	ext4_block_bitmap_set(gdp, input->block_bitmap); /* LV FIXME */
-	ext4_inode_bitmap_set(gdp, input->inode_bitmap); /* LV FIXME */
-	ext4_inode_table_set(gdp, input->inode_table); /* LV FIXME */
+	ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
+	ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
+	ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
 	gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
 	gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bc8848bff2f1..811011fc5c94 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -63,40 +63,52 @@ static void ext4_write_super (struct super_block * sb);
 static void ext4_write_super_lockfs(struct super_block *sb);
 
 
-ext4_fsblk_t ext4_block_bitmap(struct ext4_group_desc *bg)
+ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
+			       struct ext4_group_desc *bg)
 {
 	return le32_to_cpu(bg->bg_block_bitmap) |
-		((ext4_fsblk_t)le16_to_cpu(bg->bg_block_bitmap_hi) << 32);
+		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 }
 
-ext4_fsblk_t ext4_inode_bitmap(struct ext4_group_desc *bg)
+ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
+			       struct ext4_group_desc *bg)
 {
 	return le32_to_cpu(bg->bg_inode_bitmap) |
-		((ext4_fsblk_t)le16_to_cpu(bg->bg_inode_bitmap_hi) << 32);
+		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 }
 
-ext4_fsblk_t ext4_inode_table(struct ext4_group_desc *bg)
+ext4_fsblk_t ext4_inode_table(struct super_block *sb,
+			      struct ext4_group_desc *bg)
 {
 	return le32_to_cpu(bg->bg_inode_table) |
-		((ext4_fsblk_t)le16_to_cpu(bg->bg_inode_table_hi) << 32);
+		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 }
 
-void ext4_block_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk)
+void ext4_block_bitmap_set(struct super_block *sb,
+			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
 {
 	bg->bg_block_bitmap = cpu_to_le32((u32)blk);
-	bg->bg_block_bitmap_hi = cpu_to_le16(blk >> 32);
+	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+		bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 }
 
-void ext4_inode_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk)
+void ext4_inode_bitmap_set(struct super_block *sb,
+			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
 {
 	bg->bg_inode_bitmap  = cpu_to_le32((u32)blk);
-	bg->bg_inode_bitmap_hi = cpu_to_le16(blk >> 32);
+	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+		bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 }
 
-void ext4_inode_table_set(struct ext4_group_desc *bg, ext4_fsblk_t blk)
+void ext4_inode_table_set(struct super_block *sb,
+			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
 {
 	bg->bg_inode_table = cpu_to_le32((u32)blk);
-	bg->bg_inode_table_hi = cpu_to_le16(blk >> 32);
+	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 }
 
 /*
@@ -1239,7 +1251,7 @@ static int ext4_check_descriptors (struct super_block * sb)
 		if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0)
 			gdp = (struct ext4_group_desc *)
 					sbi->s_group_desc[desc_block++]->b_data;
-		block_bitmap = ext4_block_bitmap(gdp);
+		block_bitmap = ext4_block_bitmap(sb, gdp);
 		if (block_bitmap < first_block || block_bitmap > last_block)
 		{
 			ext4_error (sb, "ext4_check_descriptors",
@@ -1248,7 +1260,7 @@ static int ext4_check_descriptors (struct super_block * sb)
 				    i, block_bitmap);
 			return 0;
 		}
-		inode_bitmap = ext4_inode_bitmap(gdp);
+		inode_bitmap = ext4_inode_bitmap(sb, gdp);
 		if (inode_bitmap < first_block || inode_bitmap > last_block)
 		{
 			ext4_error (sb, "ext4_check_descriptors",
@@ -1257,7 +1269,7 @@ static int ext4_check_descriptors (struct super_block * sb)
 				    i, inode_bitmap);
 			return 0;
 		}
-		inode_table = ext4_inode_table(gdp);
+		inode_table = ext4_inode_table(sb, gdp);
 		if (inode_table < first_block ||
 		    inode_table + sbi->s_itb_per_group > last_block)
 		{
@@ -1622,11 +1634,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	}
 	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
-		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE ||
+		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
 		    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
 		    sbi->s_desc_size & (sbi->s_desc_size - 1)) {
 			printk(KERN_ERR
-			       "EXT4-fs: unsupported descriptor size %ld\n",
+			       "EXT4-fs: unsupported descriptor size %lu\n",
 			       sbi->s_desc_size);
 			goto failed_mount;
 		}
-- 
cgit v1.2.3


From 72b64b594081ef0a0717f6aad77e891c72ed4afa Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 11 Oct 2006 01:21:18 -0700
Subject: [PATCH] ext4 uninline ext4_get_group_no_and_offset()

Way too big to inline.

Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 3e85886a6382..402475a6f3df 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -24,6 +24,24 @@
  * balloc.c contains the blocks allocation and deallocation routines
  */
 
+/*
+ * Calculate the block group number and offset, given a block number
+ */
+void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
+		unsigned long *blockgrpp, ext4_grpblk_t *offsetp)
+{
+        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+	ext4_grpblk_t offset;
+
+        blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
+        offset = sector_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
+	if (offsetp)
+		*offsetp = offset;
+	if (blockgrpp)
+	        *blockgrpp = blocknr;
+
+}
+
 /*
  * The free blocks are managed by bitmaps.  A file system contains several
  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
-- 
cgit v1.2.3


From f4e5bc244f23ee024a4dfa034b591b219b2bfb8f Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 11 Oct 2006 01:21:19 -0700
Subject: [PATCH] ext4 64 bit divide fix

With CONFIG_LBD=n, sector_div() expands to a plain old divide.  But ext4 is
_not_ passing in a sector_t as the first argument, so...

fs/built-in.o: In function `ext4_get_group_no_and_offset':
fs/ext4/balloc.c:39: undefined reference to `__umoddi3'
fs/ext4/balloc.c:41: undefined reference to `__udivdi3'
fs/built-in.o: In function `find_group_orlov':
fs/ext4/ialloc.c:278: undefined reference to `__udivdi3'
fs/built-in.o: In function `ext4_fill_super':
fs/ext4/super.c:1488: undefined reference to `__udivdi3'
fs/ext4/super.c:1488: undefined reference to `__umoddi3'
fs/ext4/super.c:1594: undefined reference to `__udivdi3'
fs/ext4/super.c:1601: undefined reference to `__umoddi3'

Fix that up by calling do_div() directly.

Also cast the arg to u64.  do_div() is only defined on u64, and ext4_fsblk_t
is supposed to be opaque.

Note especially the changes to find_group_orlov().  It was attempting to do

	do_div(int, unsigned long long);

which is royally screwed up.  Switched it to plain old divide.

Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 2 +-
 fs/ext4/ialloc.c | 6 +++---
 fs/ext4/super.c  | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 402475a6f3df..5d45582f9517 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -34,7 +34,7 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 	ext4_grpblk_t offset;
 
         blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
-        offset = sector_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
+	offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
 	if (offsetp)
 		*offsetp = offset;
 	if (blockgrpp)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 75608e1e5555..c88b439ba5cd 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -275,7 +275,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 	avefreei = freei / ngroups;
 	freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
 	avefreeb = freeb;
-	sector_div(avefreeb, ngroups);
+	do_div(avefreeb, ngroups);
 	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
 
 	if ((parent == sb->s_root->d_inode) ||
@@ -305,14 +305,14 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 	}
 
 	blocks_per_dir = ext4_blocks_count(es) - freeb;
-	sector_div(blocks_per_dir, ndirs);
+	do_div(blocks_per_dir, ndirs);
 
 	max_dirs = ndirs / ngroups + inodes_per_group / 16;
 	min_inodes = avefreei - inodes_per_group / 4;
 	min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4;
 
 	max_debt = EXT4_BLOCKS_PER_GROUP(sb);
-	sector_div(max_debt, max(blocks_per_dir, (ext4_fsblk_t)BLOCK_COST));
+	max_debt /= max_t(int, blocks_per_dir, BLOCK_COST);
 	if (max_debt * INODE_COST > inodes_per_group)
 		max_debt = inodes_per_group / INODE_COST;
 	if (max_debt > 255)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 811011fc5c94..f7ea0224f535 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1485,7 +1485,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	 */
 	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
 		logic_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
-		offset = sector_div(logic_sb_block, blocksize);
+		offset = do_div(logic_sb_block, blocksize);
 	} else {
 		logic_sb_block = sb_block;
 	}
@@ -1591,7 +1591,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		brelse (bh);
 		sb_set_blocksize(sb, blocksize);
 		logic_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
-		offset = sector_div(logic_sb_block, blocksize);
+		offset = do_div(logic_sb_block, blocksize);
 		bh = sb_bread(sb, logic_sb_block);
 		if (!bh) {
 			printk(KERN_ERR
-- 
cgit v1.2.3


From 70bbb3e0a07c1ff2dd383761b12f865b6002a7a0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 11 Oct 2006 01:21:20 -0700
Subject: [PATCH] ext4: rename logic_sb_block

I assume this means "logical sb block".  So call it that.

I still don't understand the name though.  A block is a block.  What's
different about this one?

Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/super.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f7ea0224f535..6cf1f185b9c1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1423,8 +1423,7 @@ static loff_t ext4_max_size(int bits)
 }
 
 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
-				    ext4_fsblk_t logic_sb_block,
-				    int nr)
+				ext4_fsblk_t logical_sb_block, int nr)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	unsigned long bg, first_meta_bg;
@@ -1434,7 +1433,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
 
 	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
 	    nr < first_meta_bg)
-		return (logic_sb_block + nr + 1);
+		return logical_sb_block + nr + 1;
 	bg = sbi->s_desc_per_block * nr;
 	if (ext4_bg_has_super(sb, bg))
 		has_super = 1;
@@ -1449,7 +1448,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	struct ext4_sb_info *sbi;
 	ext4_fsblk_t block;
 	ext4_fsblk_t sb_block = get_sb_block(&data);
-	ext4_fsblk_t logic_sb_block;
+	ext4_fsblk_t logical_sb_block;
 	unsigned long offset = 0;
 	unsigned int journal_inum = 0;
 	unsigned long journal_devnum = 0;
@@ -1484,13 +1483,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	 * block sizes.  We need to calculate the offset from buffer start.
 	 */
 	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
-		logic_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
-		offset = do_div(logic_sb_block, blocksize);
+		logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
+		offset = do_div(logical_sb_block, blocksize);
 	} else {
-		logic_sb_block = sb_block;
+		logical_sb_block = sb_block;
 	}
 
-	if (!(bh = sb_bread(sb, logic_sb_block))) {
+	if (!(bh = sb_bread(sb, logical_sb_block))) {
 		printk (KERN_ERR "EXT4-fs: unable to read superblock\n");
 		goto out_fail;
 	}
@@ -1590,9 +1589,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 
 		brelse (bh);
 		sb_set_blocksize(sb, blocksize);
-		logic_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
-		offset = do_div(logic_sb_block, blocksize);
-		bh = sb_bread(sb, logic_sb_block);
+		logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
+		offset = do_div(logical_sb_block, blocksize);
+		bh = sb_bread(sb, logical_sb_block);
 		if (!bh) {
 			printk(KERN_ERR
 			       "EXT4-fs: Can't read superblock on 2nd try.\n");
@@ -1711,7 +1710,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	bgl_lock_init(&sbi->s_blockgroup_lock);
 
 	for (i = 0; i < db_count; i++) {
-		block = descriptor_loc(sb, logic_sb_block, i);
+		block = descriptor_loc(sb, logical_sb_block, i);
 		sbi->s_group_desc[i] = sb_bread(sb, block);
 		if (!sbi->s_group_desc[i]) {
 			printk (KERN_ERR "EXT4-fs: "
-- 
cgit v1.2.3


From ceea16bf85fb46035dda1db263ca29b0e07e22ba Mon Sep 17 00:00:00 2001
From: Dmitry Mishin <dim@openvz.org>
Date: Wed, 11 Oct 2006 01:21:21 -0700
Subject: [PATCH] ext4: errors behaviour fix

Current error behaviour for ext2 and ext3 filesystems does not fully
correspond to the documentation and should be fixed.

According to man 8 mount, ext2 and ext3 file systems allow to set one of 3
different on-errors behaviours:

  ---- start of quote man 8 mount ----

  errors=continue / errors=remount-ro / errors=panic

    Define the behaviour when an error is encountered.  (Either ignore
    errors and just mark the file system erroneous and continue, or remount
    the file system read-only, or panic and halt the system.) The default is
    set in the filesystem superblock, and can be changed using tune2fs(8).

  ---- end of quote ----

However EXT3_ERRORS_CONTINUE is not read from the superblock, and thus
ERRORS_CONT is not saved in sbi->s_mount_opt.  This leads to incorrect
handling of errors on ext3.

Then we've checked corresponding code in ext2 and discovered that it is buggy
as well:

- EXT2_ERRORS_CONTINUE is not read from the superblock (the same);

- parse_option() does not clean the alternative values and thus something
  like (ERRORS_CONT|ERRORS_RO) can be set;

- if options are omitted, parse_option() does not set any of these options.

Therefore it is possible to set any combination of these options on the ext2:

- none of them may be set: EXT2_ERRORS_CONTINUE on superblock / empty mount
  options;

- any of them may be set using mount options;

- any 2 options may be set: by using EXT2_ERRORS_RO/EXT2_ERRORS_PANIC on the
  superblock and other value in mount options;

- and finally all three options may be set by adding third option in remount.

Currently ext2 uses these values only in ext2_error() and it is not leading to
any noticeable troubles.  However somebody may be discouraged when he will try
to workaround EXT2_ERRORS_PANIC on the superblock by using errors=continue in
mount options.

This patch:

EXT4_ERRORS_CONTINUE should be taken from the superblock as default value for
error behaviour.

Signed-off-by:	Dmitry Mishin <dim@openvz.org>
Acked-by: Vasily Averin <vvs@sw.ru>
Acked-by: Kirill Korotaev <dev@openvz.org>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/super.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6cf1f185b9c1..1fa3bdc3c671 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1526,6 +1526,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO)
 		set_opt(sbi->s_mount_opt, ERRORS_RO);
+	else
+		set_opt(sbi->s_mount_opt, ERRORS_CONT);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
-- 
cgit v1.2.3


From 63f5793351d821749979e36889f9c089c6028c83 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 11 Oct 2006 01:21:24 -0700
Subject: [PATCH] ext4 whitespace cleanups

Someone's tab key is emitting spaces.  Attempt to repair some of the damage.

Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/acl.c     |  4 ++--
 fs/ext4/dir.c     |  7 ++++---
 fs/ext4/extents.c | 14 +++++++-------
 fs/ext4/namei.c   | 52 +++++++++++++++++++++++++---------------------------
 fs/ext4/super.c   | 14 ++++++++------
 5 files changed, 46 insertions(+), 45 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0a965dd5664e..9e882546d91a 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -141,7 +141,7 @@ ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
 
 static inline void
 ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-                  struct posix_acl *acl)
+		struct posix_acl *acl)
 {
 	spin_lock(&inode->i_lock);
 	if (*i_acl != EXT4_ACL_NOT_CACHED)
@@ -375,7 +375,7 @@ int
 ext4_acl_chmod(struct inode *inode)
 {
 	struct posix_acl *acl, *clone;
-        int error;
+	int error;
 
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 18ac173af575..f8595787a70e 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -188,8 +188,9 @@ revalidate:
 			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
 			if (!ext4_check_dir_entry ("ext4_readdir", inode, de,
 						   bh, offset)) {
-				/* On error, skip the f_pos to the
-                                   next block. */
+				/*
+				 * On error, skip the f_pos to the next block
+				 */
 				filp->f_pos = (filp->f_pos |
 						(sb->s_blocksize - 1)) + 1;
 				brelse (bh);
@@ -508,7 +509,7 @@ finished:
 
 static int ext4_release_dir (struct inode * inode, struct file * filp)
 {
-       if (filp->private_data)
+	if (filp->private_data)
 		ext4_htree_free_dir_info(filp->private_data);
 
 	return 0;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 926186a787a8..2608dce18f3e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1014,7 +1014,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
  * returns first allocated block from next leaf or EXT_MAX_BLOCK
  */
 static unsigned ext4_ext_next_leaf_block(struct inode *inode,
-                                               struct ext4_ext_path *path)
+					struct ext4_ext_path *path)
 {
 	int depth;
 
@@ -1097,8 +1097,8 @@ static int inline
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 				struct ext4_extent *ex2)
 {
-        if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len)
-	    != le32_to_cpu(ex2->ee_block))
+	if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) !=
+			le32_to_cpu(ex2->ee_block))
 		return 0;
 
 	/*
@@ -1113,7 +1113,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 		return 0;
 #endif
 
-        if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2))
+	if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2))
 		return 1;
 	return 0;
 }
@@ -2008,9 +2008,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 		goto out2;
 	}
 	/*
-         * Okay, we need to do block allocation.  Lazily initialize the block
-         * allocation info here if necessary.
-        */
+	 * Okay, we need to do block allocation.  Lazily initialize the block
+	 * allocation info here if necessary.
+	 */
 	if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
 		ext4_init_block_alloc_info(inode);
 
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f98b9994e36c..8b1bd03d20f5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -241,13 +241,13 @@ static inline unsigned dx_node_limit (struct inode *dir)
 #ifdef DX_DEBUG
 static void dx_show_index (char * label, struct dx_entry *entries)
 {
-        int i, n = dx_get_count (entries);
+	int i, n = dx_get_count (entries);
         printk("%s index ", label);
-        for (i = 0; i < n; i++)
-        {
-                printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i));
-        }
-        printk("\n");
+	for (i = 0; i < n; i++) {
+		printk("%x->%u ", i? dx_get_hash(entries + i) :
+				0, dx_get_block(entries + i));
+	}
+	printk("\n");
 }
 
 struct stats
@@ -688,28 +688,26 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
 
 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
 {
-        struct dx_map_entry *p, *q, *top = map + count - 1;
-        int more;
-        /* Combsort until bubble sort doesn't suck */
-        while (count > 2)
-	{
-                count = count*10/13;
-                if (count - 9 < 2) /* 9, 10 -> 11 */
-                        count = 11;
-                for (p = top, q = p - count; q >= map; p--, q--)
-                        if (p->hash < q->hash)
-                                swap(*p, *q);
-        }
-        /* Garden variety bubble sort */
-        do {
-                more = 0;
-                q = top;
-                while (q-- > map)
-		{
-                        if (q[1].hash >= q[0].hash)
+	struct dx_map_entry *p, *q, *top = map + count - 1;
+	int more;
+	/* Combsort until bubble sort doesn't suck */
+	while (count > 2) {
+		count = count*10/13;
+		if (count - 9 < 2) /* 9, 10 -> 11 */
+			count = 11;
+		for (p = top, q = p - count; q >= map; p--, q--)
+			if (p->hash < q->hash)
+				swap(*p, *q);
+	}
+	/* Garden variety bubble sort */
+	do {
+		more = 0;
+		q = top;
+		while (q-- > map) {
+			if (q[1].hash >= q[0].hash)
 				continue;
-                        swap(*(q+1), *q);
-                        more = 1;
+			swap(*(q+1), *q);
+			more = 1;
 		}
 	} while(more);
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1fa3bdc3c671..b4b022aa2bc2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1191,9 +1191,10 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 			"running e2fsck is recommended\n");
 #if 0
 		/* @@@ We _will_ want to clear the valid bit if we find
-                   inconsistencies, to force a fsck at reboot.  But for
-                   a plain journaled filesystem we can keep it set as
-                   valid forever! :) */
+		 * inconsistencies, to force a fsck at reboot.  But for
+		 * a plain journaled filesystem we can keep it set as
+		 * valid forever! :)
+		 */
 	es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS);
 #endif
 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
@@ -1791,8 +1792,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	switch (test_opt(sb, DATA_FLAGS)) {
 	case 0:
 		/* No mode set, assume a default based on the journal
-                   capabilities: ORDERED_DATA if the journal can
-                   cope, else JOURNAL_DATA */
+		 * capabilities: ORDERED_DATA if the journal can
+		 * cope, else JOURNAL_DATA
+		 */
 		if (jbd2_journal_check_available_features
 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
 			set_opt(sbi->s_mount_opt, ORDERED_DATA);
@@ -2802,7 +2804,7 @@ static int __init init_ext4_fs(void)
 	err = init_inodecache();
 	if (err)
 		goto out1;
-        err = register_filesystem(&ext4dev_fs_type);
+	err = register_filesystem(&ext4dev_fs_type);
 	if (err)
 		goto out;
 	return 0;
-- 
cgit v1.2.3


From b611967de4dc5c52049676c4369dcac622a7cdfe Mon Sep 17 00:00:00 2001
From: Davide Libenzi <davidel@xmailserver.org>
Date: Wed, 11 Oct 2006 01:21:44 -0700
Subject: [PATCH] epoll_pwait()

Implement the epoll_pwait system call, which extends the event wait mechanism
with the same logic that ppoll and pselect use.  The definition of epoll_pwait
is:

int epoll_pwait(int epfd, struct epoll_event *events, int maxevents,
                 int timeout, const sigset_t *sigmask, size_t sigsetsize);

The difference between the vanilla epoll_wait and epoll_pwait is that the
latter allows the caller to specify a signal mask to be set while waiting
for events.  Hence epoll_pwait will wait until either a monitored event
or an unmasked signal occurs.  If sigmask is NULL, the epoll_pwait system
call will act exactly like epoll_wait.  For the POSIX definition of
pselect, information is available here:

http://www.opengroup.org/onlinepubs/009695399/functions/select.html

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/eventpoll.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 557d5b614fae..ae228ec54e94 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -105,6 +105,8 @@
 /* Maximum msec timeout value storeable in a long int */
 #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
 
+#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
+
 
 struct epoll_filefd {
 	struct file *file;
@@ -497,7 +499,7 @@ void eventpoll_release_file(struct file *file)
  */
 asmlinkage long sys_epoll_create(int size)
 {
-	int error, fd;
+	int error, fd = -1;
 	struct eventpoll *ep;
 	struct inode *inode;
 	struct file *file;
@@ -640,7 +642,6 @@ eexit_1:
 	return error;
 }
 
-#define MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
 
 /*
  * Implement the event wait interface for the eventpoll file. It is the kernel
@@ -657,7 +658,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
 		     current, epfd, events, maxevents, timeout));
 
 	/* The maximum number of event must be greater than zero */
-	if (maxevents <= 0 || maxevents > MAX_EVENTS)
+	if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
 		return -EINVAL;
 
 	/* Verify that the area passed by the user is writeable */
@@ -699,6 +700,55 @@ eexit_1:
 }
 
 
+#ifdef TIF_RESTORE_SIGMASK
+
+/*
+ * Implement the event wait interface for the eventpoll file. It is the kernel
+ * part of the user space epoll_pwait(2).
+ */
+asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
+		int maxevents, int timeout, const sigset_t __user *sigmask,
+		size_t sigsetsize)
+{
+	int error;
+	sigset_t ksigmask, sigsaved;
+
+	/*
+	 * If the caller wants a certain signal mask to be set during the wait,
+	 * we apply it here.
+	 */
+	if (sigmask) {
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	error = sys_epoll_wait(epfd, events, maxevents, timeout);
+
+	/*
+	 * If we changed the signal mask, we need to restore the original one.
+	 * In case we've got a signal while waiting, we do not restore the
+	 * signal mask yet, and we allow do_signal() to deliver the signal on
+	 * the way back to userspace, before the signal mask is restored.
+	 */
+	if (sigmask) {
+		if (error == -EINTR) {
+			memcpy(&current->saved_sigmask, &sigsaved,
+				sizeof(sigsaved));
+			set_thread_flag(TIF_RESTORE_SIGMASK);
+		} else
+			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	}
+
+	return error;
+}
+
+#endif /* #ifdef TIF_RESTORE_SIGMASK */
+
+
 /*
  * Creates the file descriptor to be used by the epoll interface.
  */
-- 
cgit v1.2.3


From e5657933863f43cc6bb76a54d659303dafaa9e58 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 11 Oct 2006 01:21:46 -0700
Subject: [PATCH] grow_buffers() infinite loop fix

If grow_buffers() is for some reason passed a block number which wants to lie
outside the maximum-addressable pagecache range (PAGE_SIZE * 4G bytes) then it
will accidentally truncate `index' and will then instantiate a page at the
wrong pagecache offset.  This causes __getblk_slow() to go into an infinite
loop.

This can happen with corrupted disks, or with software errors elsewhere.

Detect that, and handle it.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index eeb8ac1aa856..2a7828c0e59b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1042,8 +1042,21 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
 	} while ((size << sizebits) < PAGE_SIZE);
 
 	index = block >> sizebits;
-	block = index << sizebits;
 
+	/*
+	 * Check for a block which wants to lie outside our maximum possible
+	 * pagecache index.  (this comparison is done using sector_t types).
+	 */
+	if (unlikely(index != block >> sizebits)) {
+		char b[BDEVNAME_SIZE];
+
+		printk(KERN_ERR "%s: requested out-of-range block %llu for "
+			"device %s\n",
+			__FUNCTION__, (unsigned long long)block,
+			bdevname(bdev, b));
+		return -EIO;
+	}
+	block = index << sizebits;
 	/* Create a page with the proper size buffers.. */
 	page = grow_dev_page(bdev, block, index, size);
 	if (!page)
@@ -1070,12 +1083,16 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
 
 	for (;;) {
 		struct buffer_head * bh;
+		int ret;
 
 		bh = __find_get_block(bdev, block, size);
 		if (bh)
 			return bh;
 
-		if (!grow_buffers(bdev, block, size))
+		ret = grow_buffers(bdev, block, size);
+		if (ret < 0)
+			return NULL;
+		if (ret == 0)
 			free_more_memory();
 	}
 }
-- 
cgit v1.2.3


From 2245d7c21f5bb1f104ee1226ebcb3dd5b9acfff6 Mon Sep 17 00:00:00 2001
From: Dmitry Mishin <dim@openvz.org>
Date: Wed, 11 Oct 2006 01:21:49 -0700
Subject: [PATCH] ext3: errors behaviour fix

Current error behaviour for ext2 and ext3 filesystems does not fully
correspond to the documentation and should be fixed.

According to man 8 mount, ext2 and ext3 file systems allow to set one of 3
different on-errors behaviours:

  ---- start of quote man 8 mount ----

  errors=continue / errors=remount-ro / errors=panic

    Define the behaviour when an error is encountered.  (Either ignore
    errors and just mark the file system erroneous and continue, or remount
    the file system read-only, or panic and halt the system.) The default is
    set in the filesystem superblock, and can be changed using tune2fs(8).

  ---- end of quote ----

However EXT3_ERRORS_CONTINUE is not read from the superblock, and thus
ERRORS_CONT is not saved in sbi->s_mount_opt.  This leads to incorrect
handling of errors on ext3.

Then we've checked corresponding code in ext2 and discovered that it is buggy
as well:

- EXT2_ERRORS_CONTINUE is not read from the superblock (the same);

- parse_option() does not clean the alternative values and thus something
  like (ERRORS_CONT|ERRORS_RO) can be set;

- if options are omitted, parse_option() does not set any of these options.

Therefore it is possible to set any combination of these options on the ext2:

- none of them may be set: EXT2_ERRORS_CONTINUE on superblock / empty mount
  options;

- any of them may be set using mount options;

- any 2 options may be set: by using EXT2_ERRORS_RO/EXT2_ERRORS_PANIC on the
  superblock and other value in mount options;

- and finally all three options may be set by adding third option in remount.

Currently ext2 uses these values only in ext2_error() and it is not leading to
any noticeable troubles.  However somebody may be discouraged when he will try
to workaround EXT2_ERRORS_PANIC on the superblock by using errors=continue in
mount options.

This patch:

EXT3_ERRORS_CONTINUE should be taken from the superblock as default value for
error behaviour.

Signed-off-by:	Dmitry Mishin <dim@openvz.org>
Acked-by:	Vasily Averin <vvs@sw.ru>
Acked-by: 	Kirill Korotaev <dev@openvz.org>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/super.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8bfd56ef18ca..afc2d4f42d77 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1470,6 +1470,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
 		set_opt(sbi->s_mount_opt, ERRORS_RO);
+	else
+		set_opt(sbi->s_mount_opt, ERRORS_CONT);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
-- 
cgit v1.2.3


From 5a2b4062f5adf2218b9b021e1c33f374bf142cb2 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@sw.ru>
Date: Wed, 11 Oct 2006 01:21:50 -0700
Subject: [PATCH] ext2: errors behaviour fix

Current error behaviour for ext2 and ext3 filesystems does not fully
correspond to the documentation and should be fixed.

According to man 8 mount, ext2 and ext3 file systems allow to set one of 3
different on-errors behaviours:

  ---- start of quote man 8 mount ----

  errors=continue / errors=remount-ro / errors=panic

    Define the behaviour when an error is encountered.  (Either ignore
    errors and just mark the file system erroneous and continue, or remount
    the file system read-only, or panic and halt the system.) The default is
    set in the filesystem superblock, and can be changed using tune2fs(8).

  ---- end of quote ----

However EXT3_ERRORS_CONTINUE is not read from the superblock, and thus
ERRORS_CONT is not saved in sbi->s_mount_opt.  This leads to incorrect
handling of errors on ext3.

Then we've checked corresponding code in ext2 and discovered that it is buggy
as well:

- EXT2_ERRORS_CONTINUE is not read from the superblock (the same);

- parse_option() does not clean the alternative values and thus something
  like (ERRORS_CONT|ERRORS_RO) can be set;

- if options are omitted, parse_option() does not set any of these options.

Therefore it is possible to set any combination of these options on the ext2:

- none of them may be set: EXT2_ERRORS_CONTINUE on superblock / empty mount
  options;

- any of them may be set using mount options;

- any 2 options may be set: by using EXT2_ERRORS_RO/EXT2_ERRORS_PANIC on the
  superblock and other value in mount options;

- and finally all three options may be set by adding third option in remount.

Currently ext2 uses these values only in ext2_error() and it is not leading to
any noticeable troubles.  However somebody may be discouraged when he will try
to workaround EXT2_ERRORS_PANIC on the superblock by using errors=continue in
mount options.

This patch:

EXT2_ERRORS_CONTINUE should be read from the superblock as default value for
error behaviour.  parse_option() should clean the alternative options and
should not change default value taken from the superblock.

Signed-off-by: Vasily Averin <vvs@sw.ru>
Acked-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/super.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 513cd421ac0b..d8b9abd95d07 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -364,7 +364,6 @@ static int parse_options (char * options,
 {
 	char * p;
 	substring_t args[MAX_OPT_ARGS];
-	unsigned long kind = EXT2_MOUNT_ERRORS_CONT;
 	int option;
 
 	if (!options)
@@ -404,13 +403,19 @@ static int parse_options (char * options,
 			/* *sb_block = match_int(&args[0]); */
 			break;
 		case Opt_err_panic:
-			kind = EXT2_MOUNT_ERRORS_PANIC;
+			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt (sbi->s_mount_opt, ERRORS_RO);
+			set_opt (sbi->s_mount_opt, ERRORS_PANIC);
 			break;
 		case Opt_err_ro:
-			kind = EXT2_MOUNT_ERRORS_RO;
+			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt (sbi->s_mount_opt, ERRORS_RO);
 			break;
 		case Opt_err_cont:
-			kind = EXT2_MOUNT_ERRORS_CONT;
+			clear_opt (sbi->s_mount_opt, ERRORS_RO);
+			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt (sbi->s_mount_opt, ERRORS_CONT);
 			break;
 		case Opt_nouid32:
 			set_opt (sbi->s_mount_opt, NO_UID32);
@@ -489,7 +494,6 @@ static int parse_options (char * options,
 			return 0;
 		}
 	}
-	sbi->s_mount_opt |= kind;
 	return 1;
 }
 
@@ -715,6 +719,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO)
 		set_opt(sbi->s_mount_opt, ERRORS_RO);
+	else
+		set_opt(sbi->s_mount_opt, ERRORS_CONT);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
-- 
cgit v1.2.3


From 39484e53bb00f55b6303a908070db133608ef2a5 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 11 Oct 2006 01:21:54 -0700
Subject: [PATCH] 32-bit compatibility HDIO IOCTLs

A couple of HDIO IOCTLs are not yet handled and a few others are marked
as using a pointer rather than an unsigned long.  The former include:

HDIO_GET_WCACHE, HDIO_GET_ACOUSTIC, HDIO_GET_ADDRESS and
HDIO_GET_BUSSTATE.  The latter are: HDIO_SET_MULTCOUNT,
HDIO_SET_UNMASKINTR, HDIO_SET_KEEPSETTINGS, HDIO_SET_32BIT,
HDIO_SET_NOWERR, HDIO_SET_DMA, HDIO_SET_PIO_MODE and HDIO_SET_NICE.

Additionally 0x330 used to be HDIO_GETGEO_BIG and may be issued by 32-bit
`hdparm' run on a 64-bit kernel making Linux complain loudly.

This is a fix for these issues.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat_ioctl.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 27ca1aa30562..a91f2628c981 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2438,13 +2438,17 @@ HANDLE_IOCTL(0x1260, broken_blkgetsize)
 HANDLE_IOCTL(BLKFRAGET, w_long)
 HANDLE_IOCTL(BLKSECTGET, w_long)
 HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans)
-HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans)
 HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans)
-HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans)
-HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans)
 HANDLE_IOCTL(HDIO_GET_MULTCOUNT, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans)
 HANDLE_IOCTL(HDIO_GET_NOWERR, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans)
 HANDLE_IOCTL(HDIO_GET_NICE, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_WCACHE, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_ACOUSTIC, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_ADDRESS, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_BUSSTATE, hdio_ioctl_trans)
 HANDLE_IOCTL(FDSETPRM32, fd_ioctl_trans)
 HANDLE_IOCTL(FDDEFPRM32, fd_ioctl_trans)
 HANDLE_IOCTL(FDGETPRM32, fd_ioctl_trans)
-- 
cgit v1.2.3


From 97e860d364aa9d08e895ecb619b9122ae2c70df8 Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Wed, 11 Oct 2006 01:21:59 -0700
Subject: [PATCH] Remove unnecessary check in fs/fat/inode.c

Since all callers dereference sb, and this function does so earlier too, we
don't need the check.

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fat/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 4613cb202170..78945b53b0f8 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1472,7 +1472,7 @@ int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2)
 		ret = writeback_inode(i1);
 	if (!ret && i2)
 		ret = writeback_inode(i2);
-	if (!ret && sb) {
+	if (!ret) {
 		struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
 		ret = filemap_flush(mapping);
 	}
-- 
cgit v1.2.3


From 8c58165108e26d18849a0138c719e680f281197a Mon Sep 17 00:00:00 2001
From: Monakhov Dmitriy <dmonakhov@openvz.org>
Date: Wed, 11 Oct 2006 01:22:00 -0700
Subject: [PATCH] D-cache aliasing issue in __block_prepare_write

A couple of flush_dcache_page()s are missing on the I/O-error paths.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 2a7828c0e59b..f65ef8821c73 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1854,6 +1854,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 			clear_buffer_new(bh);
 			kaddr = kmap_atomic(page, KM_USER0);
 			memset(kaddr+block_start, 0, bh->b_size);
+			flush_dcache_page(page);
 			kunmap_atomic(kaddr, KM_USER0);
 			set_buffer_uptodate(bh);
 			mark_buffer_dirty(bh);
@@ -2360,6 +2361,7 @@ failed:
 	 */
 	kaddr = kmap_atomic(page, KM_USER0);
 	memset(kaddr, 0, PAGE_CACHE_SIZE);
+	flush_dcache_page(page);
 	kunmap_atomic(kaddr, KM_USER0);
 	SetPageUptodate(page);
 	set_page_dirty(page);
-- 
cgit v1.2.3


From 2ecd05ae68a903761e736e9e0aca40d6ace4319e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 11 Oct 2006 01:22:05 -0700
Subject: [PATCH] fs/*: use BUILD_BUG_ON

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/dir.c     |  8 ++++----
 fs/jffs2/super.c |  8 ++++----
 fs/minix/inode.c |  8 ++------
 fs/ocfs2/super.c |  2 +-
 fs/sysv/super.c  | 15 +++++----------
 5 files changed, 16 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index cf8a2cb28505..a6ec75c56fcf 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -211,8 +211,8 @@ static int afs_dir_open(struct inode *inode, struct file *file)
 {
 	_enter("{%lu}", inode->i_ino);
 
-	BUG_ON(sizeof(union afs_dir_block) != 2048);
-	BUG_ON(sizeof(union afs_dirent) != 32);
+	BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
 
 	if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED)
 		return -ENOENT;
@@ -446,8 +446,8 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
 	_enter("{%lu},%p{%s}", dir->i_ino, dentry, dentry->d_name.name);
 
 	/* insanity checks first */
-	BUG_ON(sizeof(union afs_dir_block) != 2048);
-	BUG_ON(sizeof(union afs_dirent) != 32);
+	BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
 
 	if (dentry->d_name.len > 255) {
 		_leave(" = -ENAMETOOLONG");
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 6de374513c01..bc4b8106a490 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -334,10 +334,10 @@ static int __init init_jffs2_fs(void)
 	   which means just 'no padding', without the alignment
 	   thing. But GCC doesn't have that -- we have to just
 	   hope the structs are the right sizes, instead. */
-	BUG_ON(sizeof(struct jffs2_unknown_node) != 12);
-	BUG_ON(sizeof(struct jffs2_raw_dirent) != 40);
-	BUG_ON(sizeof(struct jffs2_raw_inode) != 68);
-	BUG_ON(sizeof(struct jffs2_raw_summary) != 32);
+	BUILD_BUG_ON(sizeof(struct jffs2_unknown_node) != 12);
+	BUILD_BUG_ON(sizeof(struct jffs2_raw_dirent) != 40);
+	BUILD_BUG_ON(sizeof(struct jffs2_raw_inode) != 68);
+	BUILD_BUG_ON(sizeof(struct jffs2_raw_summary) != 32);
 
 	printk(KERN_INFO "JFFS2 version 2.2."
 #ifdef CONFIG_JFFS2_FS_WRITEBUFFER
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index c11a4b9fb863..1e36bae4d0eb 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -149,12 +149,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 		return -ENOMEM;
 	s->s_fs_info = sbi;
 
-	/* N.B. These should be compile-time tests.
-	   Unfortunately that is impossible. */
-	if (32 != sizeof (struct minix_inode))
-		panic("bad V1 i-node size");
-	if (64 != sizeof(struct minix2_inode))
-		panic("bad V2 i-node size");
+	BUILD_BUG_ON(32 != sizeof (struct minix_inode));
+	BUILD_BUG_ON(64 != sizeof(struct minix2_inode));
 
 	if (!sb_set_blocksize(s, BLOCK_SIZE))
 		goto out_bad_hblock;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4c29cd7cc8e6..76b46ebbb10c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -339,7 +339,7 @@ static unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
 
 #if BITS_PER_LONG == 32
 # if defined(CONFIG_LBD)
-	BUG_ON(sizeof(sector_t) != 8);
+	BUILD_BUG_ON(sizeof(sector_t) != 8);
 	pagefactor = PAGE_CACHE_SIZE;
 	bitshift = BITS_PER_LONG;
 # else
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 350cba5d6803..dc9e7dc07fb7 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -358,16 +358,11 @@ static int sysv_fill_super(struct super_block *sb, void *data, int silent)
 	unsigned long blocknr;
 	int size = 0, i;
 	
-	if (1024 != sizeof (struct xenix_super_block))
-		panic("Xenix FS: bad superblock size");
-	if (512 != sizeof (struct sysv4_super_block))
-		panic("SystemV FS: bad superblock size");
-	if (512 != sizeof (struct sysv2_super_block))
-		panic("SystemV FS: bad superblock size");
-	if (500 != sizeof (struct coh_super_block))
-		panic("Coherent FS: bad superblock size");
-	if (64 != sizeof (struct sysv_inode))
-		panic("sysv fs: bad inode size");
+	BUILD_BUG_ON(1024 != sizeof (struct xenix_super_block));
+	BUILD_BUG_ON(512 != sizeof (struct sysv4_super_block));
+	BUILD_BUG_ON(512 != sizeof (struct sysv2_super_block));
+	BUILD_BUG_ON(500 != sizeof (struct coh_super_block));
+	BUILD_BUG_ON(64 != sizeof (struct sysv_inode));
 
 	sbi = kzalloc(sizeof(struct sysv_sb_info), GFP_KERNEL);
 	if (!sbi)
-- 
cgit v1.2.3


From edc666e2ff9ec2e4e9510f1127c68c22cffc93f6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 11 Oct 2006 01:22:14 -0700
Subject: [PATCH] ReiserFS: Make sure all dentries refs are released before
 calling kill_block_super()

Make sure all dentry refs are released before calling kill_block_super()
so that the assumption that generic_shutdown_super() can completely destroy
the dentry tree, because there will be no external references, holds true.

What was being done in the put_super() superblock op, is now done in the
kill_sb() filesystem op instead, prior to calling kill_block_super().

Changes made in [try #2]:

 (*) reiserfs_kill_sb() now checks that the superblock FS info pointer is set
     before trying to dereference it.

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/super.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index c89aa2338191..9041802df832 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -430,20 +430,29 @@ int remove_save_link(struct inode *inode, int truncate)
 	return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT);
 }
 
-static void reiserfs_put_super(struct super_block *s)
+static void reiserfs_kill_sb(struct super_block *s)
 {
-	struct reiserfs_transaction_handle th;
-	th.t_trans_id = 0;
+	if (REISERFS_SB(s)) {
+		if (REISERFS_SB(s)->xattr_root) {
+			d_invalidate(REISERFS_SB(s)->xattr_root);
+			dput(REISERFS_SB(s)->xattr_root);
+			REISERFS_SB(s)->xattr_root = NULL;
+		}
 
-	if (REISERFS_SB(s)->xattr_root) {
-		d_invalidate(REISERFS_SB(s)->xattr_root);
-		dput(REISERFS_SB(s)->xattr_root);
+		if (REISERFS_SB(s)->priv_root) {
+			d_invalidate(REISERFS_SB(s)->priv_root);
+			dput(REISERFS_SB(s)->priv_root);
+			REISERFS_SB(s)->priv_root = NULL;
+		}
 	}
 
-	if (REISERFS_SB(s)->priv_root) {
-		d_invalidate(REISERFS_SB(s)->priv_root);
-		dput(REISERFS_SB(s)->priv_root);
-	}
+	kill_block_super(s);
+}
+
+static void reiserfs_put_super(struct super_block *s)
+{
+	struct reiserfs_transaction_handle th;
+	th.t_trans_id = 0;
 
 	/* change file system state to current state if it was mounted with read-write permissions */
 	if (!(s->s_flags & MS_RDONLY)) {
@@ -2156,7 +2165,7 @@ struct file_system_type reiserfs_fs_type = {
 	.owner = THIS_MODULE,
 	.name = "reiserfs",
 	.get_sb = get_super_block,
-	.kill_sb = kill_block_super,
+	.kill_sb = reiserfs_kill_sb,
 	.fs_flags = FS_REQUIRES_DEV,
 };
 
-- 
cgit v1.2.3


From 6ce315234aefcbc599dea390c15672156ebf9e7b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 11 Oct 2006 01:22:15 -0700
Subject: [PATCH] AUTOFS: Make sure all dentries refs are released before
 calling kill_anon_super()

Make sure all dentry refs are released before calling kill_anon_super() so
that the assumption that generic_shutdown_super() can completely destroy the
dentry tree, because there will be no external references, holds true.

What was being done in the put_super() superblock op, is now done in the
kill_sb() filesystem op instead, prior to calling kill_anon_super().

This makes the struct autofs_sb_info::root member variable redundant (since
sb->s_root is still available), and so that is removed.  The calls to
shrink_dcache_sb() are also removed since they're also redundant as
shrink_dcache_for_umount() will now be called after the cleanup routine.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Ian Kent <raven@themaw.net>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h |  3 +--
 fs/autofs4/init.c     |  2 +-
 fs/autofs4/inode.c    | 22 ++++------------------
 fs/autofs4/waitq.c    |  1 -
 4 files changed, 6 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 480ab178cba5..b13f32c8aeee 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -94,7 +94,6 @@ struct autofs_wait_queue {
 
 struct autofs_sb_info {
 	u32 magic;
-	struct dentry *root;
 	int pipefd;
 	struct file *pipe;
 	pid_t oz_pgrp;
@@ -229,4 +228,4 @@ out:
 }
 
 void autofs4_dentry_release(struct dentry *);
-
+extern void autofs4_kill_sb(struct super_block *);
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 5d9193332bef..723a1c5e361b 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -24,7 +24,7 @@ static struct file_system_type autofs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "autofs",
 	.get_sb		= autofs_get_sb,
-	.kill_sb	= kill_anon_super,
+	.kill_sb	= autofs4_kill_sb,
 };
 
 static int __init init_autofs4_fs(void)
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 800ce876caec..51fd8595bf85 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -96,7 +96,7 @@ void autofs4_free_ino(struct autofs_info *ino)
  */
 static void autofs4_force_release(struct autofs_sb_info *sbi)
 {
-	struct dentry *this_parent = sbi->root;
+	struct dentry *this_parent = sbi->sb->s_root;
 	struct list_head *next;
 
 	spin_lock(&dcache_lock);
@@ -127,7 +127,7 @@ resume:
 		spin_lock(&dcache_lock);
 	}
 
-	if (this_parent != sbi->root) {
+	if (this_parent != sbi->sb->s_root) {
 		struct dentry *dentry = this_parent;
 
 		next = this_parent->d_u.d_child.next;
@@ -140,15 +140,9 @@ resume:
 		goto resume;
 	}
 	spin_unlock(&dcache_lock);
-
-	dput(sbi->root);
-	sbi->root = NULL;
-	shrink_dcache_sb(sbi->sb);
-
-	return;
 }
 
-static void autofs4_put_super(struct super_block *sb)
+void autofs4_kill_sb(struct super_block *sb)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(sb);
 
@@ -163,6 +157,7 @@ static void autofs4_put_super(struct super_block *sb)
 	kfree(sbi);
 
 	DPRINTK("shutting down");
+	kill_anon_super(sb);
 }
 
 static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
@@ -189,7 +184,6 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
 }
 
 static struct super_operations autofs4_sops = {
-	.put_super	= autofs4_put_super,
 	.statfs		= simple_statfs,
 	.show_options	= autofs4_show_options,
 };
@@ -315,7 +309,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 
 	s->s_fs_info = sbi;
 	sbi->magic = AUTOFS_SBI_MAGIC;
-	sbi->root = NULL;
 	sbi->pipefd = -1;
 	sbi->catatonic = 0;
 	sbi->exp_timeout = 0;
@@ -396,13 +389,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	sbi->pipe = pipe;
 	sbi->pipefd = pipefd;
 
-	/*
-	 * Take a reference to the root dentry so we get a chance to
-	 * clean up the dentry tree on umount.
-	 * See autofs4_force_release.
-	 */
-	sbi->root = dget(root);
-
 	/*
 	 * Success! Install the root dentry now to indicate completion.
 	 */
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index ce103e7b0bc3..c0a6c8d445c7 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -45,7 +45,6 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 		fput(sbi->pipe);	/* Close the pipe */
 		sbi->pipe = NULL;
 	}
-	shrink_dcache_sb(sbi->sb);
 }
 
 static int autofs4_write(struct file *file, const void *addr, int bytes)
-- 
cgit v1.2.3


From c636ebdb186bf37f98d3839f69293597723edb36 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 11 Oct 2006 01:22:19 -0700
Subject: [PATCH] VFS: Destroy the dentries contributed by a superblock on
 unmounting

The attached patch destroys all the dentries attached to a superblock in one go
by:

 (1) Destroying the tree rooted at s_root.

 (2) Destroying every entry in the anon list, one at a time.

 (3) Each entry in the anon list has its subtree consumed from the leaves
     inwards.

This reduces the amount of work generic_shutdown_super() does, and avoids
iterating through the dentry_unused list.

Note that locking is almost entirely absent in the shrink_dcache_for_umount*()
functions added by this patch.  This is because:

 (1) at the point the filesystem calls generic_shutdown_super(), it is not
     permitted to further touch the superblock's set of dentries, and nor may
     it remove aliases from inodes;

 (2) the dcache memory shrinker now skips dentries that are being unmounted;
     and

 (3) the superblock no longer has any external references through which the VFS
     can reach it.

Given these points, the only locking we need to do is when we remove dentries
from the unused list and the name hashes, which we do a directory's worth at a
time.

We also don't need to guard against reference counts going to zero unexpectedly
and removing bits of the tree we're working on as nothing else can call dput().

A cut down version of dentry_iput() has been folded into
shrink_dcache_for_umount_subtree() function.  Apart from not needing to unlock
things, it also doesn't need to check for inotify watches.

In this version of the patch, the complaint about a dentry still being in use
has been expanded from a single BUG_ON() and now gives much more information.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: NeilBrown <neilb@suse.de>
Acked-by: Ian Kent <raven@themaw.net>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/super.c  |  12 +++---
 2 files changed, 136 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 2355bddad8de..2bac4ba1d1d3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -548,6 +548,136 @@ repeat:
 	spin_unlock(&dcache_lock);
 }
 
+/*
+ * destroy a single subtree of dentries for unmount
+ * - see the comments on shrink_dcache_for_umount() for a description of the
+ *   locking
+ */
+static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	BUG_ON(!IS_ROOT(dentry));
+
+	/* detach this root from the system */
+	spin_lock(&dcache_lock);
+	if (!list_empty(&dentry->d_lru)) {
+		dentry_stat.nr_unused--;
+		list_del_init(&dentry->d_lru);
+	}
+	__d_drop(dentry);
+	spin_unlock(&dcache_lock);
+
+	for (;;) {
+		/* descend to the first leaf in the current subtree */
+		while (!list_empty(&dentry->d_subdirs)) {
+			struct dentry *loop;
+
+			/* this is a branch with children - detach all of them
+			 * from the system in one go */
+			spin_lock(&dcache_lock);
+			list_for_each_entry(loop, &dentry->d_subdirs,
+					    d_u.d_child) {
+				if (!list_empty(&loop->d_lru)) {
+					dentry_stat.nr_unused--;
+					list_del_init(&loop->d_lru);
+				}
+
+				__d_drop(loop);
+				cond_resched_lock(&dcache_lock);
+			}
+			spin_unlock(&dcache_lock);
+
+			/* move to the first child */
+			dentry = list_entry(dentry->d_subdirs.next,
+					    struct dentry, d_u.d_child);
+		}
+
+		/* consume the dentries from this leaf up through its parents
+		 * until we find one with children or run out altogether */
+		do {
+			struct inode *inode;
+
+			if (atomic_read(&dentry->d_count) != 0) {
+				printk(KERN_ERR
+				       "BUG: Dentry %p{i=%lx,n=%s}"
+				       " still in use (%d)"
+				       " [unmount of %s %s]\n",
+				       dentry,
+				       dentry->d_inode ?
+				       dentry->d_inode->i_ino : 0UL,
+				       dentry->d_name.name,
+				       atomic_read(&dentry->d_count),
+				       dentry->d_sb->s_type->name,
+				       dentry->d_sb->s_id);
+				BUG();
+			}
+
+			parent = dentry->d_parent;
+			if (parent == dentry)
+				parent = NULL;
+			else
+				atomic_dec(&parent->d_count);
+
+			list_del(&dentry->d_u.d_child);
+			dentry_stat.nr_dentry--;	/* For d_free, below */
+
+			inode = dentry->d_inode;
+			if (inode) {
+				dentry->d_inode = NULL;
+				list_del_init(&dentry->d_alias);
+				if (dentry->d_op && dentry->d_op->d_iput)
+					dentry->d_op->d_iput(dentry, inode);
+				else
+					iput(inode);
+			}
+
+			d_free(dentry);
+
+			/* finished when we fall off the top of the tree,
+			 * otherwise we ascend to the parent and move to the
+			 * next sibling if there is one */
+			if (!parent)
+				return;
+
+			dentry = parent;
+
+		} while (list_empty(&dentry->d_subdirs));
+
+		dentry = list_entry(dentry->d_subdirs.next,
+				    struct dentry, d_u.d_child);
+	}
+}
+
+/*
+ * destroy the dentries attached to a superblock on unmounting
+ * - we don't need to use dentry->d_lock, and only need dcache_lock when
+ *   removing the dentry from the system lists and hashes because:
+ *   - the superblock is detached from all mountings and open files, so the
+ *     dentry trees will not be rearranged by the VFS
+ *   - s_umount is write-locked, so the memory pressure shrinker will ignore
+ *     any dentries belonging to this superblock that it comes across
+ *   - the filesystem itself is no longer permitted to rearrange the dentries
+ *     in this superblock
+ */
+void shrink_dcache_for_umount(struct super_block *sb)
+{
+	struct dentry *dentry;
+
+	if (down_read_trylock(&sb->s_umount))
+		BUG();
+
+	dentry = sb->s_root;
+	sb->s_root = NULL;
+	atomic_dec(&dentry->d_count);
+	shrink_dcache_for_umount_subtree(dentry);
+
+	while (!hlist_empty(&sb->s_anon)) {
+		dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
+		shrink_dcache_for_umount_subtree(dentry);
+	}
+}
+
 /*
  * Search for at least 1 mount point in the dentry's subdirs.
  * We descend to the next level whenever the d_subdirs
diff --git a/fs/super.c b/fs/super.c
index aec99ddbe53f..47e554c12e76 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -260,17 +260,17 @@ int fsync_super(struct super_block *sb)
  *	that need destruction out of superblock, call generic_shutdown_super()
  *	and release aforementioned objects.  Note: dentries and inodes _are_
  *	taken care of and do not need specific handling.
+ *
+ *	Upon calling this function, the filesystem may no longer alter or
+ *	rearrange the set of dentries belonging to this super_block, nor may it
+ *	change the attachments of dentries to inodes.
  */
 void generic_shutdown_super(struct super_block *sb)
 {
-	struct dentry *root = sb->s_root;
 	struct super_operations *sop = sb->s_op;
 
-	if (root) {
-		sb->s_root = NULL;
-		shrink_dcache_parent(root);
-		shrink_dcache_sb(sb);
-		dput(root);
+	if (sb->s_root) {
+		shrink_dcache_for_umount(sb);
 		fsync_super(sb);
 		lock_super(sb);
 		sb->s_flags &= ~MS_ACTIVE;
-- 
cgit v1.2.3


From bf02c082bf7a464518d45b9c178b8aa83f74dd5d Mon Sep 17 00:00:00 2001
From: Andreas Mohr <andi@rhlx01.fht-esslingen.de>
Date: Wed, 11 Oct 2006 01:22:24 -0700
Subject: [PATCH] fs/bio.c: tweaks

- Calculate a variable in bvec_alloc_bs() only once needed, not earlier
  (bio.o down from 18408 to 18376 Bytes, 32 Bytes saved, probably due to
  data locality improvements).

- Init variable idx to silence a gcc warning which already existed in the
  unmodified original base file (bvec_alloc_bs() handles idx correctly, so
  there's no need for the warning):

	fs/bio.c: In function `bio_alloc_bioset':
	fs/bio.c:169: warning: `idx' may be used uninitialized in this function

Signed-off-by: Andreas Mohr <andi@lisas.de>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bio.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 8f93e939f213..f95c8749499f 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -79,7 +79,6 @@ static struct bio_set *fs_bio_set;
 static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
 {
 	struct bio_vec *bvl;
-	struct biovec_slab *bp;
 
 	/*
 	 * see comment near bvec_array define!
@@ -98,10 +97,12 @@ static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned lon
 	 * idx now points to the pool we want to allocate from
 	 */
 
-	bp = bvec_slabs + *idx;
 	bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-	if (bvl)
+	if (bvl) {
+		struct biovec_slab *bp = bvec_slabs + *idx;
+
 		memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
+	}
 
 	return bvl;
 }
@@ -166,7 +167,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 
 		bio_init(bio);
 		if (likely(nr_iovecs)) {
-			unsigned long idx;
+			unsigned long idx = 0; /* shut up gcc */
 
 			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
 			if (unlikely(!bvl)) {
-- 
cgit v1.2.3


From 4b4fcaa1a9eec90b44b66a67af6e130349ba008e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Wed, 11 Oct 2006 17:25:45 +0100
Subject: [PATCH] misuse of strstr

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/gfs2/locking/dlm/mount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 1f94dd35a943..cdd1694e889b 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -45,7 +45,7 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
 	strncpy(buf, table_name, 256);
 	buf[255] = '\0';
 
-	p = strstr(buf, ":");
+	p = strchr(buf, ':');
 	if (!p) {
 		log_info("invalid table_name \"%s\"", table_name);
 		kfree(ls);
-- 
cgit v1.2.3


From 70903ca004fef17b0f6483714baefdb2f6ecceb0 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 11 Oct 2006 18:49:24 +0000
Subject: [CIFS] Do not need to adjust for Jan/Feb for leap day calculation in
 2100 (year divisible by 100)

Signed-off-by: Yehuda Sadeh Weinraub <Yehuda.Sadeh@expand.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/netmisc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 32562d199552..3adbd128e08e 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -956,7 +956,8 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
 		days = days - 1;  /* do not count leap year for the year 2100 */
 
 	/* adjust for leap year where we are still before leap day */
-	days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0);
+	if(year != 120)
+		days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0);
 	sec += 24 * 60 * 60 * days; 
 
 	ts.tv_sec = sec;
-- 
cgit v1.2.3


From 533f90af6d90b9e4859a158565385d1d84a79f75 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 12 Oct 2006 00:02:32 +0000
Subject: [CIFS] Fix old DOS time conversion to handle timezone

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/netmisc.c |  2 +-
 fs/cifs/readdir.c | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 3adbd128e08e..992e80edc720 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -942,7 +942,7 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
 		cERROR(1,("illegal date, month %d day: %d", month, days));
 	month -= 1;
 	days += total_days_of_prev_months[month];
-	days += 3653; /* account for difference in days between 1980 and 1970 */
+	days += 3652; /* account for difference in days between 1980 and 1970 */
 	year = sd->Year;
 	days += year * 365;
 	days += (year/4); /* leap year */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 81e7b2e5fb4d..80e6ebd440a8 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -106,6 +106,17 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
 	return rc;
 }
 
+static void AdjustForTZ(struct cifsTconInfo * tcon, struct inode * inode)
+{
+	if((tcon) && (tcon->ses) && (tcon->ses->server)) {
+		inode->i_ctime.tv_sec += tcon->ses->server.timeAdj;
+		inode->i_mtime.tv_sec += tcon->ses->server.timeAdj;
+		inode->i_atime.tv_sec += tcon->ses->server.timeAdj;
+	}
+	return;
+}
+
+
 static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 		char * buf, int *pobject_type, int isNewInode)
 {
@@ -148,7 +159,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
                 tmp_inode->i_ctime = cnvrtDosUnixTm(
                                 le16_to_cpu(pfindData->LastWriteDate),
                                 le16_to_cpu(pfindData->LastWriteTime));
-
+		AdjustForTZ(cifs_sb->tcon, tmp_inode);
 		attr = le16_to_cpu(pfindData->Attributes);
 		allocation_size = le32_to_cpu(pfindData->AllocationSize);
 		end_of_file = le32_to_cpu(pfindData->DataSize);
-- 
cgit v1.2.3


From ddae957da48cc381c1472a8909905e1818e4afdd Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 12 Oct 2006 01:23:29 +0000
Subject: [CIFS] fix typo in previous patch

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/readdir.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 80e6ebd440a8..acbabc09543f 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -109,9 +109,9 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
 static void AdjustForTZ(struct cifsTconInfo * tcon, struct inode * inode)
 {
 	if((tcon) && (tcon->ses) && (tcon->ses->server)) {
-		inode->i_ctime.tv_sec += tcon->ses->server.timeAdj;
-		inode->i_mtime.tv_sec += tcon->ses->server.timeAdj;
-		inode->i_atime.tv_sec += tcon->ses->server.timeAdj;
+		inode->i_ctime.tv_sec += tcon->ses->server->timeAdj;
+		inode->i_mtime.tv_sec += tcon->ses->server->timeAdj;
+		inode->i_atime.tv_sec += tcon->ses->server->timeAdj;
 	}
 	return;
 }
-- 
cgit v1.2.3


From acf1a1b1043327b2179ea529730358e58c7c277e Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 12 Oct 2006 03:28:28 +0000
Subject: [CIFS] Level 1 QPathInfo needed for proper OS2 support

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h |  1 +
 fs/cifs/cifssmb.c   | 21 ++++++++++++++++++---
 fs/cifs/inode.c     |  5 ++++-
 3 files changed, 23 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 7dd2f48a4073..4a4fd2dbca63 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -119,6 +119,7 @@ extern int CIFSFindClose(const int, struct cifsTconInfo *tcon,
 extern int CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon,
 			const unsigned char *searchName,
 			FILE_ALL_INFO * findData,
+			int legacy /* whether to use old info level */,
 			const struct nls_table *nls_codepage, int remap);
 extern int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
                         const unsigned char *searchName,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 79a01d35a783..6f50f2bc8870 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2969,6 +2969,7 @@ int
 CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon,
 		 const unsigned char *searchName,
 		 FILE_ALL_INFO * pFindData,
+		 int legacy /* old style infolevel */,
 		 const struct nls_table *nls_codepage, int remap)
 {
 /* level 263 SMB_QUERY_FILE_ALL_INFO */
@@ -3017,7 +3018,10 @@ QPathInfoRetry:
 	byte_count = params + 1 /* pad */ ;
 	pSMB->TotalParameterCount = cpu_to_le16(params);
 	pSMB->ParameterCount = pSMB->TotalParameterCount;
-	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
+	if(legacy)
+		pSMB->InformationLevel = cpu_to_le16(SMB_INFO_STANDARD);
+	else
+		pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
 	pSMB->Reserved4 = 0;
 	pSMB->hdr.smb_buf_length += byte_count;
 	pSMB->ByteCount = cpu_to_le16(byte_count);
@@ -3029,13 +3033,24 @@ QPathInfoRetry:
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < 40)) 
+		if (rc) /* BB add auto retry on EOPNOTSUPP? */
+			rc = -EIO;
+		else if (!legacy && (pSMBr->ByteCount < 40)) 
 			rc = -EIO;	/* bad smb */
+		else if(legacy && (pSMBr->ByteCount < 24))
+			rc = -EIO;  /* 24 or 26 expected but we do not read last field */
 		else if (pFindData){
+			int size;
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
+			if(legacy) /* we do not read the last field, EAsize, fortunately
+					   since it varies by subdialect and on Set vs. Get, is  
+					   two bytes or 4 bytes depending but we don't care here */
+				size = sizeof(FILE_INFO_STANDARD);
+			else
+				size = sizeof(FILE_ALL_INFO);
 			memcpy((char *) pFindData,
 			       (char *) &pSMBr->hdr.Protocol +
-			       data_offset, sizeof (FILE_ALL_INFO));
+			       data_offset, size);
 		} else
 		    rc = -ENOMEM;
 	}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 06dbce3a1815..fe6d21f99964 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -338,6 +338,7 @@ int cifs_get_inode_info(struct inode **pinode,
 		pfindData = (FILE_ALL_INFO *)buf;
 		/* could do find first instead but this returns more info */
 		rc = CIFSSMBQPathInfo(xid, pTcon, search_path, pfindData,
+			      0 /* not legacy */,
 			      cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
 				CIFS_MOUNT_MAP_SPECIAL_CHR);
 		/* BB optimize code so we do not make the above call
@@ -385,8 +386,10 @@ int cifs_get_inode_info(struct inode **pinode,
 		/* get new inode */
 		if (*pinode == NULL) {
 			*pinode = new_inode(sb);
-			if (*pinode == NULL)
+			if (*pinode == NULL) {
+				kfree(buf);
 				return -ENOMEM;
+			}
 			/* Is an i_ino of zero legal? Can we use that to check
 			   if the server supports returning inode numbers?  Are
 			   there other sanity checks we can use to ensure that
-- 
cgit v1.2.3


From e6e80f294c2688302f41959c57acfee7e131e489 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 11 Oct 2006 10:03:09 +0200
Subject: [PATCH] splice: fix pipe_to_file() ->prepare_write() error path

Don't jump to the unlock+release path, we already did that.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 13e92dd19fbb..a567010b62ac 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -607,7 +607,7 @@ find_page:
 			ret = -ENOMEM;
 			page = page_cache_alloc_cold(mapping);
 			if (unlikely(!page))
-				goto out_nomem;
+				goto out_ret;
 
 			/*
 			 * This will also lock the page
@@ -666,7 +666,7 @@ find_page:
 		if (sd->pos + this_len > isize)
 			vmtruncate(mapping->host, isize);
 
-		goto out;
+		goto out_ret;
 	}
 
 	if (buf->page != page) {
@@ -698,7 +698,7 @@ find_page:
 out:
 	page_cache_release(page);
 	unlock_page(page);
-out_nomem:
+out_ret:
 	return ret;
 }
 
-- 
cgit v1.2.3


From d58cdfb89ce0c6bd5f81ae931a984ef298dbda20 Mon Sep 17 00:00:00 2001
From: Vasily Tarasov <vtaras@openvz.org>
Date: Thu, 12 Oct 2006 15:09:51 +0200
Subject: [PATCH] block layer: ioprio_best function fix

Currently the ioprio_best function first checks whether aioprio or bioprio equals
IOPRIO_CLASS_NONE (the ioprio_valid() macro does that) and if so it returns
bioprio/aioprio appropriately. Thus the next four lines, that set aclass/bclass
to IOPRIO_CLASS_BE, if aclass/bclass == IOPRIO_CLASS_NONE, are never executed.

The second problem: if aioprio from class IOPRIO_CLASS_NONE and bioprio from
class IOPRIO_CLASS_IDLE are passed to ioprio_best function, it will return
IOPRIO_CLASS_IDLE. It means that during __make_request we can merge two
requests and set the priority of merged request to IDLE, while one of
the initial requests originates from a process with NONE (default) priority.
So we can get a situation when a process with default ioprio will experience
IO starvation, while there is no process from real-time class in the system.

Just removing the ioprio_valid check should correct the situation.

Signed-off-by: Vasily Tarasov <vtaras@openvz.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/ioprio.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ioprio.c b/fs/ioprio.c
index 6dc6721d9e82..89e8da112a75 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -150,11 +150,6 @@ int ioprio_best(unsigned short aprio, unsigned short bprio)
 	unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
 	unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
 
-	if (!ioprio_valid(aprio))
-		return bprio;
-	if (!ioprio_valid(bprio))
-		return aprio;
-
 	if (aclass == IOPRIO_CLASS_NONE)
 		aclass = IOPRIO_CLASS_BE;
 	if (bclass == IOPRIO_CLASS_NONE)
-- 
cgit v1.2.3


From 230a03950ecd63bc613c6adeffbe9049189d9f05 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 12 Oct 2006 15:07:55 +0000
Subject: [CIFS] cifs Kconfig: don't select CONNECTOR `select' is a bit
 obnoxious: the option keeps on coming back and it's hard to work out what to
 do to make it go away again. The use of `depends on' is preferred (although
 it has usability problems too..)

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 530581628311..6865a33544d5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1827,7 +1827,7 @@ config CIFS_EXPERIMENTAL
 config CIFS_UPCALL
 	  bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
 	  depends on CIFS_EXPERIMENTAL
-	  select CONNECTOR
+	  depends on CONNECTOR
 	  help
 	    Enables an upcall mechanism for CIFS which will be used to contact
 	    userspace helper utilities to provide SPNEGO packaged Kerberos
-- 
cgit v1.2.3


From d103e164bee2f21d0efe7d713cbbb0a443ba480d Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 12 Oct 2006 17:49:24 +0000
Subject: [CIFS] Workaround incomplete byte length returned by some servers on
 small SMB responses

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h |  2 +-
 fs/cifs/misc.c      | 42 +++++++++++++++++++++++++++++-------------
 2 files changed, 30 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4a4fd2dbca63..f1f8225102f0 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -55,7 +55,7 @@ extern int SendReceiveBlockingLock(const unsigned int /* xid */ ,
 				struct smb_hdr * /* input */ ,
 				struct smb_hdr * /* out */ ,
 				int * /* bytes returned */);
-extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length);
+extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
 extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *);
 extern int is_size_safe_to_change(struct cifsInodeInfo *);
 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index ca6e9b1413fa..bbc9cd34b6ea 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -418,26 +418,42 @@ checkSMBhdr(struct smb_hdr *smb, __u16 mid)
 }
 
 int
-checkSMB(struct smb_hdr *smb, __u16 mid, int length)
+checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
 {
 	__u32 len = smb->smb_buf_length;
 	__u32 clc_len;  /* calculated length */
 	cFYI(0, ("checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len));
-	if (((unsigned int)length < 2 + sizeof (struct smb_hdr)) ||
-	    (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4)) {
-		if ((unsigned int)length < 2 + sizeof (struct smb_hdr)) {
-			if (((unsigned int)length >= 
-				sizeof (struct smb_hdr) - 1)
+
+	if (length < 2 + sizeof (struct smb_hdr)) {
+		if ((length >= sizeof (struct smb_hdr) - 1)
 			    && (smb->Status.CifsError != 0)) {
-				smb->WordCount = 0;
-				/* some error cases do not return wct and bcc */
+			smb->WordCount = 0;
+			/* some error cases do not return wct and bcc */
+			return 0;
+		} else if ((length == sizeof(struct smb_hdr) + 1) && 
+				(smb->WordCount == 0)) {
+			char * tmp = (char *)smb;
+			/* Need to work around a bug in two servers here */
+			/* First, check if the part of bcc they sent was zero */
+			if (tmp[sizeof(struct smb_hdr)] == 0) {
+				/* some servers return only half of bcc
+				 * on simple responses (wct, bcc both zero)
+				 * in particular have seen this on
+				 * ulogoffX and FindClose. This leaves
+				 * one byte of bcc potentially unitialized
+				 */
+				/* zero rest of bcc */
+				tmp[sizeof(struct smb_hdr)+1] = 0;
 				return 0;
-			} else {
-				cERROR(1, ("Length less than smb header size"));
 			}
+			cERROR(1,("rcvd invalid byte count (bcc)"));
+		} else {
+			cERROR(1, ("Length less than smb header size"));
 		}
-		if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4)
-			cERROR(1, ("smb length greater than MaxBufSize, mid=%d",
+		return 1;
+	}
+	if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+		cERROR(1, ("smb length greater than MaxBufSize, mid=%d",
 				   smb->Mid));
 		return 1;
 	}
@@ -446,7 +462,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, int length)
 		return 1;
 	clc_len = smbCalcSize_LE(smb);
 
-	if(4 + len != (unsigned int)length) {
+	if(4 + len != length) {
 		cERROR(1, ("Length read does not match RFC1001 length %d",len));
 		return 1;
 	}
-- 
cgit v1.2.3


From 52ae7b7935a079aaba25da98fe90772d04109f26 Mon Sep 17 00:00:00 2001
From: Russell Cattelan <cattelan@redhat.com>
Date: Mon, 9 Oct 2006 12:11:54 -0500
Subject: [GFS2] Fix a size calculation error

Fix a size calculation error.
The size was incorrectly being computed as a
negative length and then passed to an
unsigned parameter.

This in turn would cause the allocator to
think it needed enough meta data to store
a gigabyte file for every file created.

Signed-off-by: Russell Cattelan <cattelan@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_address.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 4fb743f4e4a4..bdf56cf66224 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -370,15 +370,17 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
 	loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	struct gfs2_alloc *al;
+	unsigned int write_len = to - from;
+
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh);
 	error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
 	if (error)
 		goto out_uninit;
 
-	gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
+	gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks);
 
-	error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
+	error = gfs2_write_alloc_required(ip, pos, write_len, &alloc_required);
 	if (error)
 		goto out_unlock;
 
-- 
cgit v1.2.3


From f5c54804d9e3bb23d8924af09d9ca1c8de9560b6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 10 Oct 2006 13:45:15 -0400
Subject: [GFS2] Fix uninitialised variable

This fixes a bug where, in certain cases an uninitialised variable
could cause a dereference of a NULL pointer in gfs2_commit_write().
Also a typo in a comment is fixed at the same time.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_address.c | 1 +
 fs/gfs2/rgrp.h        | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index bdf56cf66224..99c933762eb2 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -385,6 +385,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
 		goto out_unlock;
 
 
+	ip->i_alloc.al_requested = 0;
 	if (alloc_required) {
 		al = gfs2_alloc_get(ip);
 
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 9eedfd12bfff..b01e0cfc99b5 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -32,7 +32,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
 struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
 static inline void gfs2_alloc_put(struct gfs2_inode *ip)
 {
-	return; /* Se we can see where ip->i_alloc is used */
+	return; /* So we can see where ip->i_alloc is used */
 }
 
 int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
-- 
cgit v1.2.3


From 1ee48af22ed6dcddea8cdf93c7f2a268cbcf0d56 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sun, 8 Oct 2006 04:30:48 +0200
Subject: [DLM] Kconfig: don't show an empty DLM menu

Don't show an empty "Distributed Lock Manager" menu if IP_SCTP=n.

Reported by Dmytro Bagrii in kernel Bugzilla #7268.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 490f85b3fa59..81b2c6465eeb 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -1,10 +1,9 @@
 menu "Distributed Lock Manager"
-	depends on INET && EXPERIMENTAL
+	depends on INET && IP_SCTP && EXPERIMENTAL
 
 config DLM
 	tristate "Distributed Lock Manager (DLM)"
 	depends on IPV6 || IPV6=n
-	depends on IP_SCTP
 	select CONFIGFS_FS
 	help
 	A general purpose distributed lock manager for kernel or userspace
-- 
cgit v1.2.3


From fe1a698ffef5af546dd4a8cd6a1f2f202491c4ef Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 11 Oct 2006 13:34:59 -0400
Subject: [GFS2] Fix bug where lock not held

The log lock needs to be held when manipulating the counter
for the number of free journal blocks.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 554fe5bd1b72..72eec6542d6a 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -569,16 +569,15 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
 		log_write_header(sdp, 0, PULL);
 	lops_after_commit(sdp, ai);
-	sdp->sd_log_head = sdp->sd_log_flush_head;
 
+	gfs2_log_lock(sdp);
+	sdp->sd_log_head = sdp->sd_log_flush_head;
 	sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
-
 	sdp->sd_log_blks_reserved = 0;
 	sdp->sd_log_commited_buf = 0;
 	sdp->sd_log_num_hdrs = 0;
 	sdp->sd_log_commited_revoke = 0;
 
-	gfs2_log_lock(sdp);
 	if (!list_empty(&ai->ai_ail1_list)) {
 		list_add(&ai->ai_list, &sdp->sd_ail1_list);
 		ai = NULL;
-- 
cgit v1.2.3


From c312c4fdc88514dd9522b7858eb879e610aeb9b1 Mon Sep 17 00:00:00 2001
From: Russell Cattelan <cattelan@redhat.com>
Date: Thu, 12 Oct 2006 09:23:41 -0400
Subject: [GFS2] Pass the correct value to kunmap_atomic

Pass kaddr rather than (incorrect) struct page to kunmap_atomic.

Signed-off-by: Russell Cattelan <cattelan@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/lops.c        | 4 ++--
 fs/gfs2/ops_address.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 881e337b6a70..ab6d1115f95d 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -492,7 +492,7 @@ static int gfs2_check_magic(struct buffer_head *bh)
 	ptr = kaddr + bh_offset(bh);
 	if (*ptr == cpu_to_be32(GFS2_MAGIC))
 		rv = 1;
-	kunmap_atomic(page, KM_USER0);
+	kunmap_atomic(kaddr, KM_USER0);
 
 	return rv;
 }
@@ -626,7 +626,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 				memcpy(bh->b_data,
 				       kaddr + bh_offset(bd2->bd_bh),
 				       sdp->sd_sb.sb_bsize);
-				kunmap_atomic(page, KM_USER0);
+				kunmap_atomic(kaddr, KM_USER0);
 				*(__be32 *)bh->b_data = 0;
 			} else {
 				bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 99c933762eb2..e0599fed99ce 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -162,7 +162,7 @@ static int zero_readpage(struct page *page)
 
 	kaddr = kmap_atomic(page, KM_USER0);
 	memset(kaddr, 0, PAGE_CACHE_SIZE);
-	kunmap_atomic(page, KM_USER0);
+	kunmap_atomic(kaddr, KM_USER0);
 
 	SetPageUptodate(page);
 
@@ -195,7 +195,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
 	       ip->i_di.di_size);
 	memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
-	kunmap_atomic(page, KM_USER0);
+	kunmap_atomic(kaddr, KM_USER0);
 
 	brelse(dibh);
 
@@ -485,7 +485,7 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 		kaddr = kmap_atomic(page, KM_USER0);
 		memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
 		       kaddr + from, to - from);
-		kunmap_atomic(page, KM_USER0);
+		kunmap_atomic(kaddr, KM_USER0);
 
 		SetPageUptodate(page);
 
-- 
cgit v1.2.3


From 4c5e1b1a8c3f591b21f09001d6748296ddff33b8 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Thu, 12 Oct 2006 10:41:22 +0100
Subject: [DLM] fix iovec length in recvmsg

The DLM always passes the iovec length as 1; this is wrong when the circular
buffer wraps round.

Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lowcomms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 7bcea7c5addb..867f93d0417e 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -548,7 +548,7 @@ static int receive_from_sock(void)
 	}
 	len = iov[0].iov_len + iov[1].iov_len;
 
-	r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, 1, len,
+	r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, msg.msg_iovlen, len,
 				 MSG_NOSIGNAL | MSG_DONTWAIT);
 	if (ret <= 0)
 		goto out_close;
-- 
cgit v1.2.3


From 1a4e15a04ec69cb3552f4120079f5472377df5f7 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 12 Oct 2006 21:33:51 +0000
Subject: [CIFS] Missing flags2 for DFS

Partly suggested by Igor Mammedov

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 8 ++++++++
 fs/cifs/connect.c | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6f50f2bc8870..5dc5a966bd5f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3675,6 +3675,14 @@ getDFSRetry:
 		strncpy(pSMB->RequestFileName, searchName, name_len);
 	}
 
+	if(ses->server) {
+		if(ses->server->secMode &
+		   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+			pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
+	}
+
+        pSMB->hdr.Uid = ses->Suid;
+
 	params = 2 /* level */  + name_len /*includes null */ ;
 	pSMB->TotalDataCount = 0;
 	pSMB->DataCount = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c96f3edf1b9c..1d17691086c2 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3219,7 +3219,9 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 			}
 			/* else do not bother copying these informational fields */
 		}
-		if(smb_buffer_response->WordCount == 3)
+		if((smb_buffer_response->WordCount == 3) ||
+			 (smb_buffer_response->WordCount == 7))
+			/* field is in same location */
 			tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport);
 		else
 			tcon->Flags = 0;
-- 
cgit v1.2.3


From 7f14daa19ea36b200d237ad3ac5826ae25360461 Mon Sep 17 00:00:00 2001
From: Petr Vandrovec <petr@vandrovec.name>
Date: Fri, 13 Oct 2006 04:13:16 +0200
Subject: [PATCH] Get core dump code to work...

The file based core dump code was broken by pipe changes - a relative
llseek returns the absolute file position on success, not the relative
one, so dump_seek() always failed when invoked with non-zero current
position.

Only success/failure can be tested with a relative lseek; we have to trust
the kernel that on success we've got the right file offset.  With this fix in
place I finally have real core files instead of 1KB fragments...

Signed-off-by: Petr Vandrovec <petr@vandrovec.name>
[ Cleaned it up a bit while here - use SEEK_CUR instead of hardcoding 1 ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 06435f3665f4..561006127902 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1152,7 +1152,7 @@ static int dump_write(struct file *file, const void *addr, int nr)
 static int dump_seek(struct file *file, loff_t off)
 {
 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
-		if (file->f_op->llseek(file, off, 1) != off)
+		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
 			return 0;
 	} else {
 		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-- 
cgit v1.2.3


From 8f6cff98477edbcd8ae4976734ba7edd07bdd244 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Fri, 13 Oct 2006 12:42:36 -0500
Subject: JFS: pageno needs to be long

diRead and diWrite are representing the page number as an unsigned int.
This causes file system corruption on volumes larger than 16TB.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_imap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 489a3d63002d..ee9b473b7b80 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -318,7 +318,7 @@ int diRead(struct inode *ip)
 	struct inomap *imap;
 	int block_offset;
 	int inodes_left;
-	uint pageno;
+	unsigned long pageno;
 	int rel_inode;
 
 	jfs_info("diRead: ino = %ld", ip->i_ino);
@@ -606,7 +606,7 @@ int diWrite(tid_t tid, struct inode *ip)
 	int block_offset;
 	int inodes_left;
 	struct metapage *mp;
-	uint pageno;
+	unsigned long pageno;
 	int rel_inode;
 	int dioffset;
 	struct inode *ipimap;
-- 
cgit v1.2.3


From 733f99acc82543030ce0417e2f2201ddc63097af Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 14 Oct 2006 16:48:26 +0100
Subject: [PATCH] new cifs endianness bugs

* missing cpu_to_le64() for ChangeTime (introduced by
    [CIFS] Legacy time handling for Win9x and OS/2 part 1)
* missing le16_to_cpu() for DialectIndex (introduced by
    [CIFS] Do not send newer QFSInfo to legacy servers which can not support it)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/cifssmb.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5dc5a966bd5f..098790eb2aa1 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -399,6 +399,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	struct TCP_Server_Info * server;
 	u16 count;
 	unsigned int secFlags;
+	u16 dialect;
 
 	if(ses->server)
 		server = ses->server;
@@ -438,9 +439,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	if (rc != 0) 
 		goto neg_err_exit;
 
-	cFYI(1,("Dialect: %d", pSMBr->DialectIndex));
+	dialect = le16_to_cpu(pSMBr->DialectIndex);
+	cFYI(1,("Dialect: %d", dialect));
 	/* Check wct = 1 error case */
-	if((pSMBr->hdr.WordCount < 13) || (pSMBr->DialectIndex == BAD_PROT)) {
+	if((pSMBr->hdr.WordCount < 13) || (dialect == BAD_PROT)) {
 		/* core returns wct = 1, but we do not ask for core - otherwise
 		small wct just comes when dialect index is -1 indicating we 
 		could not negotiate a common dialect */
@@ -448,8 +450,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 		goto neg_err_exit;
 #ifdef CONFIG_CIFS_WEAK_PW_HASH 
 	} else if((pSMBr->hdr.WordCount == 13)
-			&& ((pSMBr->DialectIndex == LANMAN_PROT)
-				|| (pSMBr->DialectIndex == LANMAN2_PROT))) {
+			&& ((dialect == LANMAN_PROT)
+				|| (dialect == LANMAN2_PROT))) {
 		__s16 tmp;
 		struct lanman_neg_rsp * rsp = (struct lanman_neg_rsp *)pSMBr;
 
@@ -2943,7 +2945,7 @@ QInfRetry:
 		ts.tv_nsec = 0;
 		ts.tv_sec = time;
 		/* decode time fields */
-		pFinfo->ChangeTime = cifs_UnixTimeToNT(ts);
+		pFinfo->ChangeTime = cpu_to_le64(cifs_UnixTimeToNT(ts));
 		pFinfo->LastWriteTime = pFinfo->ChangeTime;
 		pFinfo->LastAccessTime = 0;
 		pFinfo->AllocationSize =
-- 
cgit v1.2.3


From a7a0d86f5aa40a2215e36fe21d7911cf718ba428 Mon Sep 17 00:00:00 2001
From: Petr Vandrovec <petr@vandrovec.name>
Date: Fri, 13 Oct 2006 18:42:07 +0200
Subject: [PATCH] Fix core files so they make sense to gdb...

It is silly to use a non-static variable for writing zeroes to the file.

More seriously, foffset in the core dump function was incremented too much,
so some parts of the core dump were shifted down by the size of a few phdrs
and notes. Although gdb was able to load the file, it did not make a lot of
sense - in my test case data pages were shifted down by about 900 bytes.

Signed-off-by: Petr Vandrovec <petr@vandrovec.name>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 561006127902..79b05a1a4365 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1220,7 +1220,7 @@ static int notesize(struct memelfnote *en)
 
 static int alignfile(struct file *file, loff_t *foffset)
 {
-	char buf[4] = { 0, };
+	static const char buf[4] = { 0, };
 	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
 	return 1;
 }
@@ -1569,7 +1569,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 
 	DUMP_WRITE(elf, sizeof(*elf));
 	offset += sizeof(*elf);				/* Elf header */
-	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */
+	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
+	foffset = offset;
 
 	/* Write notes phdr entry */
 	{
@@ -1586,8 +1587,6 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 		DUMP_WRITE(&phdr, sizeof(phdr));
 	}
 
-	foffset = offset;
-
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
 	/* Write program headers for segments dump */
@@ -1612,7 +1611,6 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 		phdr.p_align = ELF_EXEC_PAGESIZE;
 
 		DUMP_WRITE(&phdr, sizeof(phdr));
-		foffset += sizeof(phdr);
 	}
 
 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
-- 
cgit v1.2.3


From 5eb30790d4ccd3409240a80eaf9ab76b4fb75fd8 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Tue, 17 Oct 2006 00:09:35 -0700
Subject: [PATCH] null dereference in fs/jbd2/journal.c

This is Eric Sesterhenn's jbd patch applied to jbd2.
Commit: 41716c7c21b15e7ecf14f0caf1eef3980707fb74

His words:

Since commit d1807793e1e7e502e3dc047115e9dbc3b50e4534 we dereference a NULL
pointer.  Coverity id #1432.  We set journal to NULL, and use it directly
afterwards.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/journal.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 10db92ced014..c60f378b0f76 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -725,6 +725,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 			__FUNCTION__);
 		kfree(journal);
 		journal = NULL;
+		goto out;
 	}
 	journal->j_dev = bdev;
 	journal->j_fs_dev = fs_dev;
@@ -735,7 +736,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 	J_ASSERT(bh != NULL);
 	journal->j_sb_buffer = bh;
 	journal->j_superblock = (journal_superblock_t *)bh->b_data;
-
+out:
 	return journal;
 }
 
-- 
cgit v1.2.3


From 0187f879ee8d4b914e74ffa3cc5df268311fc2d2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 17 Oct 2006 00:09:41 -0700
Subject: [PATCH] PROC_NUMBUF is wrong

Actually, the decimal representation of a 32-bit signed number can take 12
bytes, including the \0.

And then some code adds a \n as well, so let's give it 13 bytes.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 82da55b5cffe..26a8f8416b79 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -86,7 +86,7 @@
 
 
 /* Worst case buffer size needed for holding an integer. */
-#define PROC_NUMBUF 10
+#define PROC_NUMBUF 13
 
 struct pid_entry {
 	int len;
-- 
cgit v1.2.3


From 9ffbb9162312fd8113037cb3d94f787f06bbfa9a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Tue, 17 Oct 2006 00:10:06 -0700
Subject: [PATCH] fuse: fix hang on SMP

Fuse didn't always call i_size_write() with i_mutex held which caused rare
hangs on SMP/32bit.  This bug has been present since fuse-2.2, well before
being merged into mainline.

The simplest solution is to protect i_size_write() with the per-connection
spinlock.  Using i_mutex for this purpose would require some restructuring of
the code and I'm not even sure it's always safe to acquire i_mutex in all
places i_size needs to be set.

Since most of vmtruncate is already duplicated for other reasons, duplicate
the remaining part as well, making all i_size_write() calls internal to fuse.

Using i_size_write() was unnecessary in fuse_init_inode(), since this function
is only called on a newly created locked inode.

Reported by a few people over the years, but special thanks to Dana Henriksen
who was persistent enough in helping me debug it.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c   | 30 +++++++++++++++++++++---------
 fs/fuse/file.c  | 12 +++++++++---
 fs/fuse/inode.c |  5 ++++-
 3 files changed, 34 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8605155db171..a8f65c11aa2c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -935,14 +935,30 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
 	}
 }
 
+static void fuse_vmtruncate(struct inode *inode, loff_t offset)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	int need_trunc;
+
+	spin_lock(&fc->lock);
+	need_trunc = inode->i_size > offset;
+	i_size_write(inode, offset);
+	spin_unlock(&fc->lock);
+
+	if (need_trunc) {
+		struct address_space *mapping = inode->i_mapping;
+		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+		truncate_inode_pages(mapping, offset);
+	}
+}
+
 /*
  * Set attributes, and at the same time refresh them.
  *
  * Truncation is slightly complicated, because the 'truncate' request
  * may fail, in which case we don't want to touch the mapping.
- * vmtruncate() doesn't allow for this case.  So do the rlimit
- * checking by hand and call vmtruncate() only after the file has
- * actually been truncated.
+ * vmtruncate() doesn't allow for this case, so do the rlimit checking
+ * and the actual truncation by hand.
  */
 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 {
@@ -993,12 +1009,8 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 			make_bad_inode(inode);
 			err = -EIO;
 		} else {
-			if (is_truncate) {
-				loff_t origsize = i_size_read(inode);
-				i_size_write(inode, outarg.attr.size);
-				if (origsize > outarg.attr.size)
-					vmtruncate(inode, outarg.attr.size);
-			}
+			if (is_truncate)
+				fuse_vmtruncate(inode, outarg.attr.size);
 			fuse_change_attributes(inode, &outarg.attr);
 			fi->i_time = time_to_jiffies(outarg.attr_valid,
 						     outarg.attr_valid_nsec);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 183626868eea..2bb5ace3882d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -481,8 +481,10 @@ static int fuse_commit_write(struct file *file, struct page *page,
 		err = -EIO;
 	if (!err) {
 		pos += count;
-		if (pos > i_size_read(inode))
+		spin_lock(&fc->lock);
+		if (pos > inode->i_size)
 			i_size_write(inode, pos);
+		spin_unlock(&fc->lock);
 
 		if (offset == 0 && to == PAGE_CACHE_SIZE) {
 			clear_page_dirty(page);
@@ -586,8 +588,12 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 	}
 	fuse_put_request(fc, req);
 	if (res > 0) {
-		if (write && pos > i_size_read(inode))
-			i_size_write(inode, pos);
+		if (write) {
+			spin_lock(&fc->lock);
+			if (pos > inode->i_size)
+				i_size_write(inode, pos);
+			spin_unlock(&fc->lock);
+		}
 		*ppos = pos;
 	}
 	fuse_invalidate_attr(inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7d0a9aee01f2..8e106163aaed 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -109,6 +109,7 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
 
 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
 {
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
 		invalidate_inode_pages(inode->i_mapping);
 
@@ -117,7 +118,9 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
 	inode->i_nlink   = attr->nlink;
 	inode->i_uid     = attr->uid;
 	inode->i_gid     = attr->gid;
+	spin_lock(&fc->lock);
 	i_size_write(inode, attr->size);
+	spin_unlock(&fc->lock);
 	inode->i_blocks  = attr->blocks;
 	inode->i_atime.tv_sec   = attr->atime;
 	inode->i_atime.tv_nsec  = attr->atimensec;
@@ -130,7 +133,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
 static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
 {
 	inode->i_mode = attr->mode & S_IFMT;
-	i_size_write(inode, attr->size);
+	inode->i_size = attr->size;
 	if (S_ISREG(inode->i_mode)) {
 		fuse_init_common(inode);
 		fuse_init_file_inode(inode);
-- 
cgit v1.2.3


From 8da5ff23ce0a84d9845b01e6fe5047e17836bf5a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Tue, 17 Oct 2006 00:10:08 -0700
Subject: [PATCH] fuse: locking fix for nlookup

An inode could be returned by independent parallel lookups, in this case an
update of the lookup counter could be lost resulting in a memory leak in
userspace.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c   | 2 ++
 fs/fuse/inode.c | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index a8f65c11aa2c..7ecfe95795cd 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -163,7 +163,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 				fuse_send_forget(fc, req, outarg.nodeid, 1);
 				return 0;
 			}
+			spin_lock(&fc->lock);
 			fi->nlookup ++;
+			spin_unlock(&fc->lock);
 		}
 		fuse_put_request(fc, req);
 		if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 8e106163aaed..e9114237f31f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -195,7 +195,9 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
 	}
 
 	fi = get_fuse_inode(inode);
+	spin_lock(&fc->lock);
 	fi->nlookup ++;
+	spin_unlock(&fc->lock);
 	fuse_change_attributes(inode, attr);
 	return inode;
 }
-- 
cgit v1.2.3


From 265126ba9e1f8e217e61d1017c6609f76828aa7a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Tue, 17 Oct 2006 00:10:09 -0700
Subject: [PATCH] fuse: fix spurious BUG

Fix a spurious BUG in an unlikely race, where at least three parallel lookups
return the same inode, but with different file type.  This has not yet been
observed in real life.

Allowing unlimited retries could delay fuse_iget() indefinitely, but this is
really for the broken userspace filesystem to worry about.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/inode.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e9114237f31f..4ee8f72e6380 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -172,7 +172,6 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
 	struct inode *inode;
 	struct fuse_inode *fi;
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
-	int retried = 0;
 
  retry:
 	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
@@ -186,11 +185,9 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
 		fuse_init_inode(inode, attr);
 		unlock_new_inode(inode);
 	} else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
-		BUG_ON(retried);
 		/* Inode has changed type, any I/O on the old should fail */
 		make_bad_inode(inode);
 		iput(inode);
-		retried = 1;
 		goto retry;
 	}
 
-- 
cgit v1.2.3


From d2a85164aaa8d514ef5efbf5d05746e85dd13ddd Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Tue, 17 Oct 2006 00:10:11 -0700
Subject: [PATCH] fuse: fix handling of moved directory

Fuse considered it an error (EIO) if lookup returned a directory inode, to
which a dentry already referred.  This is because directory aliases are not
allowed.

But in a network filesystem this could happen legitimately, if a directory is
moved on a remote client.  This patch attempts to relax the restriction by
trying to first evict the offending alias from the cache.  If this fails, it
still returns an error (EBUSY).

A rarer situation is if an mkdir races with an independent lookup, which
finds the newly created directory already moved.  In this situation the mkdir
should return success, but that would be incorrect, since the dentry cannot be
instantiated, so return EBUSY.

Previously checking for a directory alias and instantiation of the dentry
weren't done atomically in lookup/mkdir, hence two such calls racing with each
other could create aliased directories.  To prevent this introduce a new
per-connection mutex: fuse_conn->inst_mutex, which is taken for instantiations
with a directory inode.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c    | 70 +++++++++++++++++++++++++++++++++++---------------------
 fs/fuse/fuse_i.h |  3 +++
 fs/fuse/inode.c  |  5 +++-
 3 files changed, 51 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 7ecfe95795cd..9d0ef5e18740 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -177,22 +177,6 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 	return 1;
 }
 
-/*
- * Check if there's already a hashed alias of this directory inode.
- * If yes, then lookup and mkdir must not create a new alias.
- */
-static int dir_alias(struct inode *inode)
-{
-	if (S_ISDIR(inode->i_mode)) {
-		struct dentry *alias = d_find_alias(inode);
-		if (alias) {
-			dput(alias);
-			return 1;
-		}
-	}
-	return 0;
-}
-
 static int invalid_nodeid(u64 nodeid)
 {
 	return !nodeid || nodeid == FUSE_ROOT_ID;
@@ -208,6 +192,24 @@ static int valid_mode(int m)
 		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
 }
 
+/*
+ * Add a directory inode to a dentry, ensuring that no other dentry
+ * refers to this inode.  Called with fc->inst_mutex.
+ */
+static int fuse_d_add_directory(struct dentry *entry, struct inode *inode)
+{
+	struct dentry *alias = d_find_alias(inode);
+	if (alias) {
+		/* This tries to shrink the subtree below alias */
+		fuse_invalidate_entry(alias);
+		dput(alias);
+		if (!list_empty(&inode->i_dentry))
+			return -EBUSY;
+	}
+	d_add(entry, inode);
+	return 0;
+}
+
 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 				  struct nameidata *nd)
 {
@@ -243,11 +245,17 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	if (err && err != -ENOENT)
 		return ERR_PTR(err);
 
-	if (inode && dir_alias(inode)) {
-		iput(inode);
-		return ERR_PTR(-EIO);
-	}
-	d_add(entry, inode);
+	if (inode && S_ISDIR(inode->i_mode)) {
+		mutex_lock(&fc->inst_mutex);
+		err = fuse_d_add_directory(entry, inode);
+		mutex_unlock(&fc->inst_mutex);
+		if (err) {
+			iput(inode);
+			return ERR_PTR(err);
+		}
+	} else
+		d_add(entry, inode);
+
 	entry->d_op = &fuse_dentry_operations;
 	if (!err)
 		fuse_change_timeout(entry, &outarg);
@@ -403,12 +411,22 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 	}
 	fuse_put_request(fc, req);
 
-	if (dir_alias(inode)) {
-		iput(inode);
-		return -EIO;
-	}
+	if (S_ISDIR(inode->i_mode)) {
+		struct dentry *alias;
+		mutex_lock(&fc->inst_mutex);
+		alias = d_find_alias(inode);
+		if (alias) {
+			/* New directory must have moved since mkdir */
+			mutex_unlock(&fc->inst_mutex);
+			dput(alias);
+			iput(inode);
+			return -EBUSY;
+		}
+		d_instantiate(entry, inode);
+		mutex_unlock(&fc->inst_mutex);
+	} else
+		d_instantiate(entry, inode);
 
-	d_instantiate(entry, inode);
 	fuse_change_timeout(entry, &outarg);
 	fuse_invalidate_attr(dir);
 	return 0;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 69c7750d55b8..91edb8932d90 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -239,6 +239,9 @@ struct fuse_conn {
 	/** Lock protecting accessess to  members of this structure */
 	spinlock_t lock;
 
+	/** Mutex protecting against directory alias creation */
+	struct mutex inst_mutex;
+
 	/** Refcount */
 	atomic_t count;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4ee8f72e6380..fc4203570370 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -379,6 +379,7 @@ static struct fuse_conn *new_conn(void)
 	fc = kzalloc(sizeof(*fc), GFP_KERNEL);
 	if (fc) {
 		spin_lock_init(&fc->lock);
+		mutex_init(&fc->inst_mutex);
 		atomic_set(&fc->count, 1);
 		init_waitqueue_head(&fc->waitq);
 		init_waitqueue_head(&fc->blocked_waitq);
@@ -398,8 +399,10 @@ static struct fuse_conn *new_conn(void)
 
 void fuse_conn_put(struct fuse_conn *fc)
 {
-	if (atomic_dec_and_test(&fc->count))
+	if (atomic_dec_and_test(&fc->count)) {
+		mutex_destroy(&fc->inst_mutex);
 		kfree(fc);
+	}
 }
 
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
-- 
cgit v1.2.3


From e956edd0523b6b48ed367c63b0c82d8f4c447a58 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Tue, 17 Oct 2006 00:10:12 -0700
Subject: [PATCH] fuse: fix dereferencing dentry parent

There's no locking for ->d_revalidate, so fuse_dentry_revalidate() should use
dget_parent() instead of simply dereferencing ->d_parent.

Due to topology changes in the directory tree the parent could become negative
or be destroyed while being used.  There haven't been any reports about this
yet.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 9d0ef5e18740..cfc8f81e60d0 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -138,6 +138,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		struct fuse_entry_out outarg;
 		struct fuse_conn *fc;
 		struct fuse_req *req;
+		struct dentry *parent;
 
 		/* Doesn't hurt to "reset" the validity timeout */
 		fuse_invalidate_entry_cache(entry);
@@ -151,8 +152,10 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		if (IS_ERR(req))
 			return 0;
 
-		fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
+		parent = dget_parent(entry);
+		fuse_lookup_init(req, parent->d_inode, entry, &outarg);
 		request_send(fc, req);
+		dput(parent);
 		err = req->out.h.error;
 		/* Zero nodeid is same as -ENOENT */
 		if (!err && !outarg.nodeid)
-- 
cgit v1.2.3


From dc730e173785e29b297aa605786c94adaffe2544 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Tue, 17 Oct 2006 00:10:13 -0700
Subject: [PATCH] knfsd: nfsd4: fix owner-override on open

If a client creates a file using an open which sets the mode to 000, or if a
chmod changes permissions after a file is opened, then situations may arise
where an NFS client knows that some IO is permitted (because a process holds
the file open), but the NFS server does not (because it doesn't know about the
open, and only sees that the IO conflicts with the current mode of the file).

As a hack to solve this problem, NFS servers normally allow the owner to
override permissions on IO.  The client can still enforce correct
permissions-checking on open by performing an explicit access check.

In NFSv4 the client can rely on the explicit on-the-wire open instead of an
access check.

Therefore we should not be allowing the owner to override permissions on an
over-the-wire open!

However, we should still allow the owner to override permissions in the case
where the client is claiming an open that it already made either before a
reboot, or while it was holding a delegation.

Thanks to Jim Rees for reporting the bug.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8333db12caca..a05d3376cc46 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -68,20 +68,18 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
 }
 
 static int
-do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode)
 {
-	int accmode, status;
+	int status;
 
 	if (open->op_truncate &&
 		!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
 		return nfserr_inval;
 
-	accmode = MAY_NOP;
 	if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
-		accmode = MAY_READ;
+		accmode |= MAY_READ;
 	if (open->op_share_deny & NFS4_SHARE_ACCESS_WRITE)
 		accmode |= (MAY_WRITE | MAY_TRUNC);
-	accmode |= MAY_OWNER_OVERRIDE;
 
 	status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
 
@@ -124,7 +122,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 				&resfh.fh_handle.fh_base,
 				resfh.fh_handle.fh_size);
 
-		status = do_open_permission(rqstp, current_fh, open);
+		status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
 	}
 
 	fh_put(&resfh);
@@ -155,7 +153,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
 	open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
 		(open->op_iattr.ia_size == 0);
 
-	status = do_open_permission(rqstp, current_fh, open);
+	status = do_open_permission(rqstp, current_fh, open, MAY_OWNER_OVERRIDE);
 
 	return status;
 }
-- 
cgit v1.2.3


From 9801d8a39cfe6c34f39f9552a246a6bd002e735e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Tue, 17 Oct 2006 00:10:14 -0700
Subject: [PATCH] knfsd: nfsd4: fix open permission checking

We weren't actually checking for SHARE_ACCESS_WRITE, with the result that the
owner could open a non-writeable file for write!

Continue to allow DENY_WRITE only with write access.

Thanks to Jim Rees for reporting the bug.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a05d3376cc46..d1fac6872c44 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -78,8 +78,10 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
 
 	if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
 		accmode |= MAY_READ;
-	if (open->op_share_deny & NFS4_SHARE_ACCESS_WRITE)
+	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
 		accmode |= (MAY_WRITE | MAY_TRUNC);
+	if (open->op_share_deny & NFS4_SHARE_DENY_WRITE)
+		accmode |= MAY_WRITE;
 
 	status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
 
-- 
cgit v1.2.3


From 0942176f4353ffebcd6e3f95abce9fd8e24f2cb1 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Tue, 17 Oct 2006 00:10:16 -0700
Subject: [PATCH] knfsd: nfsd4: Fix error handling in nfsd's callback client

Coverity noticed that the error handling code in the NFSv4 callback client
sets cb->cb_client to NULL, then calls rpc_shutdown_client with the NULL
pointer.

Coverity: #cid 1397

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4callback.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f6ca9fb3fc63..324a278f2808 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -421,7 +421,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 
 	/* Create RPC client */
 	cb->cb_client = rpc_create(&args);
-	if (!cb->cb_client) {
+	if (IS_ERR(cb->cb_client)) {
 		dprintk("NFSD: couldn't create callback client\n");
 		goto out_err;
 	}
@@ -448,10 +448,10 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 out_rpciod:
 	atomic_dec(&clp->cl_count);
 	rpciod_down();
-	cb->cb_client = NULL;
 out_clnt:
 	rpc_shutdown_client(cb->cb_client);
 out_err:
+	cb->cb_client = NULL;
 	dprintk("NFSD: warning: no callback path to client %.*s\n",
 		(int)clp->cl_name.len, clp->cl_name.data);
 }
-- 
cgit v1.2.3


From 4481d1038f4116f3f5c307d919e6dc815a3acbb9 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 17 Oct 2006 00:10:17 -0700
Subject: [PATCH] knfsd: Fix bug in recent lockd patches that can cause reclaim
 to fail

When an nfs server shuts down, lockd needs to release all the locks even
though the client still holds them.

It should therefore not 'unmonitor' the clients, so that the files in nfs/sm
will still be there when the nfs server restarts, so that those clients will
be told to reclaim their locks.

However the hosts are fully unmonitored, so statd may well remove the files.

lockd has a test for 'sm_sticky' and avoids the unmonitor call if it is set,
but it is currently not set.

So set it when tearing down lockd.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/svcsubs.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 514f5f20701e..c5f9113cdc70 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -324,7 +324,16 @@ nlmsvc_same_host(struct nlm_host *host, struct nlm_host *other)
 static int
 nlmsvc_is_client(struct nlm_host *host, struct nlm_host *dummy)
 {
-	return host->h_server;
+	if (host->h_server) {
+		/* we are destroying locks even though the client
+		 * hasn't asked us too, so don't unmonitor the
+		 * client
+		 */
+		if (host->h_nsmhandle)
+			host->h_nsmhandle->sm_sticky = 1;
+		return 1;
+	} else
+		return 0;
 }
 
 /*
-- 
cgit v1.2.3


From d343fce148a4eee24a907a05c4101d3268045aae Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 17 Oct 2006 00:10:18 -0700
Subject: [PATCH] knfsd: Allow lockd to drop replies as appropriate

It is possible for the ->fopen callback from lockd into nfsd to find that an
answer cannot be given straight away (an upcall is needed) and so the request
has to be 'dropped', to be retried later.  That error status is not currently
propagated back.

So:
  Change nlm_fopen to return nlm error codes (rather than a private
  protocol) and define a new nlm_drop_reply code.
  Cause nlm_drop_reply to cause the rpc request to get rpc_drop_reply
  when this error comes back.
  Cause svc_process to drop a request which returns a status of
  rpc_drop_reply.

[akpm@osdl.org: fix warning storm]
Cc: Marc Eshel <eshel@almaden.ibm.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/svc4proc.c | 12 ++++++------
 fs/lockd/svcproc.c  | 16 +++++++++-------
 fs/lockd/svcsubs.c  |  6 ------
 fs/nfsd/lockd.c     | 14 ++++++++------
 4 files changed, 23 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index fa370f6eb07b..399ad11b97be 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -96,7 +96,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now check for conflicting locks */
 	resp->status = nlmsvc_testlock(file, &argp->lock, &resp->lock);
@@ -126,7 +126,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 #if 0
 	/* If supplied state doesn't match current state, we assume it's
@@ -169,7 +169,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Try to cancel request. */
 	resp->status = nlmsvc_cancel_blocked(file, &argp->lock);
@@ -202,7 +202,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now try to remove the lock */
 	resp->status = nlmsvc_unlock(file, &argp->lock);
@@ -339,7 +339,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now try to create the share */
 	resp->status = nlmsvc_share_file(host, file, argp);
@@ -372,7 +372,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now try to lock the file */
 	resp->status = nlmsvc_unshare_file(host, file, argp);
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 75b2c81bcb93..6a931f4ab75c 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -59,7 +59,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	struct nlm_host		*host = NULL;
 	struct nlm_file		*file = NULL;
 	struct nlm_lock		*lock = &argp->lock;
-	u32			error;
+	u32			error = 0;
 
 	/* nfsd callbacks must have been installed for this procedure */
 	if (!nlmsvc_ops)
@@ -88,6 +88,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 no_locks:
 	if (host)
 		nlm_release_host(host);
+	if (error)
+		return error;
 	return nlm_lck_denied_nolocks;
 }
 
@@ -122,7 +124,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now check for conflicting locks */
 	resp->status = cast_status(nlmsvc_testlock(file, &argp->lock, &resp->lock));
@@ -153,7 +155,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 #if 0
 	/* If supplied state doesn't match current state, we assume it's
@@ -196,7 +198,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Try to cancel request. */
 	resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock));
@@ -229,7 +231,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now try to remove the lock */
 	resp->status = cast_status(nlmsvc_unlock(file, &argp->lock));
@@ -368,7 +370,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now try to create the share */
 	resp->status = cast_status(nlmsvc_share_file(host, file, argp));
@@ -401,7 +403,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
-		return rpc_success;
+		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now try to unshare the file */
 	resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index c5f9113cdc70..7dac96e6c82c 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -135,12 +135,6 @@ out_unlock:
 
 out_free:
 	kfree(file);
-#ifdef CONFIG_LOCKD_V4
-	if (nfserr == 1)
-		nfserr = nlm4_stale_fh;
-	else
-#endif
-	nfserr = nlm_lck_denied;
 	goto out_unlock;
 }
 
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 7b889ff15ae6..9b9e7e127c03 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -39,18 +39,20 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
 	fh_put(&fh);
 	rqstp->rq_client = NULL;
 	exp_readunlock();
- 	/* nlm and nfsd don't share error codes.
-	 * we invent: 0 = no error
-	 *            1 = stale file handle
-	 *	      2 = other error
+ 	/* We return nlm error codes as nlm doesn't know
+	 * about nfsd, but nfsd does know about nlm..
 	 */
 	switch (nfserr) {
 	case nfs_ok:
 		return 0;
+	case nfserr_dropit:
+		return nlm_drop_reply;
+#ifdef CONFIG_LOCKD_V4
 	case nfserr_stale:
-		return 1;
+		return nlm4_stale_fh;
+#endif
 	default:
-		return 2;
+		return nlm_lck_denied;
 	}
 }
 
-- 
cgit v1.2.3


From 58ff407bee5a55f9c1188a3f9d70ffc79485183c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 17 Oct 2006 00:10:19 -0700
Subject: [PATCH] Fix IO error reporting on fsync()

When an IO error happens on a metadata buffer, the buffer is freed from
memory and fsync() is called later, filesystems like ext2 fail to report
EIO.  We solve the problem by introducing a pointer to the associated
address space into the buffer_head.  When a buffer is removed from a list of
metadata buffers associated with an address space, the IO error is
transferred from the buffer to the address space, so that fsync can later
report it.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index f65ef8821c73..35527dca1dbc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -452,6 +452,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 			       bdevname(bh->b_bdev, b));
 		}
 		set_bit(AS_EIO, &page->mapping->flags);
+		set_buffer_write_io_error(bh);
 		clear_buffer_uptodate(bh);
 		SetPageError(page);
 	}
@@ -571,6 +572,10 @@ EXPORT_SYMBOL(mark_buffer_async_write);
 static inline void __remove_assoc_queue(struct buffer_head *bh)
 {
 	list_del_init(&bh->b_assoc_buffers);
+	WARN_ON(!bh->b_assoc_map);
+	if (buffer_write_io_error(bh))
+		set_bit(AS_EIO, &bh->b_assoc_map->flags);
+	bh->b_assoc_map = NULL;
 }
 
 int inode_has_buffers(struct inode *inode)
@@ -669,6 +674,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 		spin_lock(&buffer_mapping->private_lock);
 		list_move_tail(&bh->b_assoc_buffers,
 				&mapping->private_list);
+		bh->b_assoc_map = mapping;
 		spin_unlock(&buffer_mapping->private_lock);
 	}
 }
@@ -765,7 +771,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 	spin_lock(lock);
 	while (!list_empty(list)) {
 		bh = BH_ENTRY(list->next);
-		list_del_init(&bh->b_assoc_buffers);
+		__remove_assoc_queue(bh);
 		if (buffer_dirty(bh) || buffer_locked(bh)) {
 			list_add(&bh->b_assoc_buffers, &tmp);
 			if (buffer_dirty(bh)) {
@@ -786,7 +792,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 
 	while (!list_empty(&tmp)) {
 		bh = BH_ENTRY(tmp.prev);
-		__remove_assoc_queue(bh);
+		list_del_init(&bh->b_assoc_buffers);
 		get_bh(bh);
 		spin_unlock(lock);
 		wait_on_buffer(bh);
@@ -1167,6 +1173,7 @@ void __bforget(struct buffer_head *bh)
 
 		spin_lock(&buffer_mapping->private_lock);
 		list_del_init(&bh->b_assoc_buffers);
+		bh->b_assoc_map = NULL;
 		spin_unlock(&buffer_mapping->private_lock);
 	}
 	__brelse(bh);
-- 
cgit v1.2.3


From eee44cca665aa1a5663e6a00c2bdfc275739dac5 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Tue, 17 Oct 2006 00:10:23 -0700
Subject: [PATCH] fs/partitions/check: add sysfs error handling

Handle errors thrown in disk_sysfs_symlinks(), and propagate back to
caller.

The callers and associated functions don't do a real good job of handling
kobject errors anyway (add_partition, register_disk, rescan_partitions), so
this should do until something better comes along.

Signed-off-by: Jeff Garzik <jeff@garzik.org>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/check.c | 50 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 51c6a748df49..6fb4b6150d77 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -376,18 +376,48 @@ static char *make_block_name(struct gendisk *disk)
 	return name;
 }
 
-static void disk_sysfs_symlinks(struct gendisk *disk)
+static int disk_sysfs_symlinks(struct gendisk *disk)
 {
 	struct device *target = get_device(disk->driverfs_dev);
+	int err;
+	char *disk_name = NULL;
+
 	if (target) {
-		char *disk_name = make_block_name(disk);
-		sysfs_create_link(&disk->kobj,&target->kobj,"device");
-		if (disk_name) {
-			sysfs_create_link(&target->kobj,&disk->kobj,disk_name);
-			kfree(disk_name);
+		disk_name = make_block_name(disk);
+		if (!disk_name) {
+			err = -ENOMEM;
+			goto err_out;
 		}
+
+		err = sysfs_create_link(&disk->kobj, &target->kobj, "device");
+		if (err)
+			goto err_out_disk_name;
+
+		err = sysfs_create_link(&target->kobj, &disk->kobj, disk_name);
+		if (err)
+			goto err_out_dev_link;
 	}
-	sysfs_create_link(&disk->kobj, &block_subsys.kset.kobj, "subsystem");
+
+	err = sysfs_create_link(&disk->kobj, &block_subsys.kset.kobj,
+				"subsystem");
+	if (err)
+		goto err_out_disk_name_lnk;
+
+	kfree(disk_name);
+
+	return 0;
+
+err_out_disk_name_lnk:
+	if (target) {
+		sysfs_remove_link(&target->kobj, disk_name);
+err_out_dev_link:
+		sysfs_remove_link(&disk->kobj, "device");
+err_out_disk_name:
+		kfree(disk_name);
+err_out:
+		put_device(target);
+	}
+	return err;
 }
 
 /* Not exported, helper to add_disk(). */
@@ -406,7 +436,11 @@ void register_disk(struct gendisk *disk)
 		*s = '!';
 	if ((err = kobject_add(&disk->kobj)))
 		return;
-	disk_sysfs_symlinks(disk);
+	err = disk_sysfs_symlinks(disk);
+	if (err) {
+		kobject_del(&disk->kobj);
+		return;
+	}
  	disk_sysfs_add_subdirs(disk);
 
 	/* No minors to use for partitions */
-- 
cgit v1.2.3


From e42344514c6e8ca7f5427da9b1407b56550dfa01 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Wed, 20 Sep 2006 16:38:00 +0900
Subject: sysfs: remove duplicated dput in sysfs_update_file

The following function can drop d_count twice against the single reference
taken by lookup_one_len.

<SOURCE>
/**
 * sysfs_update_file - update the modified timestamp on an object attribute.
 * @kobj: object we're acting for.
 * @attr: attribute descriptor.
 */
int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
{
        struct dentry * dir = kobj->dentry;
        struct dentry * victim;
        int res = -ENOENT;

        mutex_lock(&dir->d_inode->i_mutex);
        victim = lookup_one_len(attr->name, dir, strlen(attr->name));
        if (!IS_ERR(victim)) {
                /* make sure dentry is really there */
                if (victim->d_inode &&
                    (victim->d_parent->d_inode == dir->d_inode)) {
                        victim->d_inode->i_mtime = CURRENT_TIME;
                        fsnotify_modify(victim);

                        /**
                         * Drop reference from initial sysfs_get_dentry().
                         */
                        dput(victim);
                        res = 0;
                } else
                        d_drop(victim);

                /**
                 * Drop the reference acquired from sysfs_get_dentry() above.
                 */
                dput(victim);
        }
        mutex_unlock(&dir->d_inode->i_mutex);

        return res;
}
</SOURCE>

PCI-hotplug (drivers/pci/hotplug/pci_hotplug_core.c) is the only user of
this function. I confirmed that the dentries of /sys/bus/pci/slots/XXX/*
have negative d_count values.

This patch removes the unnecessary dput().

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/file.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 146f1dedec84..93218ccb2f6b 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -483,11 +483,6 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 		    (victim->d_parent->d_inode == dir->d_inode)) {
 			victim->d_inode->i_mtime = CURRENT_TIME;
 			fsnotify_modify(victim);
-
-			/**
-			 * Drop reference from initial sysfs_get_dentry().
-			 */
-			dput(victim);
 			res = 0;
 		} else
 			d_drop(victim);
-- 
cgit v1.2.3


From 97a501849d60f3dbb8bfcd2300cf65dd5ebc0355 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Wed, 20 Sep 2006 16:49:02 +0900
Subject: sysfs: update obsolete comment in sysfs_update_file

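The comment above the final dput() in sysfs_update_file() still refers to
sysfs_get_dentry(), but the reference is taken by lookup_one_len().
Update the obsolete comment accordingly.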

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 93218ccb2f6b..298303b5a716 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -488,7 +488,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 			d_drop(victim);
 		
 		/**
-		 * Drop the reference acquired from sysfs_get_dentry() above.
+		 * Drop the reference acquired from lookup_one_len() above.
 		 */
 		dput(victim);
 	}
-- 
cgit v1.2.3


From 62752ee198dca9209b7dee504763e51b11e9e0ca Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 17 Oct 2006 10:31:38 +0200
Subject: [PATCH] Take i_mutex in splice_from_pipe()

The splice_actor may be calling ->prepare_write() and ->commit_write(). We
want i_mutex on the inode being written to before calling those so that we
don't race i_size changes.

The double locking behavior is done elsewhere in splice.c, and if we
eventually want _nolock variants of generic_file_splice_write(), fs modules
might have to replicate the nasty locking code. We introduce
inode_double_lock() and inode_double_unlock() to consolidate the locking
rules into one set of functions.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/inode.c  | 36 ++++++++++++++++++++++++++++++++++++
 fs/splice.c | 24 +++++++++++-------------
 2 files changed, 47 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index d9a21d122926..26cdb115ce67 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode)
 	wake_up_bit(&inode->i_state, __I_LOCK);
 }
 
+/*
+ * We rarely want to lock two inodes that do not have a parent/child
+ * relationship (such as directory, child inode) simultaneously. The
+ * vast majority of file systems should be able to get along fine
+ * without this. Do not use these functions except as a last resort.
+ */
+void inode_double_lock(struct inode *inode1, struct inode *inode2)
+{
+	if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
+		if (inode1)
+			mutex_lock(&inode1->i_mutex);
+		else if (inode2)
+			mutex_lock(&inode2->i_mutex);
+		return;
+	}
+
+	if (inode1 < inode2) {
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+	} else {
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
+	}
+}
+EXPORT_SYMBOL(inode_double_lock);
+
+void inode_double_unlock(struct inode *inode1, struct inode *inode2)
+{
+	if (inode1)
+		mutex_unlock(&inode1->i_mutex);
+
+	if (inode2 && inode2 != inode1)
+		mutex_unlock(&inode2->i_mutex);
+}
+EXPORT_SYMBOL(inode_double_unlock);
+
 static __initdata unsigned long ihash_entries;
 static int __init set_ihash_entries(char *str)
 {
diff --git a/fs/splice.c b/fs/splice.c
index a567010b62ac..c1072b6940c3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -713,6 +713,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 {
 	int ret, do_wakeup, err;
 	struct splice_desc sd;
+	struct inode *inode = out->f_mapping->host;
 
 	ret = 0;
 	do_wakeup = 0;
@@ -722,8 +723,13 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	sd.file = out;
 	sd.pos = *ppos;
 
-	if (pipe->inode)
-		mutex_lock(&pipe->inode->i_mutex);
+	/*
+	 * The actor worker might be calling ->prepare_write and
+	 * ->commit_write. Most of the time, these expect i_mutex to
+	 * be held. Since this may result in an ABBA deadlock with
+	 * pipe->inode, we have to order lock acquiry here.
+	 */
+	inode_double_lock(inode, pipe->inode);
 
 	for (;;) {
 		if (pipe->nrbufs) {
@@ -797,8 +803,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 		pipe_wait(pipe);
 	}
 
-	if (pipe->inode)
-		mutex_unlock(&pipe->inode->i_mutex);
+	inode_double_unlock(inode, pipe->inode);
 
 	if (do_wakeup) {
 		smp_mb();
@@ -1400,13 +1405,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 	 * grabbing by inode address. Otherwise two different processes
 	 * could deadlock (one doing tee from A -> B, the other from B -> A).
 	 */
-	if (ipipe->inode < opipe->inode) {
-		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
-		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
-	} else {
-		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
-		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
-	}
+	inode_double_lock(ipipe->inode, opipe->inode);
 
 	do {
 		if (!opipe->readers) {
@@ -1450,8 +1449,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		i++;
 	} while (len);
 
-	mutex_unlock(&ipipe->inode->i_mutex);
-	mutex_unlock(&opipe->inode->i_mutex);
+	inode_double_unlock(ipipe->inode, opipe->inode);
 
 	/*
 	 * If we put data in the output pipe, wakeup any potential readers.
-- 
cgit v1.2.3


From 6da61809822c22634a3de2dcb3c60283b836a88a Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 17 Oct 2006 18:43:07 +0200
Subject: [PATCH] Introduce generic_file_splice_write_nolock()

This allows file systems to manage their own i_mutex locking while
still re-using the generic_file_splice_write() logic.

OCFS2 in particular wants this so that it can order cluster locks within
i_mutex.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 66 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index c1072b6940c3..68e20e65c6e1 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -707,13 +707,12 @@ out_ret:
  * key here is the 'actor' worker passed in that actually moves the data
  * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
  */
-ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
-			 loff_t *ppos, size_t len, unsigned int flags,
-			 splice_actor *actor)
+static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
+				  struct file *out, loff_t *ppos, size_t len,
+				  unsigned int flags, splice_actor *actor)
 {
 	int ret, do_wakeup, err;
 	struct splice_desc sd;
-	struct inode *inode = out->f_mapping->host;
 
 	ret = 0;
 	do_wakeup = 0;
@@ -723,14 +722,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	sd.file = out;
 	sd.pos = *ppos;
 
-	/*
-	 * The actor worker might be calling ->prepare_write and
-	 * ->commit_write. Most of the time, these expect i_mutex to
-	 * be held. Since this may result in an ABBA deadlock with
-	 * pipe->inode, we have to order lock acquiry here.
-	 */
-	inode_double_lock(inode, pipe->inode);
-
 	for (;;) {
 		if (pipe->nrbufs) {
 			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
@@ -803,8 +794,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 		pipe_wait(pipe);
 	}
 
-	inode_double_unlock(inode, pipe->inode);
-
 	if (do_wakeup) {
 		smp_mb();
 		if (waitqueue_active(&pipe->wait))
@@ -815,6 +804,69 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	return ret;
 }
 
+ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
+			 loff_t *ppos, size_t len, unsigned int flags,
+			 splice_actor *actor)
+{
+	ssize_t ret;
+	struct inode *inode = out->f_mapping->host;
+
+	/*
+	 * The actor worker might be calling ->prepare_write and
+	 * ->commit_write. Most of the time, these expect i_mutex to
+	 * be held. Since this may result in an ABBA deadlock with
+	 * pipe->inode, we have to order lock acquiry here.
+	 */
+	inode_double_lock(inode, pipe->inode);
+	ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor);
+	inode_double_unlock(inode, pipe->inode);
+
+	return ret;
+}
+
+/**
+ * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
+ * @pipe:	pipe info
+ * @out:	file to write to
+ * @len:	number of bytes to splice
+ * @flags:	splice modifier flags
+ *
+ * Will either move or copy pages (determined by @flags options) from
+ * the given pipe inode to the given file. The caller is responsible
+ * for acquiring i_mutex on both inodes.
+ *
+ */
+ssize_t
+generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
+				 loff_t *ppos, size_t len, unsigned int flags)
+{
+	struct address_space *mapping = out->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+	int err;
+
+	ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+	if (ret > 0) {
+		*ppos += ret;
+
+		/*
+		 * If file or inode is SYNC and we actually wrote some data,
+		 * sync it.
+		 */
+		if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
+			err = generic_osync_inode(inode, mapping,
+						  OSYNC_METADATA|OSYNC_DATA);
+
+			if (err)
+				ret = err;
+		}
+	}
+
+	return ret;
+}
+
+EXPORT_SYMBOL(generic_file_splice_write_nolock);
+
 /**
  * generic_file_splice_write - splice data from a pipe to a file
  * @pipe:	pipe info
-- 
cgit v1.2.3


From 8c34e2d63231d4bf4852bac8521883944d770fe3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 17 Oct 2006 19:43:22 +0200
Subject: [PATCH] Remove SUID when splicing into an inode

Originally from Mark Fasheh <mark.fasheh@oracle.com>

generic_file_splice_write() does not remove S_ISUID or S_ISGID. This is
inconsistent with the way we generally write to files.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 68e20e65c6e1..49fb9f129938 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -845,6 +845,10 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
 	ssize_t ret;
 	int err;
 
+	err = remove_suid(out->f_dentry);
+	if (unlikely(err))
+		return err;
+
 	ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
 	if (ret > 0) {
 		*ppos += ret;
@@ -883,12 +887,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 			  loff_t *ppos, size_t len, unsigned int flags)
 {
 	struct address_space *mapping = out->f_mapping;
+	struct inode *inode = mapping->host;
 	ssize_t ret;
+	int err;
+
+	err = should_remove_suid(out->f_dentry);
+	if (unlikely(err)) {
+		mutex_lock(&inode->i_mutex);
+		err = __remove_suid(out->f_dentry, err);
+		mutex_unlock(&inode->i_mutex);
+		if (err)
+			return err;
+	}
 
 	ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
 	if (ret > 0) {
-		struct inode *inode = mapping->host;
-
 		*ppos += ret;
 
 		/*
@@ -896,8 +909,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 		 * sync it.
 		 */
 		if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
-			int err;
-
 			mutex_lock(&inode->i_mutex);
 			err = generic_osync_inode(inode, mapping,
 						  OSYNC_METADATA|OSYNC_DATA);
-- 
cgit v1.2.3


From 42fb00838a644d03f9a2a5fbbe0b668a5ff5df4d Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Fri, 13 Oct 2006 17:12:05 +0100
Subject: [DLM] fix iovec length in recvmsg

I didn't spot that the msg_iovlen was set to 2 if there
were two elements in the iovec but left at zero if not :(

I think this might be why bob was still seeing trouble.

Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lowcomms.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 867f93d0417e..6da6b14d5a61 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -519,6 +519,7 @@ static int receive_from_sock(void)
 	msg.msg_flags = 0;
 	msg.msg_control = incmsg;
 	msg.msg_controllen = sizeof(incmsg);
+	msg.msg_iovlen = 1;
 
 	/* I don't see why this circular buffer stuff is necessary for SCTP
 	 * which is a packet-based protocol, but the whole thing breaks under
-- 
cgit v1.2.3


From 23591256d61354e20f12e98d7a496ad5c23de74c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 13 Oct 2006 17:25:45 -0400
Subject: [GFS2] Fix bmap to map extents properly

This fix means that bmap will map extents of the length requested
by the VFS rather than guessing at it, or just mapping one block
at a time. The other callers of gfs2_block_map are audited to ensure
they send the correct max extent lengths (i.e. set bh->b_size correctly).

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c        | 13 +++++++------
 fs/gfs2/bmap.h        |  2 +-
 fs/gfs2/log.c         |  6 ++++--
 fs/gfs2/ops_address.c |  6 +++---
 fs/gfs2/quota.c       |  5 +++--
 fs/gfs2/recovery.c    |  5 +++--
 6 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index cc57f2ecd219..06e9a8cb45e9 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -434,8 +434,7 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
  */
 
 static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
-			       struct buffer_head *bh_map, struct metapath *mp,
-			       unsigned int maxlen)
+			       struct buffer_head *bh_map, struct metapath *mp)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -448,6 +447,7 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
 	int new = 0;
 	u64 dblock = 0;
 	int boundary;
+	unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
 
 	BUG_ON(maxlen == 0);
 
@@ -541,13 +541,13 @@ static inline void bmap_unlock(struct inode *inode, int create)
 }
 
 int gfs2_block_map(struct inode *inode, u64 lblock, int create,
-		   struct buffer_head *bh, unsigned int maxlen)
+		   struct buffer_head *bh)
 {
 	struct metapath mp;
 	int ret;
 
 	bmap_lock(inode, create);
-	ret = gfs2_block_pointers(inode, lblock, create, bh, &mp, maxlen);
+	ret = gfs2_block_pointers(inode, lblock, create, bh, &mp);
 	bmap_unlock(inode, create);
 	return ret;
 }
@@ -555,7 +555,7 @@ int gfs2_block_map(struct inode *inode, u64 lblock, int create,
 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
 {
 	struct metapath mp;
-	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0, .b_size = 0 };
+	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
 	int ret;
 	int create = *new;
 
@@ -563,8 +563,9 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
 	BUG_ON(!dblock);
 	BUG_ON(!new);
 
+	bh.b_size = 1 << (inode->i_blkbits + 5);
 	bmap_lock(inode, create);
-	ret = gfs2_block_pointers(inode, lblock, create, &bh, &mp, 32);
+	ret = gfs2_block_pointers(inode, lblock, create, &bh, &mp);
 	bmap_unlock(inode, create);
 	*extlen = bh.b_size >> inode->i_blkbits;
 	*dblock = bh.b_blocknr;
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 0fd379b4cd9e..ac2fd04370dc 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -15,7 +15,7 @@ struct gfs2_inode;
 struct page;
 
 int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
-int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh, unsigned int maxlen);
+int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh);
 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen);
 
 int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 72eec6542d6a..0cace3da9dbb 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -312,10 +312,12 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
 
 static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
 {
+	struct inode *inode = sdp->sd_jdesc->jd_inode;
 	int error;
-	struct buffer_head bh_map;
+	struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
 
-	error = gfs2_block_map(sdp->sd_jdesc->jd_inode, lbn, 0, &bh_map, 1);
+	bh_map.b_size = 1 << inode->i_blkbits;
+	error = gfs2_block_map(inode, lbn, 0, &bh_map);
 	if (error || !bh_map.b_blocknr)
 		printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, bh_map.b_blocknr, lbn);
 	gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index e0599fed99ce..8d5963c7e123 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -65,7 +65,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
 int gfs2_get_block(struct inode *inode, sector_t lblock,
 	           struct buffer_head *bh_result, int create)
 {
-	return gfs2_block_map(inode, lblock, create, bh_result, 32);
+	return gfs2_block_map(inode, lblock, create, bh_result);
 }
 
 /**
@@ -83,7 +83,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
 {
 	int error;
 
-	error = gfs2_block_map(inode, lblock, 0, bh_result, 1);
+	error = gfs2_block_map(inode, lblock, 0, bh_result);
 	if (error)
 		return error;
 	if (bh_result->b_blocknr == 0)
@@ -94,7 +94,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
 static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
 				 struct buffer_head *bh_result, int create)
 {
-	return gfs2_block_map(inode, lblock, 0, bh_result, 32);
+	return gfs2_block_map(inode, lblock, 0, bh_result);
 }
 
 /**
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c69b94a55588..a3deae7416c9 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -251,7 +251,7 @@ static int bh_get(struct gfs2_quota_data *qd)
 	unsigned int block, offset;
 	struct buffer_head *bh;
 	int error;
-	struct buffer_head bh_map;
+	struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
 
 	mutex_lock(&sdp->sd_quota_mutex);
 
@@ -263,7 +263,8 @@ static int bh_get(struct gfs2_quota_data *qd)
 	block = qd->qd_slot / sdp->sd_qc_per_block;
 	offset = qd->qd_slot % sdp->sd_qc_per_block;;
 
-	error = gfs2_block_map(&ip->i_inode, block, 0, &bh_map, 1);
+	bh_map.b_size = 1 << ip->i_inode.i_blkbits;
+	error = gfs2_block_map(&ip->i_inode, block, 0, &bh_map);
 	if (error)
 		goto fail;
 	error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 0a8a4b87dcc6..62cd223819b7 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -372,11 +372,12 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
 	u32 hash;
 	struct buffer_head *bh;
 	int error;
-	struct buffer_head bh_map;
+	struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
 
 	lblock = head->lh_blkno;
 	gfs2_replay_incr_blk(sdp, &lblock);
-	error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map, 1);
+	bh_map.b_size = 1 << ip->i_inode.i_blkbits;
+	error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map);
 	if (error)
 		return error;
 	if (!bh_map.b_blocknr) {
-- 
cgit v1.2.3


From a2d7d021d78dbc00d24d9c809c64a7f3e61fa773 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 14 Oct 2006 16:49:30 +0100
Subject: [GFS2] gfs2 endianness bug: be16 assigned to be32 field

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 459498cac93b..d43caf04bb68 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -815,7 +815,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
 	leaf = (struct gfs2_leaf *)bh->b_data;
 	leaf->lf_depth = cpu_to_be16(depth);
 	leaf->lf_entries = 0;
-	leaf->lf_dirent_format = cpu_to_be16(GFS2_FORMAT_DE);
+	leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
 	leaf->lf_next = 0;
 	memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved));
 	dent = (struct gfs2_dirent *)(leaf+1);
-- 
cgit v1.2.3


From abbdbd2065e74411dc2c401501c2c85a82f60e06 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 19 Oct 2006 15:12:24 +0200
Subject: [GFS2] fs/gfs2/dir.c:gfs2_dir_write_data(): remove dead code

The Coverity checker spotted this obviously dead code.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index d43caf04bb68..ce52bd954df6 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -212,8 +212,6 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
 		gfs2_trans_add_bh(ip->i_gl, bh, 1);
 		memcpy(bh->b_data + o, buf, amount);
 		brelse(bh);
-		if (error)
-			goto fail;
 
 		buf += amount;
 		copied += amount;
-- 
cgit v1.2.3


From b0cb66955f4bf7a72b544096ceef48a829361a3c Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 19 Oct 2006 15:13:26 +0200
Subject: [GFS2] fs/gfs2/ops_fstype.c:gfs2_get_sb_meta(): remove unused
 variable

The Coverity checker spotted this unused variable.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_fstype.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 178b33911843..e99444dffeb8 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -854,7 +854,6 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
 	int error = 0;
 	struct super_block *sb = NULL, *new;
 	struct gfs2_sbd *sdp;
-	char *gfs2mnt = NULL;
 
 	sb = get_gfs2_sb(dev_name);
 	if (!sb) {
@@ -892,8 +891,6 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
 	atomic_inc(&sdp->sd_gfs2mnt->mnt_count);
 	return simple_set_mnt(mnt, new);
 error:
-	if (gfs2mnt)
-		kfree(gfs2mnt);
 	return error;
 }
 
-- 
cgit v1.2.3


From 348acd48f050f5ba7fa917b1421ae34443be97dd Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 19 Oct 2006 15:20:04 +0200
Subject: [GFS2] fs/gfs2/dir.c:gfs2_dir_write_data(): don't use an
 uninitialized variable

In the "if (extlen)" case, "new" might be used uninitialized.

Looking at the code, it should be initialized to 0.

Spotted by the Coverity checker.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index ce52bd954df6..ead7df066853 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -184,7 +184,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
 	while (copied < size) {
 		unsigned int amount;
 		struct buffer_head *bh;
-		int new;
+		int new = 0;
 
 		amount = size - copied;
 		if (amount > sdp->sd_sb.sb_bsize - o)
-- 
cgit v1.2.3


From bbbe4512735eb0f15f09ffd14876091a8e91bc69 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 19 Oct 2006 15:27:00 +0200
Subject: [GFS2] fs/gfs2/ops_fstype.c:fill_super_meta(): fix NULL dereference

Don't dereference new->s_root when we do know it's NULL.

Spotted by the Coverity checker.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_fstype.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e99444dffeb8..882873a6bd69 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -794,8 +794,8 @@ static int fill_super_meta(struct super_block *sb, struct super_block *new,
 		fs_err(sdp, "can't get root dentry\n");
 		error = -ENOMEM;
 		iput(inode);
-	}
-	new->s_root->d_op = &gfs2_dops;
+	} else
+		new->s_root->d_op = &gfs2_dops;
 
 	return error;
 }
-- 
cgit v1.2.3


From b7d8ac3e1779c30ddef0a8f38042076c5007a23d Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 19 Oct 2006 16:02:07 +0200
Subject: [GFS2] gfs2_dir_read_data(): fix uninitialized variable usage

In the "if (extlen)" case, "bh" was used uninitialized.

This patch changes the code to what seems to have been intended.

Spotted by the Coverity checker.

This patch also removes a pointless "bh = NULL" assignment (the variable
is never accessed again after this point).

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index ead7df066853..e24af28b1a12 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -315,8 +315,7 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 			if (!ra)
 				extlen = 1;
 			bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
-		}
-		if (!bh) {
+		} else {
 			error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
 			if (error)
 				goto fail;
@@ -330,7 +329,6 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 		extlen--;
 		memcpy(buf, bh->b_data + o, amount);
 		brelse(bh);
-		bh = NULL;
 		buf += amount;
 		copied += amount;
 		lblock++;
-- 
cgit v1.2.3


From 26da82058e62ea173559a26881b16d10089645ba Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 19 Oct 2006 23:28:14 -0700
Subject: [PATCH] ecryptfs: use special_file()

Use the special_file() macro to check whether an inode is special instead of
open-coding it.

Acked-by: Mike Halcrow <mhalcrow@us.ibm.com>
Cc: Phillip Hellewell <phillip@hellewell.homeip.net>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/main.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 7a11b8ae6644..5938a232d11b 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -104,10 +104,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 		inode->i_op = &ecryptfs_dir_iops;
 	if (S_ISDIR(lower_inode->i_mode))
 		inode->i_fop = &ecryptfs_dir_fops;
-	/* TODO: Is there a better way to identify if the inode is
-	 * special? */
-	if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
-	    S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
+	if (special_file(lower_inode->i_mode))
 		init_special_inode(inode, lower_inode->i_mode,
 				   lower_inode->i_rdev);
 	dentry->d_op = &ecryptfs_dops;
-- 
cgit v1.2.3


From 3fcfab16c5b86eaa3db3a9a31adba550c5b67141 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 19 Oct 2006 23:28:16 -0700
Subject: [PATCH] separate bdi congestion functions from queue congestion
 functions

Separate out the concept of "queue congestion" from "backing-dev congestion".
Congestion is a backing-dev concept, not a queue concept.

The blk_* congestion functions are retained, as wrappers around the core
backing-dev congestion functions.

This proper layering is needed so that NFS can cleanly use the congestion
functions, and so that CONFIG_BLOCK=n actually links.

Cc: "Thomas Maier" <balagi@justmail.de>
Cc: "Jens Axboe" <jens.axboe@oracle.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: David Howells <dhowells@redhat.com>
Cc: Peter Osterlund <petero2@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fat/file.c              | 3 ++-
 fs/nfs/write.c             | 4 +++-
 fs/reiserfs/journal.c      | 3 ++-
 fs/xfs/linux-2.6/kmem.c    | 5 +++--
 fs/xfs/linux-2.6/xfs_buf.c | 3 ++-
 5 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/file.c b/fs/fat/file.c
index f4b8f8b3fbdd..8337451e7897 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -13,6 +13,7 @@
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 
 int fat_generic_ioctl(struct inode *inode, struct file *filp,
@@ -118,7 +119,7 @@ static int fat_file_release(struct inode *inode, struct file *filp)
 	if ((filp->f_mode & FMODE_WRITE) &&
 	     MSDOS_SB(inode->i_sb)->options.flush) {
 		fat_flush_inodes(inode->i_sb, inode, NULL);
-		blk_congestion_wait(WRITE, HZ/10);
+		congestion_wait(WRITE, HZ/10);
 	}
 	return 0;
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f6675d2c386c..ca92ac36fe9d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -57,6 +57,8 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
 #include <linux/nfs_page.h>
+#include <linux/backing-dev.h>
+
 #include <asm/uaccess.h>
 #include <linux/smp_lock.h>
 
@@ -395,7 +397,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 out:
 	clear_bit(BDI_write_congested, &bdi->state);
 	wake_up_all(&nfs_write_congestion);
-	writeback_congestion_end();
+	congestion_end(WRITE);
 	return err;
 }
 
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index ad8cbc49883a..85ce23268302 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -53,6 +53,7 @@
 #include <linux/workqueue.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
+#include <linux/backing-dev.h>
 
 /* gets a struct reiserfs_journal_list * from a list head */
 #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -970,7 +971,7 @@ int reiserfs_async_progress_wait(struct super_block *s)
 	DEFINE_WAIT(wait);
 	struct reiserfs_journal *j = SB_JOURNAL(s);
 	if (atomic_read(&j->j_async_throttle))
-		blk_congestion_wait(WRITE, HZ / 10);
+		congestion_wait(WRITE, HZ / 10);
 	return 0;
 }
 
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index d59737589815..004baf600611 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -21,6 +21,7 @@
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/blkdev.h>
+#include <linux/backing-dev.h>
 #include "time.h"
 #include "kmem.h"
 
@@ -53,7 +54,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 			printk(KERN_ERR "XFS: possible memory allocation "
 					"deadlock in %s (mode:0x%x)\n",
 					__FUNCTION__, lflags);
-		blk_congestion_wait(WRITE, HZ/50);
+		congestion_wait(WRITE, HZ/50);
 	} while (1);
 }
 
@@ -131,7 +132,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 			printk(KERN_ERR "XFS: possible memory allocation "
 					"deadlock in %s (mode:0x%x)\n",
 					__FUNCTION__, lflags);
-		blk_congestion_wait(WRITE, HZ/50);
+		congestion_wait(WRITE, HZ/50);
 	} while (1);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9bbadafdcb00..db5f5a3608ca 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -30,6 +30,7 @@
 #include <linux/hash.h>
 #include <linux/kthread.h>
 #include <linux/migrate.h>
+#include <linux/backing-dev.h>
 #include "xfs_linux.h"
 
 STATIC kmem_zone_t *xfs_buf_zone;
@@ -395,7 +396,7 @@ _xfs_buf_lookup_pages(
 
 			XFS_STATS_INC(xb_page_retries);
 			xfsbufd_wakeup(0, gfp_mask);
-			blk_congestion_wait(WRITE, HZ/50);
+			congestion_wait(WRITE, HZ/50);
 			goto retry;
 		}
 
-- 
cgit v1.2.3


From 8ac773b4f73afa6fd66695131103944b975d5d5c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 19 Oct 2006 23:28:32 -0700
Subject: [PATCH] OOM killer meets userspace headers

Although mm.h is not an exported header, it does contain one thing
which is part of the userspace ABI -- the value that disables the OOM
killer for a given process. So,
a) create and export include/linux/oom.h
b) move OOM_DISABLE define there.
c) turn bounding values of /proc/$PID/oom_adj into defines and export
   them too.

Note: mass __KERNEL__ removal will be done later.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 26a8f8416b79..8df27401d292 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -72,6 +72,7 @@
 #include <linux/audit.h>
 #include <linux/poll.h>
 #include <linux/nsproxy.h>
+#include <linux/oom.h>
 #include "internal.h"
 
 /* NOTE:
@@ -689,7 +690,8 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
 	oom_adjust = simple_strtol(buffer, &end, 0);
-	if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE)
+	if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
+	     oom_adjust != OOM_DISABLE)
 		return -EINVAL;
 	if (*end == '\n')
 		end++;
-- 
cgit v1.2.3


From f2fbc6c2dad7bbcbf226c094749534f1e84d3be2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 19 Oct 2006 23:28:35 -0700
Subject: [PATCH] fs/Kconfig: move GENERIC_ACL, fix acl() call errors

GENERIC_ACL shouldn't be under Network File Systems (which made it depend
on NET) as far as I can tell.  Having it there and having many (FS) config
symbols disabled gives this (which the patch fixes):

mm/built-in.o: In function `shmem_check_acl':
shmem_acl.c:(.text.shmem_check_acl+0x33): undefined reference to `posix_acl_permission'
fs/built-in.o: In function `generic_acl_get':
(.text.generic_acl_get+0x30): undefined reference to `posix_acl_to_xattr'
fs/built-in.o: In function `generic_acl_set':
(.text.generic_acl_set+0x75): undefined reference to `posix_acl_from_xattr'
fs/built-in.o: In function `generic_acl_set':
(.text.generic_acl_set+0x94): undefined reference to `posix_acl_valid'
fs/built-in.o: In function `generic_acl_set':
(.text.generic_acl_set+0xc1): undefined reference to `posix_acl_equiv_mode'
fs/built-in.o: In function `generic_acl_init':
(.text.generic_acl_init+0x7a): undefined reference to `posix_acl_clone'
fs/built-in.o: In function `generic_acl_init':
(.text.generic_acl_init+0xb4): undefined reference to `posix_acl_clone'
fs/built-in.o: In function `generic_acl_init':
(.text.generic_acl_init+0xc8): undefined reference to `posix_acl_create_masq'
fs/built-in.o: In function `generic_acl_chmod':
(.text.generic_acl_chmod+0x49): undefined reference to `posix_acl_clone'
fs/built-in.o: In function `generic_acl_chmod':
(.text.generic_acl_chmod+0x76): undefined reference to `posix_acl_chmod_masq'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 6a3df055280a..fee318e6f4bb 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -634,6 +634,10 @@ config FUSE_FS
 	  If you want to develop a userspace FS, or if you want to use
 	  a filesystem based on FUSE, answer Y or M.
 
+config GENERIC_ACL
+	bool
+	select FS_POSIX_ACL
+
 if BLOCK
 menu "CD-ROM/DVD Filesystems"
 
@@ -2080,10 +2084,6 @@ config 9P_FS
 
 	  If unsure, say N.
 
-config GENERIC_ACL
-	bool
-	select FS_POSIX_ACL
-
 endmenu
 
 if BLOCK
-- 
cgit v1.2.3


From 0e7d73824e6b0024100701da246fec769dd8f087 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 19 Oct 2006 23:28:36 -0700
Subject: [PATCH] autofs3: Make sure all dentries refs are released before
 calling kill_anon_super()

Make sure all dentry refs are released before calling kill_anon_super(),
so that the assumption that generic_shutdown_super() can completely destroy
the dentry tree, because there will be no external references, holds true.

What was being done in the put_super() superblock op, is now done in the
kill_sb() filesystem op instead, prior to calling kill_anon_super().

The call to shrink_dcache_sb() is removed as it is redundant since
shrink_dcache_for_umount() will now be called after the cleanup routine.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Ian Kent <raven@themaw.net>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs/autofs_i.h | 1 +
 fs/autofs/dirhash.c  | 1 -
 fs/autofs/init.c     | 2 +-
 fs/autofs/inode.c    | 4 ++--
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index c7700d9b3f96..906ba5ce2261 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -149,6 +149,7 @@ extern const struct file_operations autofs_root_operations;
 /* Initializing function */
 
 int autofs_fill_super(struct super_block *, void *, int);
+void autofs_kill_sb(struct super_block *sb);
 
 /* Queue management functions */
 
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 3fded389d06b..bf8c8af98004 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -246,5 +246,4 @@ void autofs_hash_nuke(struct autofs_sb_info *sbi)
 			kfree(ent);
 		}
 	}
-	shrink_dcache_sb(sbi->sb);
 }
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
index aca123752406..cea5219b4f37 100644
--- a/fs/autofs/init.c
+++ b/fs/autofs/init.c
@@ -24,7 +24,7 @@ static struct file_system_type autofs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "autofs",
 	.get_sb		= autofs_get_sb,
-	.kill_sb	= kill_anon_super,
+	.kill_sb	= autofs_kill_sb,
 };
 
 static int __init init_autofs_fs(void)
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 2c9759baad61..54c518c89e4c 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -20,7 +20,7 @@
 #include "autofs_i.h"
 #include <linux/module.h>
 
-static void autofs_put_super(struct super_block *sb)
+void autofs_kill_sb(struct super_block *sb)
 {
 	struct autofs_sb_info *sbi = autofs_sbi(sb);
 	unsigned int n;
@@ -37,13 +37,13 @@ static void autofs_put_super(struct super_block *sb)
 	kfree(sb->s_fs_info);
 
 	DPRINTK(("autofs: shutting down\n"));
+	kill_anon_super(sb);
 }
 
 static void autofs_read_inode(struct inode *inode);
 
 static struct super_operations autofs_sops = {
 	.read_inode	= autofs_read_inode,
-	.put_super	= autofs_put_super,
 	.statfs		= simple_statfs,
 };
 
-- 
cgit v1.2.3


From 575b5c7870c940326a11614e0279b74356c1d44f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 19 Oct 2006 23:28:37 -0700
Subject: [PATCH] NFSv4: Fix thinko in fs/nfs/super.c

Duh. addr.sin_port should be in network byte order.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 28659a919d6e..28108c82b887 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -834,7 +834,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	}
 	/* RFC3530: The default port for NFS is 2049 */
 	if (addr.sin_port == 0)
-		addr.sin_port = NFS_PORT;
+		addr.sin_port = htons(NFS_PORT);
 
 	/* Grab the authentication type */
 	authflavour = RPC_AUTH_UNIX;
-- 
cgit v1.2.3


From b6dff26a08189932eeb0fa4261e09e733b0fc540 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 19 Oct 2006 23:28:38 -0700
Subject: [PATCH] NFS: Fix oops in nfs_cancel_commit_list

Fix two bugs:
 - nfs_inode_remove_request will call nfs_clear_request, so we cannot
   reference req->wb_page after it. Move the call to dec_zone_page_state so
   that it occurs while req->wb_page is still valid.
 - Calling nfs_clear_page_writeback is unnecessary since the radix tree
   tags will have been cleared by the call to nfs_inode_remove_request.
   Replace with a simple call to nfs_unlock_request.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/write.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ca92ac36fe9d..883dd4a1c157 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -590,10 +590,10 @@ static void nfs_cancel_commit_list(struct list_head *head)
 
 	while(!list_empty(head)) {
 		req = nfs_list_entry(head->next);
+		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 		nfs_list_remove_request(req);
 		nfs_inode_remove_request(req);
-		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-		nfs_clear_page_writeback(req);
+		nfs_unlock_request(req);
 	}
 }
 
-- 
cgit v1.2.3


From eda3cef8dd2b83875affe82595db9d0c278879b2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 19 Oct 2006 23:28:38 -0700
Subject: [PATCH] NFS: Fix error handling in nfs_direct_write_result()

If the RPC call tanked, we should not be checking the return value
of data->res.verf->committed, since it is unlikely to even be
initialised.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/direct.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9f7f8b9ea1e2..1e873fcab947 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -532,10 +532,12 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 
 	spin_lock(&dreq->lock);
 
-	if (likely(status >= 0))
-		dreq->count += data->res.count;
-	else
-		dreq->error = task->tk_status;
+	if (unlikely(status < 0)) {
+		dreq->error = status;
+		goto out_unlock;
+	}
+
+	dreq->count += data->res.count;
 
 	if (data->res.verf->committed != NFS_FILE_SYNC) {
 		switch (dreq->flags) {
@@ -550,7 +552,7 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 				}
 		}
 	}
-
+out_unlock:
 	spin_unlock(&dreq->lock);
 }
 
-- 
cgit v1.2.3


From 7d9ac06f26fe8d477c813405f1a8c7c90eecef2d Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Thu, 19 Oct 2006 23:28:39 -0700
Subject: [PATCH] nfs4: initialize cl_ipaddr

David forgot to do this.  I'm not sure if this is the right place to put
it....

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/client.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 34c3996bd0f5..8b123f6a7d02 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -849,6 +849,7 @@ error:
  */
 static int nfs4_init_client(struct nfs_client *clp,
 		int proto, int timeo, int retrans,
+		const char *ip_addr,
 		rpc_authflavor_t authflavour)
 {
 	int error;
@@ -865,6 +866,7 @@ static int nfs4_init_client(struct nfs_client *clp,
 	error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour);
 	if (error < 0)
 		goto error;
+	memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
 
 	error = nfs_idmap_new(clp);
 	if (error < 0) {
@@ -888,6 +890,7 @@ error:
  */
 static int nfs4_set_client(struct nfs_server *server,
 		const char *hostname, const struct sockaddr_in *addr,
+		const char *ip_addr,
 		rpc_authflavor_t authflavour,
 		int proto, int timeo, int retrans)
 {
@@ -902,7 +905,7 @@ static int nfs4_set_client(struct nfs_server *server,
 		error = PTR_ERR(clp);
 		goto error;
 	}
-	error = nfs4_init_client(clp, proto, timeo, retrans, authflavour);
+	error = nfs4_init_client(clp, proto, timeo, retrans, ip_addr, authflavour);
 	if (error < 0)
 		goto error_put;
 
@@ -971,7 +974,7 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
 		return ERR_PTR(-ENOMEM);
 
 	/* Get a client record */
-	error = nfs4_set_client(server, hostname, addr, authflavour,
+	error = nfs4_set_client(server, hostname, addr, ip_addr, authflavour,
 			data->proto, data->timeo, data->retrans);
 	if (error < 0)
 		goto error;
@@ -1041,6 +1044,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 	/* Get a client representation.
 	 * Note: NFSv4 always uses TCP, */
 	error = nfs4_set_client(server, data->hostname, data->addr,
+			parent_client->cl_ipaddr,
 			data->authflavor,
 			parent_server->client->cl_xprt->prot,
 			parent_client->retrans_timeo,
-- 
cgit v1.2.3


From 13bbc06af8a5f65df0f888b442e557c617cadba7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 19 Oct 2006 23:28:40 -0700
Subject: [PATCH] NFS: Fix NFSv4 callback regression

The change in semantics for nfs_find_client() introduced by David breaks the
NFSv4 callback channel.

Also, replace another completely broken BUG_ON() in nfs_find_client().  In
initialised clients, clp->cl_cons_state == 0, and callers of that function
should in any case never want to see clients that are uninitialised.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/client.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8b123f6a7d02..5fea638743e4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -232,11 +232,15 @@ void nfs_put_client(struct nfs_client *clp)
  * Find a client by address
  * - caller must hold nfs_client_lock
  */
-static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion, int match_port)
 {
 	struct nfs_client *clp;
 
 	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		/* Don't match clients that failed to initialise properly */
+		if (clp->cl_cons_state < 0)
+			continue;
+
 		/* Different NFS versions cannot share the same nfs_client */
 		if (clp->cl_nfsversion != nfsversion)
 			continue;
@@ -245,7 +249,7 @@ static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int
 			   sizeof(clp->cl_addr.sin_addr)) != 0)
 			continue;
 
-		if (clp->cl_addr.sin_port == addr->sin_port)
+		if (!match_port || clp->cl_addr.sin_port == addr->sin_port)
 			goto found;
 	}
 
@@ -265,11 +269,12 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio
 	struct nfs_client *clp;
 
 	spin_lock(&nfs_client_lock);
-	clp = __nfs_find_client(addr, nfsversion);
+	clp = __nfs_find_client(addr, nfsversion, 0);
 	spin_unlock(&nfs_client_lock);
-
-	BUG_ON(clp && clp->cl_cons_state == 0);
-
+	if (clp != NULL && clp->cl_cons_state != NFS_CS_READY) {
+		nfs_put_client(clp);
+		clp = NULL;
+	}
 	return clp;
 }
 
@@ -292,7 +297,7 @@ static struct nfs_client *nfs_get_client(const char *hostname,
 	do {
 		spin_lock(&nfs_client_lock);
 
-		clp = __nfs_find_client(addr, nfsversion);
+		clp = __nfs_find_client(addr, nfsversion, 1);
 		if (clp)
 			goto found_client;
 		if (new)
-- 
cgit v1.2.3


From cd9ae2b6a75bb1fa0d370929c2d7a7da1ed719d9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 19 Oct 2006 23:28:40 -0700
Subject: [PATCH] NFS: Deal with failure of invalidate_inode_pages2()

If invalidate_inode_pages2() fails, then it should in principle just be
because the current process was signalled.  In that case, we just want to
ensure that the inode's page cache remains marked as invalid.

Also add a helper to allow the O_DIRECT code to simply mark the page cache as
invalid once it is finished writing, instead of calling
invalidate_inode_pages2() itself.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/dir.c    |  6 ++++--
 fs/nfs/direct.c | 13 ++-----------
 fs/nfs/inode.c  | 28 +++++++++++++++++++++++-----
 3 files changed, 29 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 481f8892a919..58d44057813e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -203,8 +203,10 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	 * Note: assumes we have exclusive access to this mapping either
 	 *	 through inode->i_mutex or some other mechanism.
 	 */
-	if (page->index == 0)
-		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
+	if (page->index == 0 && invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1) < 0) {
+		/* Should never happen */
+		nfs_zap_mapping(inode, inode->i_mapping);
+	}
 	unlock_page(page);
 	return 0;
  error:
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1e873fcab947..bdfabf854a51 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -497,6 +497,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 			if (dreq->commit_data != NULL)
 				nfs_commit_free(dreq->commit_data);
 			nfs_direct_free_writedata(dreq);
+			nfs_zap_mapping(inode, inode->i_mapping);
 			nfs_direct_complete(dreq);
 	}
 }
@@ -517,6 +518,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 {
 	nfs_end_data_update(inode);
 	nfs_direct_free_writedata(dreq);
+	nfs_zap_mapping(inode, inode->i_mapping);
 	nfs_direct_complete(dreq);
 }
 #endif
@@ -830,17 +832,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 
 	retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos);
 
-	/*
-	 * XXX: nfs_end_data_update() already ensures this file's
-	 *      cached data is subsequently invalidated.  Do we really
-	 *      need to call invalidate_inode_pages2() again here?
-	 *
-	 *      For aio writes, this invalidation will almost certainly
-	 *      occur before the writes complete.  Kind of racey.
-	 */
-	if (mapping->nrpages)
-		invalidate_inode_pages2(mapping);
-
 	if (retval > 0)
 		iocb->ki_pos = pos + retval;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bc9376ca86cd..9979ad1cf8eb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -131,6 +131,15 @@ void nfs_zap_caches(struct inode *inode)
 	spin_unlock(&inode->i_lock);
 }
 
+void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
+{
+	if (mapping->nrpages != 0) {
+		spin_lock(&inode->i_lock);
+		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
+		spin_unlock(&inode->i_lock);
+	}
+}
+
 static void nfs_zap_acl_cache(struct inode *inode)
 {
 	void (*clear_acl_cache)(struct inode *);
@@ -671,13 +680,20 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
 			|| nfs_attribute_timeout(inode))
 		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (ret < 0)
+		goto out;
 
 	if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
-		nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
-		if (S_ISREG(inode->i_mode))
-			nfs_sync_mapping(mapping);
-		invalidate_inode_pages2(mapping);
-
+		if (mapping->nrpages != 0) {
+			if (S_ISREG(inode->i_mode)) {
+				ret = nfs_sync_mapping(mapping);
+				if (ret < 0)
+					goto out;
+			}
+			ret = invalidate_inode_pages2(mapping);
+			if (ret < 0)
+				goto out;
+		}
 		spin_lock(&inode->i_lock);
 		nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
 		if (S_ISDIR(inode->i_mode)) {
@@ -687,10 +703,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 		}
 		spin_unlock(&inode->i_lock);
 
+		nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
 		dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
 				inode->i_sb->s_id,
 				(long long)NFS_FILEID(inode));
 	}
+out:
 	return ret;
 }
 
-- 
cgit v1.2.3


From 39cf8a1374dc51fea169190674d5e4996a7d7ea2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 19 Oct 2006 23:28:41 -0700
Subject: [PATCH] NFS: fix minor bug in new NFS symlink code

The original code mistook a zero return code from pagevec_add() for success.

Test plan:
None.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/dir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 58d44057813e..c86a1ead4772 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1519,8 +1519,8 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 	pagevec_init(&lru_pvec, 0);
 	if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
 							GFP_KERNEL)) {
-		if (!pagevec_add(&lru_pvec, page))
-			__pagevec_lru_add(&lru_pvec);
+		pagevec_add(&lru_pvec, page);
+		pagevec_lru_add(&lru_pvec);
 		SetPageUptodate(page);
 		unlock_page(page);
 	} else
-- 
cgit v1.2.3


From 85233a7a436a48a0b98e7976a66797e5da79c9d6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 19 Oct 2006 23:28:42 -0700
Subject: [PATCH] NFS: __nfs_revalidate_inode() can use "inode" before checking
 it is non-NULL

The "!inode" check in __nfs_revalidate_inode() occurs well after the first
time it is dereferenced, so get rid of it.

Coverity: #cid 1372, 1373

Test plan:
Code review; recheck with Coverity.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9979ad1cf8eb..08cc4c5919ab 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -583,7 +583,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 
 	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
 	lock_kernel();
-	if (!inode || is_bad_inode(inode))
+	if (is_bad_inode(inode))
  		goto out_nowait;
 	if (NFS_STALE(inode))
  		goto out_nowait;
-- 
cgit v1.2.3


From b87c0adfeaaf8d8310c4f790d76072a5961b3518 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 19 Oct 2006 23:28:42 -0700
Subject: [PATCH] NFS: remove unused check in nfs4_open_revalidate

Coverity spotted a superfluous error check in nfs4_open_revalidate().  Remove
it.

Coverity: #cid 847

Test plan:
Code inspection; another pass through Coverity.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/nfs4proc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 47c7e6e3910d..7421bcb3b728 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1314,11 +1314,9 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 			case -EROFS:
 				lookup_instantiate_filp(nd, (struct dentry *)state, NULL);
 				return 1;
-			case -ENOENT:
-				if (dentry->d_inode == NULL)
-					return 1;
+			default:
+				goto out_drop;
 		}
-		goto out_drop;
 	}
 	if (state->inode == dentry->d_inode) {
 		nfs4_intent_set_file(nd, dentry, state);
-- 
cgit v1.2.3


From cc45f0175088e000ac7493e5e3f05579b6f7d240 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:44 -0700
Subject: [PATCH] bug: nfsd/nfs4xdr.c misuse of ERR_PTR()

	a) ERR_PTR(nfserr_something) is a bad idea;
IS_ERR() will be false for it.
	b) mixing nfserr_.... with -EOPNOTSUPP is
an even worse idea.

nfsd4_path() does both; the caller expects to get an NFS protocol error out
of it if anything goes wrong, but if it does we either do not notice (see (a))
or get a host-endian negative (see (b)).

IOW, that's a case when we can't use ERR_PTR() to return error, even though we
return a pointer in case of success.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4xdr.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 41fc241b729a..77be0c4785e6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1292,16 +1292,15 @@ static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
  * Returned string is safe to use as long as the caller holds a reference
  * to @exp.
  */
-static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp)
+static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, u32 *stat)
 {
 	struct svc_fh tmp_fh;
 	char *path, *rootpath;
-	int stat;
 
 	fh_init(&tmp_fh, NFS4_FHSIZE);
-	stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle);
-	if (stat)
-		return ERR_PTR(stat);
+	*stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle);
+	if (*stat)
+		return NULL;
 	rootpath = tmp_fh.fh_export->ex_path;
 
 	path = exp->ex_path;
@@ -1309,7 +1308,8 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp)
 	if (strncmp(path, rootpath, strlen(rootpath))) {
 		printk("nfsd: fs_locations failed;"
 			"%s is not contained in %s\n", path, rootpath);
-		return ERR_PTR(-EOPNOTSUPP);
+		*stat = nfserr_notsupp;
+		return NULL;
 	}
 
 	return path + strlen(rootpath);
@@ -1322,13 +1322,14 @@ static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
 				     struct svc_export *exp,
 				     u32 **pp, int *buflen)
 {
-	int status, i;
+	u32 status;
+	int i;
 	u32 *p = *pp;
 	struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
-	char *root = nfsd4_path(rqstp, exp);
+	char *root = nfsd4_path(rqstp, exp, &status);
 
-	if (IS_ERR(root))
-		return PTR_ERR(root);
+	if (status)
+		return status;
 	status = nfsd4_encode_components('/', root, &p, buflen);
 	if (status)
 		return status;
-- 
cgit v1.2.3


From 7111c66e4e70588c9602035a4996c9cdc2087d2d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:45 -0700
Subject: [PATCH] fix svc_procfunc declaration

svc_procfunc instances return __be32, not int

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/svc4proc.c   | 40 ++++++++++++++++++++--------------------
 fs/lockd/svcproc.c    | 40 ++++++++++++++++++++--------------------
 fs/nfs/callback_xdr.c |  4 ++--
 fs/nfsd/nfs2acl.c     | 10 +++++-----
 fs/nfsd/nfs3acl.c     |  6 +++---
 fs/nfsd/nfs3proc.c    | 44 ++++++++++++++++++++++----------------------
 fs/nfsd/nfs4proc.c    |  4 ++--
 fs/nfsd/nfsproc.c     | 32 ++++++++++++++++----------------
 8 files changed, 90 insertions(+), 90 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 399ad11b97be..4e719860b4bf 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -68,7 +68,7 @@ no_locks:
 /*
  * NULL: Test for presence of service
  */
-static int
+static __be32
 nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 {
 	dprintk("lockd: NULL          called\n");
@@ -78,7 +78,7 @@ nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * TEST: Check for conflicting lock
  */
-static int
+static __be32
 nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 				         struct nlm_res  *resp)
 {
@@ -107,7 +107,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
-static int
+static __be32
 nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 				         struct nlm_res  *resp)
 {
@@ -150,7 +150,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
-static int
+static __be32
 nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 				           struct nlm_res  *resp)
 {
@@ -183,7 +183,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * UNLOCK: release a lock
  */
-static int
+static __be32
 nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 				           struct nlm_res  *resp)
 {
@@ -217,7 +217,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
-static int
+static __be32
 nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 				            struct nlm_res  *resp)
 {
@@ -253,12 +253,12 @@ static const struct rpc_call_ops nlm4svc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
 {
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
-	int stat;
+	__be32 stat;
 
 	host = nlmsvc_lookup_host(rqstp,
 				  argp->lock.caller,
@@ -282,35 +282,35 @@ static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *a
 	return rpc_success;
 }
 
-static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void	     *resp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
 	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
 }
 
-static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void	     *resp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
 	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
 }
 
-static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					       void	       *resp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
 	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
 }
 
-static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                void            *resp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
 	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
 }
 
-static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                 void            *resp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
@@ -320,7 +320,7 @@ static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *arg
 /*
  * SHARE: create a DOS share or alter existing share.
  */
-static int
+static __be32
 nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 				          struct nlm_res  *resp)
 {
@@ -353,7 +353,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * UNSHARE: Release a DOS share.
  */
-static int
+static __be32
 nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 				            struct nlm_res  *resp)
 {
@@ -386,7 +386,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * NM_LOCK: Create an unmonitored lock
  */
-static int
+static __be32
 nlm4svc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 				            struct nlm_res  *resp)
 {
@@ -399,7 +399,7 @@ nlm4svc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
-static int
+static __be32
 nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void            *resp)
 {
@@ -417,7 +417,7 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
-static int
+static __be32
 nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
 					      void	        *resp)
 {
@@ -446,7 +446,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
 /*
  * client sent a GRANTED_RES, let's remove the associated block
  */
-static int
+static __be32
 nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
                                                 void            *resp)
 {
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 6a931f4ab75c..db8d85c32d29 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -96,7 +96,7 @@ no_locks:
 /*
  * NULL: Test for presence of service
  */
-static int
+static __be32
 nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 {
 	dprintk("lockd: NULL          called\n");
@@ -106,7 +106,7 @@ nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * TEST: Check for conflicting lock
  */
-static int
+static __be32
 nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 				         struct nlm_res  *resp)
 {
@@ -136,7 +136,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
-static int
+static __be32
 nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 				         struct nlm_res  *resp)
 {
@@ -179,7 +179,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
-static int
+static __be32
 nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 				           struct nlm_res  *resp)
 {
@@ -212,7 +212,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * UNLOCK: release a lock
  */
-static int
+static __be32
 nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 				           struct nlm_res  *resp)
 {
@@ -246,7 +246,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
-static int
+static __be32
 nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 				            struct nlm_res  *resp)
 {
@@ -282,12 +282,12 @@ static const struct rpc_call_ops nlmsvc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
 {
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
-	int stat;
+	__be32 stat;
 
 	host = nlmsvc_lookup_host(rqstp,
 				  argp->lock.caller,
@@ -311,28 +311,28 @@ static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *ar
 	return rpc_success;
 }
 
-static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void	     *resp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
 	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
 }
 
-static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void	     *resp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
 	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
 }
 
-static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					       void	       *resp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
 	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
 }
 
-static int
+static __be32
 nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                void            *resp)
 {
@@ -340,7 +340,7 @@ nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
 }
 
-static int
+static __be32
 nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                 void            *resp)
 {
@@ -351,7 +351,7 @@ nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * SHARE: create a DOS share or alter existing share.
  */
-static int
+static __be32
 nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 				          struct nlm_res  *resp)
 {
@@ -384,7 +384,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * UNSHARE: Release a DOS share.
  */
-static int
+static __be32
 nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 				            struct nlm_res  *resp)
 {
@@ -417,7 +417,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * NM_LOCK: Create an unmonitored lock
  */
-static int
+static __be32
 nlmsvc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 				            struct nlm_res  *resp)
 {
@@ -430,7 +430,7 @@ nlmsvc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
-static int
+static __be32
 nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void            *resp)
 {
@@ -448,7 +448,7 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
 /*
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
-static int
+static __be32
 nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
 					      void	        *resp)
 {
@@ -477,7 +477,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
 /*
  * client sent a GRANTED_RES, let's remove the associated block
  */
-static int
+static __be32
 nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
                                                 void            *resp)
 {
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 29f932192054..5998d0c71757 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -36,7 +36,7 @@ struct callback_op {
 
 static struct callback_op callback_ops[];
 
-static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
 {
 	return htonl(NFS4_OK);
 }
@@ -399,7 +399,7 @@ static unsigned process_op(struct svc_rqst *rqstp,
 /*
  * Decode, process and encode a COMPOUND
  */
-static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
 {
 	struct cb_compound_hdr_arg hdr_arg;
 	struct cb_compound_hdr_res hdr_res;
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 9187755661df..8d48616882c1 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -21,7 +21,7 @@
 /*
  * NULL call.
  */
-static int
+static __be32
 nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 {
 	return nfs_ok;
@@ -30,7 +30,7 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static int nfsacld_proc_getacl(struct svc_rqst * rqstp,
+static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
 		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
 {
 	svc_fh *fh;
@@ -97,7 +97,7 @@ fail:
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static int nfsacld_proc_setacl(struct svc_rqst * rqstp,
+static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
 		struct nfsd3_setaclargs *argp,
 		struct nfsd_attrstat *resp)
 {
@@ -128,7 +128,7 @@ static int nfsacld_proc_setacl(struct svc_rqst * rqstp,
 /*
  * Check file attributes
  */
-static int nfsacld_proc_getattr(struct svc_rqst * rqstp,
+static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
 		struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
 {
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
@@ -140,7 +140,7 @@ static int nfsacld_proc_getattr(struct svc_rqst * rqstp,
 /*
  * Check file access
  */
-static int nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
+static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
 		struct nfsd3_accessres *resp)
 {
 	int nfserr;
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index d4bdc00c1169..ed6e2c27b5e8 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -19,7 +19,7 @@
 /*
  * NULL call.
  */
-static int
+static __be32
 nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 {
 	return nfs_ok;
@@ -28,7 +28,7 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static int nfsd3_proc_getacl(struct svc_rqst * rqstp,
+static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
 		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
 {
 	svc_fh *fh;
@@ -93,7 +93,7 @@ fail:
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static int nfsd3_proc_setacl(struct svc_rqst * rqstp,
+static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
 		struct nfsd3_setaclargs *argp,
 		struct nfsd3_attrstat *resp)
 {
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a5ebc7dbb384..a12663fdfe16 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -43,7 +43,7 @@ static int	nfs3_ftypes[] = {
 /*
  * NULL call.
  */
-static int
+static __be32
 nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 {
 	return nfs_ok;
@@ -52,7 +52,7 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get a file's attributes
  */
-static int
+static __be32
 nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
 					   struct nfsd3_attrstat *resp)
 {
@@ -76,7 +76,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
 /*
  * Set a file's attributes
  */
-static int
+static __be32
 nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
 					   struct nfsd3_attrstat  *resp)
 {
@@ -94,7 +94,7 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
 /*
  * Look up a path name component
  */
-static int
+static __be32
 nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 					  struct nfsd3_diropres  *resp)
 {
@@ -118,7 +118,7 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 /*
  * Check file access
  */
-static int
+static __be32
 nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
 					  struct nfsd3_accessres *resp)
 {
@@ -137,7 +137,7 @@ nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
 /*
  * Read a symlink.
  */
-static int
+static __be32
 nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
 					   struct nfsd3_readlinkres *resp)
 {
@@ -155,7 +155,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
 /*
  * Read a portion of a file.
  */
-static int
+static __be32
 nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 				        struct nfsd3_readres  *resp)
 {
@@ -195,7 +195,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 /*
  * Write data to a file
  */
-static int
+static __be32
 nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
 					 struct nfsd3_writeres  *resp)
 {
@@ -223,7 +223,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
  * At least in theory; we'll see how it fares in practice when the
  * first reports about SunOS compatibility problems start to pour in...
  */
-static int
+static __be32
 nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 					  struct nfsd3_diropres   *resp)
 {
@@ -265,7 +265,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 /*
  * Make directory. This operation is not idempotent.
  */
-static int
+static __be32
 nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 					 struct nfsd3_diropres   *resp)
 {
@@ -285,7 +285,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 	RETURN_STATUS(nfserr);
 }
 
-static int
+static __be32
 nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
 					   struct nfsd3_diropres    *resp)
 {
@@ -307,7 +307,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
 /*
  * Make socket/fifo/device.
  */
-static int
+static __be32
 nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
 					 struct nfsd3_diropres  *resp)
 {
@@ -343,7 +343,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
 /*
  * Remove file/fifo/socket etc.
  */
-static int
+static __be32
 nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 					  struct nfsd3_attrstat  *resp)
 {
@@ -363,7 +363,7 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 /*
  * Remove a directory
  */
-static int
+static __be32
 nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 					 struct nfsd3_attrstat  *resp)
 {
@@ -379,7 +379,7 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 	RETURN_STATUS(nfserr);
 }
 
-static int
+static __be32
 nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
 					  struct nfsd3_renameres  *resp)
 {
@@ -401,7 +401,7 @@ nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
 	RETURN_STATUS(nfserr);
 }
 
-static int
+static __be32
 nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
 					struct nfsd3_linkres  *resp)
 {
@@ -424,7 +424,7 @@ nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
 /*
  * Read a portion of a directory.
  */
-static int
+static __be32
 nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 					   struct nfsd3_readdirres  *resp)
 {
@@ -459,7 +459,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * Read a portion of a directory, including file handles and attrs.
  * For now, we choose to ignore the dircount parameter.
  */
-static int
+static __be32
 nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 					       struct nfsd3_readdirres  *resp)
 {
@@ -517,7 +517,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 /*
  * Get file system stats
  */
-static int
+static __be32
 nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
 					   struct nfsd3_fsstatres *resp)
 {
@@ -534,7 +534,7 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
 /*
  * Get file system info
  */
-static int
+static __be32
 nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
 					   struct nfsd3_fsinfores *resp)
 {
@@ -576,7 +576,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
 /*
  * Get pathconf info for the specified file
  */
-static int
+static __be32
 nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
 					     struct nfsd3_pathconfres *resp)
 {
@@ -619,7 +619,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
 /*
  * Commit a file (range) to stable storage.
  */
-static int
+static __be32
 nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
 					   struct nfsd3_commitres  *resp)
 {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d1fac6872c44..795ad6c5cb2c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -715,7 +715,7 @@ out_kfree:
 /*
  * NULL call.
  */
-static int
+static __be32
 nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 {
 	return nfs_ok;
@@ -731,7 +731,7 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 /*
  * COMPOUND call.
  */
-static int
+static __be32
 nfsd4_proc_compound(struct svc_rqst *rqstp,
 		    struct nfsd4_compoundargs *args,
 		    struct nfsd4_compoundres *resp)
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 9ee1dab5d44a..09030afd7249 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -30,7 +30,7 @@ typedef struct svc_buf	svc_buf;
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 
 
-static int
+static __be32
 nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 {
 	return nfs_ok;
@@ -56,7 +56,7 @@ nfsd_return_dirop(int err, struct nfsd_diropres *resp)
  * Get a file's attributes
  * N.B. After this call resp->fh needs an fh_put
  */
-static int
+static __be32
 nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
 					  struct nfsd_attrstat *resp)
 {
@@ -72,7 +72,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * Set a file's attributes
  * N.B. After this call resp->fh needs an fh_put
  */
-static int
+static __be32
 nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
 					  struct nfsd_attrstat  *resp)
 {
@@ -92,7 +92,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
  * doesn't exist yet.
  * N.B. After this call resp->fh needs an fh_put
  */
-static int
+static __be32
 nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 					 struct nfsd_diropres  *resp)
 {
@@ -112,7 +112,7 @@ nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 /*
  * Read a symlink.
  */
-static int
+static __be32
 nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
 					   struct nfsd_readlinkres *resp)
 {
@@ -132,7 +132,7 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
  * Read a portion of a file.
  * N.B. After this call resp->fh needs an fh_put
  */
-static int
+static __be32
 nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
 				       struct nfsd_readres  *resp)
 {
@@ -172,7 +172,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
  * Write data to a file
  * N.B. After this call resp->fh needs an fh_put
  */
-static int
+static __be32
 nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
 					struct nfsd_attrstat  *resp)
 {
@@ -197,7 +197,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
  * and the actual create() call in compliance with VFS protocols.
  * N.B. After this call _both_ argp->fh and resp->fh need an fh_put
  */
-static int
+static __be32
 nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 					 struct nfsd_diropres   *resp)
 {
@@ -348,7 +348,7 @@ done:
 	return nfsd_return_dirop(nfserr, resp);
 }
 
-static int
+static __be32
 nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 					 void		       *resp)
 {
@@ -363,7 +363,7 @@ nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 	return nfserr;
 }
 
-static int
+static __be32
 nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
 				  	 void		        *resp)
 {
@@ -381,7 +381,7 @@ nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
 	return nfserr;
 }
 
-static int
+static __be32
 nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
 				void			    *resp)
 {
@@ -401,7 +401,7 @@ nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
 	return nfserr;
 }
 
-static int
+static __be32
 nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
 				          void			  *resp)
 {
@@ -430,7 +430,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
  * Make directory. This operation is not idempotent.
  * N.B. After this call resp->fh needs an fh_put
  */
-static int
+static __be32
 nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 					struct nfsd_diropres   *resp)
 {
@@ -454,7 +454,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 /*
  * Remove a directory
  */
-static int
+static __be32
 nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 				 	void		      *resp)
 {
@@ -470,7 +470,7 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 /*
  * Read a portion of a directory.
  */
-static int
+static __be32
 nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
 					  struct nfsd_readdirres  *resp)
 {
@@ -509,7 +509,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
 /*
  * Get file system info
  */
-static int
+static __be32
 nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle   *argp,
 					  struct nfsd_statfsres *resp)
 {
-- 
cgit v1.2.3


From 52921e02a4f4163a7b1f4b5dde71e1debc71de4a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:46 -0700
Subject: [PATCH] lockd endianness annotations

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/clntlock.c |  4 +--
 fs/lockd/mon.c      | 12 ++++----
 fs/lockd/svc4proc.c |  4 +--
 fs/lockd/svclock.c  | 10 +++----
 fs/lockd/svcproc.c  |  8 +++---
 fs/lockd/svcshare.c |  4 +--
 fs/lockd/svcsubs.c  |  4 +--
 fs/lockd/xdr.c      | 76 +++++++++++++++++++++++++-------------------------
 fs/lockd/xdr4.c     | 80 ++++++++++++++++++++++++++---------------------------
 9 files changed, 101 insertions(+), 101 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index e8c7765419e8..b85a0ad2cfb6 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -100,12 +100,12 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 /*
  * The server lockd has called us back to tell us the lock was granted
  */
-u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
+__be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
 {
 	const struct file_lock *fl = &lock->fl;
 	const struct nfs_fh *fh = &lock->fh;
 	struct nlm_wait	*block;
-	u32 res = nlm_lck_denied;
+	__be32 res = nlm_lck_denied;
 
 	/*
 	 * Look up blocked request based on arguments. 
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e0179f8c327f..eb243edf8932 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -148,8 +148,8 @@ nsm_create(void)
  * XDR functions for NSM.
  */
 
-static u32 *
-xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
+static __be32 *
+xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 {
 	char	buffer[20], *name;
 
@@ -176,7 +176,7 @@ xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
 }
 
 static int
-xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
+xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 {
 	p = xdr_encode_common(rqstp, p, argp);
 	if (IS_ERR(p))
@@ -192,7 +192,7 @@ xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
 }
 
 static int
-xdr_encode_unmon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
+xdr_encode_unmon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 {
 	p = xdr_encode_common(rqstp, p, argp);
 	if (IS_ERR(p))
@@ -202,7 +202,7 @@ xdr_encode_unmon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
 }
 
 static int
-xdr_decode_stat_res(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp)
+xdr_decode_stat_res(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp)
 {
 	resp->status = ntohl(*p++);
 	resp->state = ntohl(*p++);
@@ -212,7 +212,7 @@ xdr_decode_stat_res(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp)
 }
 
 static int
-xdr_decode_stat(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp)
+xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp)
 {
 	resp->state = ntohl(*p++);
 	return 0;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4e719860b4bf..0ce5c81ff507 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -24,14 +24,14 @@
 /*
  * Obtain client and file from arguments
  */
-static u32
+static __be32
 nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 			struct nlm_host **hostp, struct nlm_file **filp)
 {
 	struct nlm_host		*host = NULL;
 	struct nlm_file		*file = NULL;
 	struct nlm_lock		*lock = &argp->lock;
-	u32			error = 0;
+	__be32			error = 0;
 
 	/* nfsd callbacks must have been installed for this procedure */
 	if (!nlmsvc_ops)
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 814c6064c9e0..7e219b938552 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -334,13 +334,13 @@ static void nlmsvc_freegrantargs(struct nlm_rqst *call)
  * Attempt to establish a lock, and if it can't be granted, block it
  * if required.
  */
-u32
+__be32
 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
 {
 	struct nlm_block	*block, *newblock = NULL;
 	int			error;
-	u32			ret;
+	__be32			ret;
 
 	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
 				file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -415,7 +415,7 @@ out:
 /*
  * Test for presence of a conflicting lock.
  */
-u32
+__be32
 nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
 				       struct nlm_lock *conflock)
 {
@@ -448,7 +448,7 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
  * afterwards. In this case the block will still be there, and hence
  * must be removed.
  */
-u32
+__be32
 nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
 {
 	int	error;
@@ -476,7 +476,7 @@ nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
  * be in progress.
  * The calling procedure must check whether the file can be closed.
  */
-u32
+__be32
 nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 {
 	struct nlm_block	*block;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index db8d85c32d29..32e99a6e8dca 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -22,8 +22,8 @@
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
 #ifdef CONFIG_LOCKD_V4
-static u32
-cast_to_nlm(u32 status, u32 vers)
+static __be32
+cast_to_nlm(__be32 status, u32 vers)
 {
 	/* Note: status is assumed to be in network byte order !!! */
 	if (vers != 4){
@@ -52,14 +52,14 @@ cast_to_nlm(u32 status, u32 vers)
 /*
  * Obtain client and file from arguments
  */
-static u32
+static __be32
 nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 			struct nlm_host **hostp, struct nlm_file **filp)
 {
 	struct nlm_host		*host = NULL;
 	struct nlm_file		*file = NULL;
 	struct nlm_lock		*lock = &argp->lock;
-	u32			error = 0;
+	__be32			error = 0;
 
 	/* nfsd callbacks must have been installed for this procedure */
 	if (!nlmsvc_ops)
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index b9926ce8782e..6220dc2a3f2c 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -23,7 +23,7 @@ nlm_cmp_owner(struct nlm_share *share, struct xdr_netobj *oh)
 	    && !memcmp(share->s_owner.data, oh->data, oh->len);
 }
 
-u32
+__be32
 nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file,
 			struct nlm_args *argp)
 {
@@ -64,7 +64,7 @@ update:
 /*
  * Delete a share.
  */
-u32
+__be32
 nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
 			struct nlm_args *argp)
 {
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 7dac96e6c82c..e83024e16042 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -78,14 +78,14 @@ static inline unsigned int file_hash(struct nfs_fh *f)
  * This is not quite right, but for now, we assume the client performs
  * the proper R/W checking.
  */
-u32
+__be32
 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
 					struct nfs_fh *f)
 {
 	struct hlist_node *pos;
 	struct nlm_file	*file;
 	unsigned int	hash;
-	u32		nfserr;
+	__be32		nfserr;
 
 	nlm_debug_print_fh("nlm_file_lookup", f);
 
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 61c46facf257..b7c949256e5a 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -43,7 +43,7 @@ loff_t_to_s32(loff_t offset)
 /*
  * XDR functions for basic NLM types
  */
-static u32 *nlm_decode_cookie(u32 *p, struct nlm_cookie *c)
+static __be32 *nlm_decode_cookie(__be32 *p, struct nlm_cookie *c)
 {
 	unsigned int	len;
 
@@ -69,8 +69,8 @@ static u32 *nlm_decode_cookie(u32 *p, struct nlm_cookie *c)
 	return p;
 }
 
-static inline u32 *
-nlm_encode_cookie(u32 *p, struct nlm_cookie *c)
+static inline __be32 *
+nlm_encode_cookie(__be32 *p, struct nlm_cookie *c)
 {
 	*p++ = htonl(c->len);
 	memcpy(p, c->data, c->len);
@@ -78,8 +78,8 @@ nlm_encode_cookie(u32 *p, struct nlm_cookie *c)
 	return p;
 }
 
-static u32 *
-nlm_decode_fh(u32 *p, struct nfs_fh *f)
+static __be32 *
+nlm_decode_fh(__be32 *p, struct nfs_fh *f)
 {
 	unsigned int	len;
 
@@ -95,8 +95,8 @@ nlm_decode_fh(u32 *p, struct nfs_fh *f)
 	return p + XDR_QUADLEN(NFS2_FHSIZE);
 }
 
-static inline u32 *
-nlm_encode_fh(u32 *p, struct nfs_fh *f)
+static inline __be32 *
+nlm_encode_fh(__be32 *p, struct nfs_fh *f)
 {
 	*p++ = htonl(NFS2_FHSIZE);
 	memcpy(p, f->data, NFS2_FHSIZE);
@@ -106,20 +106,20 @@ nlm_encode_fh(u32 *p, struct nfs_fh *f)
 /*
  * Encode and decode owner handle
  */
-static inline u32 *
-nlm_decode_oh(u32 *p, struct xdr_netobj *oh)
+static inline __be32 *
+nlm_decode_oh(__be32 *p, struct xdr_netobj *oh)
 {
 	return xdr_decode_netobj(p, oh);
 }
 
-static inline u32 *
-nlm_encode_oh(u32 *p, struct xdr_netobj *oh)
+static inline __be32 *
+nlm_encode_oh(__be32 *p, struct xdr_netobj *oh)
 {
 	return xdr_encode_netobj(p, oh);
 }
 
-static u32 *
-nlm_decode_lock(u32 *p, struct nlm_lock *lock)
+static __be32 *
+nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
 {
 	struct file_lock	*fl = &lock->fl;
 	s32			start, len, end;
@@ -153,8 +153,8 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock)
 /*
  * Encode a lock as part of an NLM call
  */
-static u32 *
-nlm_encode_lock(u32 *p, struct nlm_lock *lock)
+static __be32 *
+nlm_encode_lock(__be32 *p, struct nlm_lock *lock)
 {
 	struct file_lock	*fl = &lock->fl;
 	__s32			start, len;
@@ -184,8 +184,8 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock)
 /*
  * Encode result of a TEST/TEST_MSG call
  */
-static u32 *
-nlm_encode_testres(u32 *p, struct nlm_res *resp)
+static __be32 *
+nlm_encode_testres(__be32 *p, struct nlm_res *resp)
 {
 	s32		start, len;
 
@@ -221,7 +221,7 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlmsvc_decode_testargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 {
 	u32	exclusive;
 
@@ -238,7 +238,7 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_encode_testres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_encode_testres(p, resp)))
 		return 0;
@@ -246,7 +246,7 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_lockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 {
 	u32	exclusive;
 
@@ -266,7 +266,7 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_cancargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 {
 	u32	exclusive;
 
@@ -282,7 +282,7 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 {
 	if (!(p = nlm_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm_decode_lock(p, &argp->lock)))
@@ -292,7 +292,7 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -313,7 +313,7 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_encode_shareres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
@@ -323,7 +323,7 @@ nlmsvc_encode_shareres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
@@ -332,7 +332,7 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_notify(struct svc_rqst *rqstp, u32 *p, struct nlm_args *argp)
+nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -344,7 +344,7 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, u32 *p, struct nlm_args *argp)
 }
 
 int
-nlmsvc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp)
+nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
 {
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
@@ -357,7 +357,7 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp)
 }
 
 int
-nlmsvc_decode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return 0;
@@ -366,13 +366,13 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
 int
-nlmsvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_ressize_check(rqstp, p);
 }
@@ -389,7 +389,7 @@ nlmclt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr)
 #endif
 
 static int
-nlmclt_encode_testargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+nlmclt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -403,7 +403,7 @@ nlmclt_encode_testargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
 }
 
 static int
-nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+nlmclt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return -EIO;
@@ -438,7 +438,7 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 
 
 static int
-nlmclt_encode_lockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+nlmclt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -455,7 +455,7 @@ nlmclt_encode_lockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
 }
 
 static int
-nlmclt_encode_cancargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+nlmclt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -470,7 +470,7 @@ nlmclt_encode_cancargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
 }
 
 static int
-nlmclt_encode_unlockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+nlmclt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -483,7 +483,7 @@ nlmclt_encode_unlockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
 }
 
 static int
-nlmclt_encode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+nlmclt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return -EIO;
@@ -493,7 +493,7 @@ nlmclt_encode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 }
 
 static int
-nlmclt_encode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+nlmclt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_encode_testres(p, resp)))
 		return -EIO;
@@ -502,7 +502,7 @@ nlmclt_encode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 }
 
 static int
-nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return -EIO;
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 36eb175ec335..f4c0b2b9f75a 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -44,8 +44,8 @@ loff_t_to_s64(loff_t offset)
 /*
  * XDR functions for basic NLM types
  */
-static u32 *
-nlm4_decode_cookie(u32 *p, struct nlm_cookie *c)
+static __be32 *
+nlm4_decode_cookie(__be32 *p, struct nlm_cookie *c)
 {
 	unsigned int	len;
 
@@ -71,8 +71,8 @@ nlm4_decode_cookie(u32 *p, struct nlm_cookie *c)
 	return p;
 }
 
-static u32 *
-nlm4_encode_cookie(u32 *p, struct nlm_cookie *c)
+static __be32 *
+nlm4_encode_cookie(__be32 *p, struct nlm_cookie *c)
 {
 	*p++ = htonl(c->len);
 	memcpy(p, c->data, c->len);
@@ -80,8 +80,8 @@ nlm4_encode_cookie(u32 *p, struct nlm_cookie *c)
 	return p;
 }
 
-static u32 *
-nlm4_decode_fh(u32 *p, struct nfs_fh *f)
+static __be32 *
+nlm4_decode_fh(__be32 *p, struct nfs_fh *f)
 {
 	memset(f->data, 0, sizeof(f->data));
 	f->size = ntohl(*p++);
@@ -95,8 +95,8 @@ nlm4_decode_fh(u32 *p, struct nfs_fh *f)
 	return p + XDR_QUADLEN(f->size);
 }
 
-static u32 *
-nlm4_encode_fh(u32 *p, struct nfs_fh *f)
+static __be32 *
+nlm4_encode_fh(__be32 *p, struct nfs_fh *f)
 {
 	*p++ = htonl(f->size);
 	if (f->size) p[XDR_QUADLEN(f->size)-1] = 0; /* don't leak anything */
@@ -107,20 +107,20 @@ nlm4_encode_fh(u32 *p, struct nfs_fh *f)
 /*
  * Encode and decode owner handle
  */
-static u32 *
-nlm4_decode_oh(u32 *p, struct xdr_netobj *oh)
+static __be32 *
+nlm4_decode_oh(__be32 *p, struct xdr_netobj *oh)
 {
 	return xdr_decode_netobj(p, oh);
 }
 
-static u32 *
-nlm4_encode_oh(u32 *p, struct xdr_netobj *oh)
+static __be32 *
+nlm4_encode_oh(__be32 *p, struct xdr_netobj *oh)
 {
 	return xdr_encode_netobj(p, oh);
 }
 
-static u32 *
-nlm4_decode_lock(u32 *p, struct nlm_lock *lock)
+static __be32 *
+nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
 {
 	struct file_lock	*fl = &lock->fl;
 	__s64			len, start, end;
@@ -153,8 +153,8 @@ nlm4_decode_lock(u32 *p, struct nlm_lock *lock)
 /*
  * Encode a lock as part of an NLM call
  */
-static u32 *
-nlm4_encode_lock(u32 *p, struct nlm_lock *lock)
+static __be32 *
+nlm4_encode_lock(__be32 *p, struct nlm_lock *lock)
 {
 	struct file_lock	*fl = &lock->fl;
 	__s64			start, len;
@@ -185,8 +185,8 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock)
 /*
  * Encode result of a TEST/TEST_MSG call
  */
-static u32 *
-nlm4_encode_testres(u32 *p, struct nlm_res *resp)
+static __be32 *
+nlm4_encode_testres(__be32 *p, struct nlm_res *resp)
 {
 	s64		start, len;
 
@@ -227,7 +227,7 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlm4svc_decode_testargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 {
 	u32	exclusive;
 
@@ -244,7 +244,7 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_encode_testres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_encode_testres(p, resp)))
 		return 0;
@@ -252,7 +252,7 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_lockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 {
 	u32	exclusive;
 
@@ -272,7 +272,7 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_cancargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 {
 	u32	exclusive;
 
@@ -288,7 +288,7 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 {
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm4_decode_lock(p, &argp->lock)))
@@ -298,7 +298,7 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
+nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -319,7 +319,7 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_encode_shareres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
@@ -329,7 +329,7 @@ nlm4svc_encode_shareres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
@@ -338,7 +338,7 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_notify(struct svc_rqst *rqstp, u32 *p, struct nlm_args *argp)
+nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -350,7 +350,7 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, u32 *p, struct nlm_args *argp)
 }
 
 int
-nlm4svc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp)
+nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
 {
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
@@ -363,7 +363,7 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp)
 }
 
 int
-nlm4svc_decode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
+nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return 0;
@@ -372,13 +372,13 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
 int
-nlm4svc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_ressize_check(rqstp, p);
 }
@@ -388,14 +388,14 @@ nlm4svc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
  */
 #ifdef NLMCLNT_SUPPORT_SHARES
 static int
-nlm4clt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr)
+nlm4clt_decode_void(struct rpc_rqst *req, __be32 *p, void *ptr)
 {
 	return 0;
 }
 #endif
 
 static int
-nlm4clt_encode_testargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+nlm4clt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -409,7 +409,7 @@ nlm4clt_encode_testargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
 }
 
 static int
-nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return -EIO;
@@ -444,7 +444,7 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 
 
 static int
-nlm4clt_encode_lockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+nlm4clt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -461,7 +461,7 @@ nlm4clt_encode_lockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
 }
 
 static int
-nlm4clt_encode_cancargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+nlm4clt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -476,7 +476,7 @@ nlm4clt_encode_cancargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
 }
 
 static int
-nlm4clt_encode_unlockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
+nlm4clt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
 {
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -489,7 +489,7 @@ nlm4clt_encode_unlockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp)
 }
 
 static int
-nlm4clt_encode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+nlm4clt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return -EIO;
@@ -499,7 +499,7 @@ nlm4clt_encode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 }
 
 static int
-nlm4clt_encode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+nlm4clt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_encode_testres(p, resp)))
 		return -EIO;
@@ -508,7 +508,7 @@ nlm4clt_encode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
 }
 
 static int
-nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
+nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return -EIO;
-- 
cgit v1.2.3


From 9d787a75a00679c3ebcb88236a7af7b38a0b5932 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:47 -0700
Subject: [PATCH] xdr annotations: NFSv2

on-the-wire data is big-endian

[in large part pulled from Alexey's patch]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/nfs2xdr.c | 74 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index b49501fc0a79..1d801e30c40e 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -66,15 +66,15 @@
 /*
  * Common NFS XDR functions as inlines
  */
-static inline u32 *
-xdr_encode_fhandle(u32 *p, struct nfs_fh *fhandle)
+static inline __be32 *
+xdr_encode_fhandle(__be32 *p, struct nfs_fh *fhandle)
 {
 	memcpy(p, fhandle->data, NFS2_FHSIZE);
 	return p + XDR_QUADLEN(NFS2_FHSIZE);
 }
 
-static inline u32 *
-xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle)
+static inline __be32 *
+xdr_decode_fhandle(__be32 *p, struct nfs_fh *fhandle)
 {
 	/* NFSv2 handles have a fixed length */
 	fhandle->size = NFS2_FHSIZE;
@@ -82,8 +82,8 @@ xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle)
 	return p + XDR_QUADLEN(NFS2_FHSIZE);
 }
 
-static inline u32*
-xdr_encode_time(u32 *p, struct timespec *timep)
+static inline __be32*
+xdr_encode_time(__be32 *p, struct timespec *timep)
 {
 	*p++ = htonl(timep->tv_sec);
 	/* Convert nanoseconds into microseconds */
@@ -91,8 +91,8 @@ xdr_encode_time(u32 *p, struct timespec *timep)
 	return p;
 }
 
-static inline u32*
-xdr_encode_current_server_time(u32 *p, struct timespec *timep)
+static inline __be32*
+xdr_encode_current_server_time(__be32 *p, struct timespec *timep)
 {
 	/*
 	 * Passing the invalid value useconds=1000000 is a
@@ -108,8 +108,8 @@ xdr_encode_current_server_time(u32 *p, struct timespec *timep)
 	return p;
 }
 
-static inline u32*
-xdr_decode_time(u32 *p, struct timespec *timep)
+static inline __be32*
+xdr_decode_time(__be32 *p, struct timespec *timep)
 {
 	timep->tv_sec = ntohl(*p++);
 	/* Convert microseconds into nanoseconds */
@@ -117,8 +117,8 @@ xdr_decode_time(u32 *p, struct timespec *timep)
 	return p;
 }
 
-static u32 *
-xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
+static __be32 *
+xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
 {
 	u32 rdev;
 	fattr->type = (enum nfs_ftype) ntohl(*p++);
@@ -146,10 +146,10 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
 	return p;
 }
 
-static inline u32 *
-xdr_encode_sattr(u32 *p, struct iattr *attr)
+static inline __be32 *
+xdr_encode_sattr(__be32 *p, struct iattr *attr)
 {
-	const u32 not_set = __constant_htonl(0xFFFFFFFF);
+	const __be32 not_set = __constant_htonl(0xFFFFFFFF);
 
 	*p++ = (attr->ia_valid & ATTR_MODE) ? htonl(attr->ia_mode) : not_set;
 	*p++ = (attr->ia_valid & ATTR_UID) ? htonl(attr->ia_uid) : not_set;
@@ -184,7 +184,7 @@ xdr_encode_sattr(u32 *p, struct iattr *attr)
  * GETATTR, READLINK, STATFS
  */
 static int
-nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
+nfs_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh)
 {
 	p = xdr_encode_fhandle(p, fh);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
@@ -195,7 +195,7 @@ nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
  * Encode SETATTR arguments
  */
 static int
-nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args)
+nfs_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs_sattrargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	p = xdr_encode_sattr(p, args->sattr);
@@ -208,7 +208,7 @@ nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args)
  * LOOKUP, REMOVE, RMDIR
  */
 static int
-nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args)
+nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	p = xdr_encode_array(p, args->name, args->len);
@@ -222,7 +222,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args)
  * exactly to the page we want to fetch.
  */
 static int
-nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
+nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
 	struct rpc_auth	*auth = req->rq_task->tk_auth;
 	unsigned int replen;
@@ -246,7 +246,7 @@ nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
  * Decode READ reply
  */
 static int
-nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
+nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 {
 	struct kvec *iov = req->rq_rcv_buf.head;
 	int	status, count, recvd, hdrlen;
@@ -286,7 +286,7 @@ nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
  * Write arguments. Splice the buffer to be written into the iovec.
  */
 static int
-nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
 {
 	struct xdr_buf *sndbuf = &req->rq_snd_buf;
 	u32 offset = (u32)args->offset;
@@ -309,7 +309,7 @@ nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
  * CREATE, MKDIR
  */
 static int
-nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args)
+nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	p = xdr_encode_array(p, args->name, args->len);
@@ -322,7 +322,7 @@ nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args)
  * Encode RENAME arguments
  */
 static int
-nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args)
+nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
@@ -336,7 +336,7 @@ nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args)
  * Encode LINK arguments
  */
 static int
-nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
+nfs_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs_linkargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_fhandle(p, args->tofh);
@@ -349,7 +349,7 @@ nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
  * Encode SYMLINK arguments
  */
 static int
-nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
+nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *args)
 {
 	struct xdr_buf *sndbuf = &req->rq_snd_buf;
 	size_t pad;
@@ -378,7 +378,7 @@ nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
  * Encode arguments to readdir call
  */
 static int
-nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
+nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
 {
 	struct rpc_task	*task = req->rq_task;
 	struct rpc_auth	*auth = task->tk_auth;
@@ -404,7 +404,7 @@ nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
  * from nfs_readdir for each entry.
  */
 static int
-nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy)
+nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
 {
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	struct kvec *iov = rcvbuf->head;
@@ -412,7 +412,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy)
 	int hdrlen, recvd;
 	int status, nr;
 	unsigned int len, pglen;
-	u32 *end, *entry, *kaddr;
+	__be32 *end, *entry, *kaddr;
 
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
@@ -432,8 +432,8 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy)
 	if (pglen > recvd)
 		pglen = recvd;
 	page = rcvbuf->pages;
-	kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0);
-	end = (u32 *)((char *)p + pglen);
+	kaddr = p = kmap_atomic(*page, KM_USER0);
+	end = (__be32 *)((char *)p + pglen);
 	entry = p;
 	for (nr = 0; *p++; nr++) {
 		if (p + 2 > end)
@@ -496,7 +496,7 @@ nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
  * Decode simple status reply
  */
 static int
-nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy)
+nfs_xdr_stat(struct rpc_rqst *req, __be32 *p, void *dummy)
 {
 	int	status;
 
@@ -510,7 +510,7 @@ nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy)
  * GETATTR, SETATTR, WRITE
  */
 static int
-nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+nfs_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 {
 	int	status;
 
@@ -525,7 +525,7 @@ nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
  * LOOKUP, CREATE, MKDIR
  */
 static int
-nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res)
+nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
 {
 	int	status;
 
@@ -540,7 +540,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res)
  * Encode READLINK args
  */
 static int
-nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args)
+nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
 {
 	struct rpc_auth *auth = req->rq_task->tk_auth;
 	unsigned int replen;
@@ -558,7 +558,7 @@ nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args
  * Decode READLINK reply
  */
 static int
-nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy)
+nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
 {
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	struct kvec *iov = rcvbuf->head;
@@ -601,7 +601,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy)
  * Decode WRITE reply
  */
 static int
-nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+nfs_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
 {
 	res->verf->committed = NFS_FILE_SYNC;
 	return nfs_xdr_attrstat(req, p, res->fattr);
@@ -611,7 +611,7 @@ nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
  * Decode STATFS reply
  */
 static int
-nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_fsstat *res)
+nfs_xdr_statfsres(struct rpc_rqst *req, __be32 *p, struct nfs2_fsstat *res)
 {
 	int	status;
 
-- 
cgit v1.2.3


From d61005a6855160091dca44b718db93fe7aa9876f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:48 -0700
Subject: [PATCH] xdr annotations: NFSv3

on-the-wire data is big-endian

[in large part pulled from Alexey's patch]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/nfs3xdr.c | 114 +++++++++++++++++++++++++++----------------------------
 1 file changed, 57 insertions(+), 57 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 16556fa4effb..b4e740e4494a 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -105,14 +105,14 @@ static struct {
 /*
  * Common NFS XDR functions as inlines
  */
-static inline u32 *
-xdr_encode_fhandle(u32 *p, struct nfs_fh *fh)
+static inline __be32 *
+xdr_encode_fhandle(__be32 *p, struct nfs_fh *fh)
 {
 	return xdr_encode_array(p, fh->data, fh->size);
 }
 
-static inline u32 *
-xdr_decode_fhandle(u32 *p, struct nfs_fh *fh)
+static inline __be32 *
+xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh)
 {
 	if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) {
 		memcpy(fh->data, p, fh->size);
@@ -124,24 +124,24 @@ xdr_decode_fhandle(u32 *p, struct nfs_fh *fh)
 /*
  * Encode/decode time.
  */
-static inline u32 *
-xdr_encode_time3(u32 *p, struct timespec *timep)
+static inline __be32 *
+xdr_encode_time3(__be32 *p, struct timespec *timep)
 {
 	*p++ = htonl(timep->tv_sec);
 	*p++ = htonl(timep->tv_nsec);
 	return p;
 }
 
-static inline u32 *
-xdr_decode_time3(u32 *p, struct timespec *timep)
+static inline __be32 *
+xdr_decode_time3(__be32 *p, struct timespec *timep)
 {
 	timep->tv_sec = ntohl(*p++);
 	timep->tv_nsec = ntohl(*p++);
 	return p;
 }
 
-static u32 *
-xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
+static __be32 *
+xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
 {
 	unsigned int	type, major, minor;
 	int		fmode;
@@ -177,8 +177,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
 	return p;
 }
 
-static inline u32 *
-xdr_encode_sattr(u32 *p, struct iattr *attr)
+static inline __be32 *
+xdr_encode_sattr(__be32 *p, struct iattr *attr)
 {
 	if (attr->ia_valid & ATTR_MODE) {
 		*p++ = xdr_one;
@@ -223,8 +223,8 @@ xdr_encode_sattr(u32 *p, struct iattr *attr)
 	return p;
 }
 
-static inline u32 *
-xdr_decode_wcc_attr(u32 *p, struct nfs_fattr *fattr)
+static inline __be32 *
+xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr)
 {
 	p = xdr_decode_hyper(p, &fattr->pre_size);
 	p = xdr_decode_time3(p, &fattr->pre_mtime);
@@ -233,16 +233,16 @@ xdr_decode_wcc_attr(u32 *p, struct nfs_fattr *fattr)
 	return p;
 }
 
-static inline u32 *
-xdr_decode_post_op_attr(u32 *p, struct nfs_fattr *fattr)
+static inline __be32 *
+xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr)
 {
 	if (*p++)
 		p = xdr_decode_fattr(p, fattr);
 	return p;
 }
 
-static inline u32 *
-xdr_decode_pre_op_attr(u32 *p, struct nfs_fattr *fattr)
+static inline __be32 *
+xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr)
 {
 	if (*p++)
 		return xdr_decode_wcc_attr(p, fattr);
@@ -250,8 +250,8 @@ xdr_decode_pre_op_attr(u32 *p, struct nfs_fattr *fattr)
 }
 
 
-static inline u32 *
-xdr_decode_wcc_data(u32 *p, struct nfs_fattr *fattr)
+static inline __be32 *
+xdr_decode_wcc_data(__be32 *p, struct nfs_fattr *fattr)
 {
 	p = xdr_decode_pre_op_attr(p, fattr);
 	return xdr_decode_post_op_attr(p, fattr);
@@ -265,7 +265,7 @@ xdr_decode_wcc_data(u32 *p, struct nfs_fattr *fattr)
  * Encode file handle argument
  */
 static int
-nfs3_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
+nfs3_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh)
 {
 	p = xdr_encode_fhandle(p, fh);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
@@ -276,7 +276,7 @@ nfs3_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
  * Encode SETATTR arguments
  */
 static int
-nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args)
+nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	p = xdr_encode_sattr(p, args->sattr);
@@ -291,7 +291,7 @@ nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args)
  * Encode directory ops argument
  */
 static int
-nfs3_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs3_diropargs *args)
+nfs3_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs3_diropargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	p = xdr_encode_array(p, args->name, args->len);
@@ -303,7 +303,7 @@ nfs3_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs3_diropargs *args)
  * Encode access() argument
  */
 static int
-nfs3_xdr_accessargs(struct rpc_rqst *req, u32 *p, struct nfs3_accessargs *args)
+nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	*p++ = htonl(args->access);
@@ -317,7 +317,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, u32 *p, struct nfs3_accessargs *args)
  * exactly to the page we want to fetch.
  */
 static int
-nfs3_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
+nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
 	struct rpc_auth	*auth = req->rq_task->tk_auth;
 	unsigned int replen;
@@ -339,7 +339,7 @@ nfs3_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
  * Write arguments. Splice the buffer to be written into the iovec.
  */
 static int
-nfs3_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
 {
 	struct xdr_buf *sndbuf = &req->rq_snd_buf;
 	u32 count = args->count;
@@ -360,7 +360,7 @@ nfs3_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
  * Encode CREATE arguments
  */
 static int
-nfs3_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs3_createargs *args)
+nfs3_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs3_createargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	p = xdr_encode_array(p, args->name, args->len);
@@ -380,7 +380,7 @@ nfs3_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs3_createargs *args)
  * Encode MKDIR arguments
  */
 static int
-nfs3_xdr_mkdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_mkdirargs *args)
+nfs3_xdr_mkdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mkdirargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	p = xdr_encode_array(p, args->name, args->len);
@@ -393,7 +393,7 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_mkdirargs *args)
  * Encode SYMLINK arguments
  */
 static int
-nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args)
+nfs3_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_symlinkargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
@@ -410,7 +410,7 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args
  * Encode MKNOD arguments
  */
 static int
-nfs3_xdr_mknodargs(struct rpc_rqst *req, u32 *p, struct nfs3_mknodargs *args)
+nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	p = xdr_encode_array(p, args->name, args->len);
@@ -429,7 +429,7 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, u32 *p, struct nfs3_mknodargs *args)
  * Encode RENAME arguments
  */
 static int
-nfs3_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs3_renameargs *args)
+nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs3_renameargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
@@ -443,7 +443,7 @@ nfs3_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs3_renameargs *args)
  * Encode LINK arguments
  */
 static int
-nfs3_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs3_linkargs *args)
+nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_fhandle(p, args->tofh);
@@ -456,7 +456,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs3_linkargs *args)
  * Encode arguments to readdir call
  */
 static int
-nfs3_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_readdirargs *args)
+nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
 {
 	struct rpc_auth	*auth = req->rq_task->tk_auth;
 	unsigned int replen;
@@ -485,7 +485,7 @@ nfs3_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_readdirargs *args
  * We just check for syntactical correctness.
  */
 static int
-nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res)
+nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res)
 {
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	struct kvec *iov = rcvbuf->head;
@@ -493,7 +493,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res)
 	int hdrlen, recvd;
 	int status, nr;
 	unsigned int len, pglen;
-	u32 *entry, *end, *kaddr;
+	__be32 *entry, *end, *kaddr;
 
 	status = ntohl(*p++);
 	/* Decode post_op_attrs */
@@ -523,8 +523,8 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res)
 	if (pglen > recvd)
 		pglen = recvd;
 	page = rcvbuf->pages;
-	kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0);
-	end = (u32 *)((char *)p + pglen);
+	kaddr = p = kmap_atomic(*page, KM_USER0);
+	end = (__be32 *)((char *)p + pglen);
 	entry = p;
 	for (nr = 0; *p++; nr++) {
 		if (p + 3 > end)
@@ -626,7 +626,7 @@ nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
  * Encode COMMIT arguments
  */
 static int
-nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+nfs3_xdr_commitargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
 {
 	p = xdr_encode_fhandle(p, args->fh);
 	p = xdr_encode_hyper(p, args->offset);
@@ -640,7 +640,7 @@ nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
  * Encode GETACL arguments
  */
 static int
-nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p,
+nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
 		    struct nfs3_getaclargs *args)
 {
 	struct rpc_auth *auth = req->rq_task->tk_auth;
@@ -664,7 +664,7 @@ nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p,
  * Encode SETACL arguments
  */
 static int
-nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p,
+nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
                    struct nfs3_setaclargs *args)
 {
 	struct xdr_buf *buf = &req->rq_snd_buf;
@@ -711,7 +711,7 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p,
  * Decode attrstat reply.
  */
 static int
-nfs3_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 {
 	int	status;
 
@@ -726,7 +726,7 @@ nfs3_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
  * SATTR, REMOVE, RMDIR
  */
 static int
-nfs3_xdr_wccstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 {
 	int	status;
 
@@ -740,7 +740,7 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
  * Decode LOOKUP reply
  */
 static int
-nfs3_xdr_lookupres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
+nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
 {
 	int	status;
 
@@ -759,7 +759,7 @@ nfs3_xdr_lookupres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
  * Decode ACCESS reply
  */
 static int
-nfs3_xdr_accessres(struct rpc_rqst *req, u32 *p, struct nfs3_accessres *res)
+nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
 {
 	int	status = ntohl(*p++);
 
@@ -771,7 +771,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, u32 *p, struct nfs3_accessres *res)
 }
 
 static int
-nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *args)
+nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
 {
 	struct rpc_auth *auth = req->rq_task->tk_auth;
 	unsigned int replen;
@@ -789,7 +789,7 @@ nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *ar
  * Decode READLINK reply
  */
 static int
-nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 {
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	struct kvec *iov = rcvbuf->head;
@@ -837,7 +837,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
  * Decode READ reply
  */
 static int
-nfs3_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
+nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 {
 	struct kvec *iov = req->rq_rcv_buf.head;
 	int	status, count, ocount, recvd, hdrlen;
@@ -888,7 +888,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
  * Decode WRITE response
  */
 static int
-nfs3_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
 {
 	int	status;
 
@@ -910,7 +910,7 @@ nfs3_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
  * Decode a CREATE response
  */
 static int
-nfs3_xdr_createres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
+nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
 {
 	int	status;
 
@@ -937,7 +937,7 @@ nfs3_xdr_createres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
  * Decode RENAME reply
  */
 static int
-nfs3_xdr_renameres(struct rpc_rqst *req, u32 *p, struct nfs3_renameres *res)
+nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res)
 {
 	int	status;
 
@@ -952,7 +952,7 @@ nfs3_xdr_renameres(struct rpc_rqst *req, u32 *p, struct nfs3_renameres *res)
  * Decode LINK reply
  */
 static int
-nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res)
+nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res)
 {
 	int	status;
 
@@ -967,7 +967,7 @@ nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res)
  * Decode FSSTAT reply
  */
 static int
-nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res)
+nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res)
 {
 	int		status;
 
@@ -992,7 +992,7 @@ nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res)
  * Decode FSINFO reply
  */
 static int
-nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
 {
 	int		status;
 
@@ -1020,7 +1020,7 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
  * Decode PATHCONF reply
  */
 static int
-nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res)
+nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res)
 {
 	int		status;
 
@@ -1040,7 +1040,7 @@ nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res)
  * Decode COMMIT reply
  */
 static int
-nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
 {
 	int		status;
 
@@ -1059,7 +1059,7 @@ nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
  * Decode GETACL reply
  */
 static int
-nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p,
+nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p,
 		   struct nfs3_getaclres *res)
 {
 	struct xdr_buf *buf = &req->rq_rcv_buf;
@@ -1091,7 +1091,7 @@ nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p,
  * Decode setacl reply.
  */
 static int
-nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 {
 	int status = ntohl(*p++);
 
-- 
cgit v1.2.3


From 8687b63afbe42103730bff4d3f7bfff3463c303e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:48 -0700
Subject: [PATCH] xdr annotations: NFSv4

on-the-wire data is big-endian

[in large part pulled from Alexey's patch]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/nfs4xdr.c | 358 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 181 insertions(+), 177 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 3dd413f52da1..e284123b9774 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -471,7 +471,7 @@ struct compound_hdr {
 
 static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 4 + len);
 	BUG_ON(p == NULL);
@@ -480,7 +480,7 @@ static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *
 
 static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
 	BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
@@ -494,7 +494,7 @@ static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
 
 static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
 	BUG_ON(p == NULL);
@@ -507,8 +507,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 	char owner_group[IDMAP_NAMESZ];
 	int owner_namelen = 0;
 	int owner_grouplen = 0;
-	uint32_t *p;
-	uint32_t *q;
+	__be32 *p;
+	__be32 *q;
 	int len;
 	uint32_t bmval0 = 0;
 	uint32_t bmval1 = 0;
@@ -630,7 +630,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 
 static int encode_access(struct xdr_stream *xdr, u32 access)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(8);
 	WRITE32(OP_ACCESS);
@@ -641,7 +641,7 @@ static int encode_access(struct xdr_stream *xdr, u32 access)
 
 static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_CLOSE);
@@ -653,7 +653,7 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
 
 static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args)
 {
-	uint32_t *p;
+	__be32 *p;
         
         RESERVE_SPACE(16);
         WRITE32(OP_COMMIT);
@@ -665,7 +665,7 @@ static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *arg
 
 static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create)
 {
-	uint32_t *p;
+	__be32 *p;
 	
 	RESERVE_SPACE(8);
 	WRITE32(OP_CREATE);
@@ -697,7 +697,7 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c
 
 static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap)
 {
-        uint32_t *p;
+        __be32 *p;
 
         RESERVE_SPACE(12);
         WRITE32(OP_GETATTR);
@@ -708,7 +708,7 @@ static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap)
 
 static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1)
 {
-        uint32_t *p;
+        __be32 *p;
 
         RESERVE_SPACE(16);
         WRITE32(OP_GETATTR);
@@ -740,7 +740,7 @@ static int encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask)
 
 static int encode_getfh(struct xdr_stream *xdr)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	WRITE32(OP_GETFH);
@@ -750,7 +750,7 @@ static int encode_getfh(struct xdr_stream *xdr)
 
 static int encode_link(struct xdr_stream *xdr, const struct qstr *name)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(8 + name->len);
 	WRITE32(OP_LINK);
@@ -780,7 +780,7 @@ static inline uint64_t nfs4_lock_length(struct file_lock *fl)
  */
 static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(32);
 	WRITE32(OP_LOCK);
@@ -809,7 +809,7 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
 
 static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(40);
 	WRITE32(OP_LOCKT);
@@ -825,7 +825,7 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
 
 static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(44);
 	WRITE32(OP_LOCKU);
@@ -841,7 +841,7 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *arg
 static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name)
 {
 	int len = name->len;
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(8 + len);
 	WRITE32(OP_LOOKUP);
@@ -853,7 +853,7 @@ static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name)
 
 static void encode_share_access(struct xdr_stream *xdr, int open_flags)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(8);
 	switch (open_flags & (FMODE_READ|FMODE_WRITE)) {
@@ -874,7 +874,7 @@ static void encode_share_access(struct xdr_stream *xdr, int open_flags)
 
 static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg)
 {
-	uint32_t *p;
+	__be32 *p;
  /*
  * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
  * owner 4 = 32
@@ -891,7 +891,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
 
 static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	switch(arg->open_flags & O_EXCL) {
@@ -907,7 +907,7 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
 
 static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *arg)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	switch (arg->open_flags & O_CREAT) {
@@ -923,7 +923,7 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
 
 static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	switch (delegation_type) {
@@ -943,7 +943,7 @@ static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation
 
 static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *name)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	WRITE32(NFS4_OPEN_CLAIM_NULL);
@@ -952,7 +952,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
 
 static inline void encode_claim_previous(struct xdr_stream *xdr, int type)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	WRITE32(NFS4_OPEN_CLAIM_PREVIOUS);
@@ -961,7 +961,7 @@ static inline void encode_claim_previous(struct xdr_stream *xdr, int type)
 
 static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struct qstr *name, const nfs4_stateid *stateid)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4+sizeof(stateid->data));
 	WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
@@ -991,7 +991,7 @@ static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg)
 
 static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_OPEN_CONFIRM);
@@ -1003,7 +1003,7 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con
 
 static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_OPEN_DOWNGRADE);
@@ -1017,7 +1017,7 @@ static int
 encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh)
 {
 	int len = fh->size;
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(8 + len);
 	WRITE32(OP_PUTFH);
@@ -1029,7 +1029,7 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh)
 
 static int encode_putrootfh(struct xdr_stream *xdr)
 {
-        uint32_t *p;
+        __be32 *p;
         
         RESERVE_SPACE(4);
         WRITE32(OP_PUTROOTFH);
@@ -1040,7 +1040,7 @@ static int encode_putrootfh(struct xdr_stream *xdr)
 static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
 {
 	nfs4_stateid stateid;
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(16);
 	if (ctx->state != NULL) {
@@ -1052,7 +1052,7 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
 
 static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	WRITE32(OP_READ);
@@ -1074,7 +1074,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 		FATTR4_WORD1_MOUNTED_ON_FILEID,
 	};
 	int replen;
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(32+sizeof(nfs4_verifier));
 	WRITE32(OP_READDIR);
@@ -1116,7 +1116,7 @@ static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *r
 {
 	struct rpc_auth *auth = req->rq_task->tk_auth;
 	unsigned int replen;
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	WRITE32(OP_READLINK);
@@ -1134,7 +1134,7 @@ static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *r
 
 static int encode_remove(struct xdr_stream *xdr, const struct qstr *name)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(8 + name->len);
 	WRITE32(OP_REMOVE);
@@ -1146,7 +1146,7 @@ static int encode_remove(struct xdr_stream *xdr, const struct qstr *name)
 
 static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(8 + oldname->len);
 	WRITE32(OP_RENAME);
@@ -1162,7 +1162,7 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con
 
 static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(12);
 	WRITE32(OP_RENEW);
@@ -1174,7 +1174,7 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_
 static int
 encode_restorefh(struct xdr_stream *xdr)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	WRITE32(OP_RESTOREFH);
@@ -1185,7 +1185,7 @@ encode_restorefh(struct xdr_stream *xdr)
 static int
 encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4+sizeof(zero_stateid.data));
 	WRITE32(OP_SETATTR);
@@ -1204,7 +1204,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
 static int
 encode_savefh(struct xdr_stream *xdr)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	WRITE32(OP_SAVEFH);
@@ -1215,7 +1215,7 @@ encode_savefh(struct xdr_stream *xdr)
 static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server)
 {
 	int status;
-	uint32_t *p;
+	__be32 *p;
 	
         RESERVE_SPACE(4+sizeof(arg->stateid.data));
         WRITE32(OP_SETATTR);
@@ -1229,7 +1229,7 @@ static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *
 
 static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4 + sizeof(setclientid->sc_verifier->data));
 	WRITE32(OP_SETCLIENTID);
@@ -1248,7 +1248,7 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien
 
 static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state)
 {
-        uint32_t *p;
+        __be32 *p;
 
         RESERVE_SPACE(12 + sizeof(client_state->cl_confirm.data));
         WRITE32(OP_SETCLIENTID_CONFIRM);
@@ -1260,7 +1260,7 @@ static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_c
 
 static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(4);
 	WRITE32(OP_WRITE);
@@ -1279,7 +1279,7 @@ static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args
 
 static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	RESERVE_SPACE(20);
 
@@ -1295,7 +1295,7 @@ static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *statei
 /*
  * Encode an ACCESS request
  */
-static int nfs4_xdr_enc_access(struct rpc_rqst *req, uint32_t *p, const struct nfs4_accessargs *args)
+static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs4_accessargs *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1313,7 +1313,7 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, uint32_t *p, const struct n
 /*
  * Encode LOOKUP request
  */
-static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, uint32_t *p, const struct nfs4_lookup_arg *args)
+static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1337,7 +1337,7 @@ out:
 /*
  * Encode LOOKUP_ROOT request
  */
-static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, uint32_t *p, const struct nfs4_lookup_root_arg *args)
+static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_root_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1358,7 +1358,7 @@ out:
 /*
  * Encode REMOVE request
  */
-static int nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, const struct nfs4_remove_arg *args)
+static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs4_remove_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1380,7 +1380,7 @@ out:
 /*
  * Encode RENAME request
  */
-static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct nfs4_rename_arg *args)
+static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs4_rename_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1410,7 +1410,7 @@ out:
 /*
  * Encode LINK request
  */
-static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs4_link_arg *args)
+static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_link_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1440,7 +1440,7 @@ out:
 /*
  * Encode CREATE request
  */
-static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args)
+static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1470,7 +1470,7 @@ out:
 /*
  * Encode SYMLINK request
  */
-static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args)
+static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
 {
 	return nfs4_xdr_enc_create(req, p, args);
 }
@@ -1478,7 +1478,7 @@ static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, uint32_t *p, const struct
 /*
  * Encode GETATTR request
  */
-static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, uint32_t *p, const struct nfs4_getattr_arg *args)
+static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nfs4_getattr_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1496,7 +1496,7 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, uint32_t *p, const struct
 /*
  * Encode a CLOSE request
  */
-static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
+static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
 {
         struct xdr_stream xdr;
         struct compound_hdr hdr = {
@@ -1520,7 +1520,7 @@ out:
 /*
  * Encode an OPEN request
  */
-static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
+static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1556,7 +1556,7 @@ out:
 /*
  * Encode an OPEN_CONFIRM request
  */
-static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args)
+static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_open_confirmargs *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1577,7 +1577,7 @@ out:
 /*
  * Encode an OPEN request with no attributes.
  */
-static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
+static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1601,7 +1601,7 @@ out:
 /*
  * Encode an OPEN_DOWNGRADE request
  */
-static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
+static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1625,7 +1625,7 @@ out:
 /*
  * Encode a LOCK request
  */
-static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lock_args *args)
+static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_args *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1646,7 +1646,7 @@ out:
 /*
  * Encode a LOCKT request
  */
-static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockt_args *args)
+static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_args *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1667,7 +1667,7 @@ out:
 /*
  * Encode a LOCKU request
  */
-static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_locku_args *args)
+static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_args *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1688,7 +1688,7 @@ out:
 /*
  * Encode a READLINK request
  */
-static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_readlink *args)
+static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_readlink *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1709,7 +1709,7 @@ out:
 /*
  * Encode a READDIR request
  */
-static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, uint32_t *p, const struct nfs4_readdir_arg *args)
+static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nfs4_readdir_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1730,7 +1730,7 @@ out:
 /*
  * Encode a READ request
  */
-static int nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args)
+static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
 	struct rpc_auth	*auth = req->rq_task->tk_auth;
 	struct xdr_stream xdr;
@@ -1762,7 +1762,7 @@ out:
 /*
  * Encode an SETATTR request
  */
-static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, uint32_t *p, struct nfs_setattrargs *args)
+static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args)
 
 {
         struct xdr_stream xdr;
@@ -1788,7 +1788,7 @@ out:
  * Encode a GETACL request
  */
 static int
-nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,
+nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
 		struct nfs_getaclargs *args)
 {
 	struct xdr_stream xdr;
@@ -1815,7 +1815,7 @@ out:
 /*
  * Encode a WRITE request
  */
-static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
+static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1839,7 +1839,7 @@ out:
 /*
  *  a COMMIT request
  */
-static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
+static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1863,7 +1863,7 @@ out:
 /*
  * FSINFO request
  */
-static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fsinfo_arg *args)
+static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsinfo_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1882,7 +1882,7 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs
 /*
  * a PATHCONF request
  */
-static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args)
+static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct nfs4_pathconf_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1902,7 +1902,7 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct
 /*
  * a STATFS request
  */
-static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args)
+static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs4_statfs_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1923,7 +1923,7 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct n
 /*
  * GETATTR_BITMAP request
  */
-static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const struct nfs_fh *fhandle)
+static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struct nfs_fh *fhandle)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const str
 /*
  * a RENEW request
  */
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
+static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1960,7 +1960,7 @@ static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_clie
 /*
  * a SETCLIENTID request
  */
-static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nfs4_setclientid *sc)
+static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid *sc)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nf
 /*
  * a SETCLIENTID_CONFIRM request
  */
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
+static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1997,7 +1997,7 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s
 /*
  * DELEGRETURN request
  */
-static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const struct nfs4_delegreturnargs *args)
+static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struct nfs4_delegreturnargs *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -2021,7 +2021,7 @@ out:
 /*
  * Encode FS_LOCATIONS request
  */
-static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations_arg *args)
+static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -2086,7 +2086,7 @@ out:
 
 static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	READ_BUF(4);
 	READ32(*len);
@@ -2097,7 +2097,7 @@ static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char
 
 static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	READ_BUF(8);
 	READ32(hdr->status);
@@ -2112,7 +2112,7 @@ static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
 
 static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 {
-	uint32_t *p;
+	__be32 *p;
 	uint32_t opnum;
 	int32_t nfserr;
 
@@ -2134,7 +2134,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 /* Dummy routine */
 static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
 {
-	uint32_t *p;
+	__be32 *p;
 	unsigned int strlen;
 	char *str;
 
@@ -2144,7 +2144,8 @@ static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
 
 static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
 {
-	uint32_t bmlen, *p;
+	uint32_t bmlen;
+	__be32 *p;
 
 	READ_BUF(4);
 	READ32(bmlen);
@@ -2159,9 +2160,9 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
 	return 0;
 }
 
-static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, uint32_t **savep)
+static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	READ_BUF(4);
 	READ32(*attrlen);
@@ -2182,7 +2183,7 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3
 
 static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*type = 0;
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
@@ -2202,7 +2203,7 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
 
 static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*change = 0;
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
@@ -2219,7 +2220,7 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
 
 static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*size = 0;
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
@@ -2235,7 +2236,7 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
 
 static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*res = 0;
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)))
@@ -2251,7 +2252,7 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
 
 static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*res = 0;
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)))
@@ -2267,7 +2268,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
 
 static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	fsid->major = 0;
 	fsid->minor = 0;
@@ -2287,7 +2288,7 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
 
 static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*res = 60;
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
@@ -2303,7 +2304,7 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint
 
 static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*res = ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL;
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
@@ -2319,7 +2320,7 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
 
 static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*fileid = 0;
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
@@ -2335,7 +2336,7 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
 
 static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*fileid = 0;
 	if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
@@ -2351,7 +2352,7 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
 
 static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*res = 0;
@@ -2368,7 +2369,7 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
 
 static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*res = 0;
@@ -2385,7 +2386,7 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
 
 static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*res = 0;
@@ -2403,7 +2404,7 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
 static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
 {
 	int n;
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	READ_BUF(4);
@@ -2448,7 +2449,7 @@ out_eio:
 static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
 {
 	int n;
-	uint32_t *p;
+	__be32 *p;
 	int status = -EIO;
 
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_FS_LOCATIONS -1U)))
@@ -2512,7 +2513,7 @@ out_eio:
 
 static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*res = 0;
@@ -2529,7 +2530,7 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin
 
 static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*maxlink = 1;
@@ -2546,7 +2547,7 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
 
 static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*maxname = 1024;
@@ -2563,7 +2564,7 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
 
 static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*res = 1024;
@@ -2584,7 +2585,7 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
 
 static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*res = 1024;
@@ -2605,7 +2606,7 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
 
 static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*mode = 0;
 	if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
@@ -2622,7 +2623,7 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
 
 static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*nlink = 1;
 	if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
@@ -2638,7 +2639,8 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
 
 static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid)
 {
-	uint32_t len, *p;
+	uint32_t len;
+	__be32 *p;
 
 	*uid = -2;
 	if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
@@ -2662,7 +2664,8 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
 
 static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid)
 {
-	uint32_t len, *p;
+	uint32_t len;
+	__be32 *p;
 
 	*gid = -2;
 	if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
@@ -2686,7 +2689,8 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
 
 static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
 {
-	uint32_t major = 0, minor = 0, *p;
+	uint32_t major = 0, minor = 0;
+	__be32 *p;
 
 	*rdev = MKDEV(0,0);
 	if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U)))
@@ -2708,7 +2712,7 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
 
 static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*res = 0;
@@ -2725,7 +2729,7 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
 
 static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*res = 0;
@@ -2742,7 +2746,7 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
 
 static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status = 0;
 
 	*res = 0;
@@ -2759,7 +2763,7 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
 
 static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	*used = 0;
 	if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
@@ -2776,7 +2780,7 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
 
 static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
 {
-	uint32_t *p;
+	__be32 *p;
 	uint64_t sec;
 	uint32_t nsec;
 
@@ -2836,7 +2840,7 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str
 	return status;
 }
 
-static int verify_attr_len(struct xdr_stream *xdr, uint32_t *savep, uint32_t attrlen)
+static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrlen)
 {
 	unsigned int attrwords = XDR_QUADLEN(attrlen);
 	unsigned int nwords = xdr->p - savep;
@@ -2854,7 +2858,7 @@ static int verify_attr_len(struct xdr_stream *xdr, uint32_t *savep, uint32_t att
 
 static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	READ_BUF(20);
 	READ32(cinfo->atomic);
@@ -2865,7 +2869,7 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c
 
 static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
 {
-	uint32_t *p;
+	__be32 *p;
 	uint32_t supp, acc;
 	int status;
 
@@ -2882,7 +2886,7 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
 
 static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_CLOSE);
@@ -2895,7 +2899,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
 
 static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_COMMIT);
@@ -2908,7 +2912,7 @@ static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
 
 static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
 {
-	uint32_t *p;
+	__be32 *p;
 	uint32_t bmlen;
 	int status;
 
@@ -2925,7 +2929,7 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
 
 static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
 {
-	uint32_t *savep;
+	__be32 *savep;
 	uint32_t attrlen, 
 		 bitmap[2] = {0};
 	int status;
@@ -2952,7 +2956,7 @@ xdr_error:
 	
 static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
 {
-	uint32_t *savep;
+	__be32 *savep;
 	uint32_t attrlen, 
 		 bitmap[2] = {0};
 	int status;
@@ -2985,7 +2989,7 @@ xdr_error:
 
 static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
 {
-	uint32_t *savep;
+	__be32 *savep;
 	uint32_t attrlen, 
 		 bitmap[2] = {0};
 	int status;
@@ -3010,7 +3014,7 @@ xdr_error:
 
 static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server)
 {
-	uint32_t *savep;
+	__be32 *savep;
 	uint32_t attrlen,
 		 bitmap[2] = {0},
 		 type;
@@ -3079,7 +3083,7 @@ xdr_error:
 
 static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
 {
-	uint32_t *savep;
+	__be32 *savep;
 	uint32_t attrlen, bitmap[2];
 	int status;
 
@@ -3111,7 +3115,7 @@ xdr_error:
 
 static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
 {
-	uint32_t *p;
+	__be32 *p;
 	uint32_t len;
 	int status;
 
@@ -3147,7 +3151,7 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
 static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
 {
 	uint64_t offset, length, clientid;
-	uint32_t *p;
+	__be32 *p;
 	uint32_t namelen, type;
 
 	READ_BUF(32);
@@ -3172,7 +3176,7 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
 
 static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_LOCK);
@@ -3195,7 +3199,7 @@ static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res)
 
 static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_LOCKU);
@@ -3214,7 +3218,7 @@ static int decode_lookup(struct xdr_stream *xdr)
 /* This is too sick! */
 static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
 {
-        uint32_t *p;
+        __be32 *p;
 	uint32_t limit_type, nblocks, blocksize;
 
 	READ_BUF(12);
@@ -3233,7 +3237,7 @@ static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
 
 static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
 {
-        uint32_t *p;
+        __be32 *p;
         uint32_t delegation_type;
 
 	READ_BUF(4);
@@ -3259,7 +3263,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
 
 static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
 {
-        uint32_t *p;
+        __be32 *p;
         uint32_t bmlen;
         int status;
 
@@ -3287,7 +3291,7 @@ xdr_error:
 
 static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
 {
-        uint32_t *p;
+        __be32 *p;
 	int status;
 
         status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
@@ -3300,7 +3304,7 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmre
 
 static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
@@ -3324,7 +3328,7 @@ static int decode_putrootfh(struct xdr_stream *xdr)
 static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
 {
 	struct kvec *iov = req->rq_rcv_buf.head;
-	uint32_t *p;
+	__be32 *p;
 	uint32_t count, eof, recvd, hdrlen;
 	int status;
 
@@ -3354,7 +3358,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	struct page	*page = *rcvbuf->pages;
 	struct kvec	*iov = rcvbuf->head;
 	unsigned int	nr, pglen = rcvbuf->page_len;
-	uint32_t	*end, *entry, *p, *kaddr;
+	__be32		*end, *entry, *p, *kaddr;
 	uint32_t	len, attrlen, xlen;
 	int 		hdrlen, recvd, status;
 
@@ -3376,7 +3380,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	xdr_read_pages(xdr, pglen);
 
 	BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
-	kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0);
+	kaddr = p = kmap_atomic(page, KM_USER0);
 	end = p + ((pglen + readdir->pgbase) >> 2);
 	entry = p;
 	for (nr = 0; *p++; nr++) {
@@ -3428,7 +3432,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	struct kvec *iov = rcvbuf->head;
 	int hdrlen, len, recvd;
-	uint32_t *p;
+	__be32 *p;
 	char *kaddr;
 	int status;
 
@@ -3505,7 +3509,7 @@ decode_restorefh(struct xdr_stream *xdr)
 static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 		size_t *acl_len)
 {
-	uint32_t *savep;
+	__be32 *savep;
 	uint32_t attrlen,
 		 bitmap[2] = {0};
 	struct kvec *iov = req->rq_rcv_buf.head;
@@ -3551,7 +3555,7 @@ decode_savefh(struct xdr_stream *xdr)
 
 static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	uint32_t bmlen;
 	int status;
 
@@ -3567,7 +3571,7 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
 
 static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
 {
-	uint32_t *p;
+	__be32 *p;
 	uint32_t opnum;
 	int32_t nfserr;
 
@@ -3610,7 +3614,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr)
 
 static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_WRITE);
@@ -3632,7 +3636,7 @@ static int decode_delegreturn(struct xdr_stream *xdr)
 /*
  * Decode OPEN_DOWNGRADE response
  */
-static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
 {
         struct xdr_stream xdr;
         struct compound_hdr hdr;
@@ -3660,7 +3664,7 @@ out:
 /*
  * Decode ACCESS response
  */
-static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_accessres *res)
+static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3678,7 +3682,7 @@ out:
 /*
  * Decode LOOKUP response
  */
-static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3701,7 +3705,7 @@ out:
 /*
  * Decode LOOKUP_ROOT response
  */
-static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3721,7 +3725,7 @@ out:
 /*
  * Decode REMOVE response
  */
-static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_remove_res *res)
+static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_remove_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3742,7 +3746,7 @@ out:
 /*
  * Decode RENAME response
  */
-static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_rename_res *res)
+static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_rename_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3772,7 +3776,7 @@ out:
 /*
  * Decode LINK response
  */
-static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_link_res *res)
+static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3805,7 +3809,7 @@ out:
 /*
  * Decode CREATE response
  */
-static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3834,7 +3838,7 @@ out:
 /*
  * Decode SYMLINK response
  */
-static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
 {
 	return nfs4_xdr_dec_create(rqstp, p, res);
 }
@@ -3842,7 +3846,7 @@ static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4
 /*
  * Decode GETATTR response
  */
-static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getattr_res *res)
+static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_getattr_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3865,7 +3869,7 @@ out:
  * Encode an SETACL request
  */
 static int
-nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args)
+nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args)
 {
         struct xdr_stream xdr;
         struct compound_hdr hdr = {
@@ -3886,7 +3890,7 @@ out:
  * Decode SETACL response
  */
 static int
-nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res)
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, void *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3908,7 +3912,7 @@ out:
  * Decode GETACL response
  */
 static int
-nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, size_t *acl_len)
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3930,7 +3934,7 @@ out:
 /*
  * Decode CLOSE response
  */
-static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
 {
         struct xdr_stream xdr;
         struct compound_hdr hdr;
@@ -3960,7 +3964,7 @@ out:
 /*
  * Decode OPEN response
  */
-static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
 {
         struct xdr_stream xdr;
         struct compound_hdr hdr;
@@ -3994,7 +3998,7 @@ out:
 /*
  * Decode OPEN_CONFIRM response
  */
-static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_open_confirmres *res)
+static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res)
 {
         struct xdr_stream xdr;
         struct compound_hdr hdr;
@@ -4015,7 +4019,7 @@ out:
 /*
  * Decode OPEN response
  */
-static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
 {
         struct xdr_stream xdr;
         struct compound_hdr hdr;
@@ -4039,7 +4043,7 @@ out:
 /*
  * Decode SETATTR response
  */
-static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_setattrres *res)
+static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res)
 {
         struct xdr_stream xdr;
         struct compound_hdr hdr;
@@ -4065,7 +4069,7 @@ out:
 /*
  * Decode LOCK response
  */
-static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lock_res *res)
+static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4086,7 +4090,7 @@ out:
 /*
  * Decode LOCKT response
  */
-static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockt_res *res)
+static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lockt_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4107,7 +4111,7 @@ out:
 /*
  * Decode LOCKU response
  */
-static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_locku_res *res)
+static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_locku_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4128,7 +4132,7 @@ out:
 /*
  * Decode READLINK response
  */
-static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, uint32_t *p, void *res)
+static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, void *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4149,7 +4153,7 @@ out:
 /*
  * Decode READDIR response
  */
-static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_readdir_res *res)
+static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_readdir_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4170,7 +4174,7 @@ out:
 /*
  * Decode Read response
  */
-static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_readres *res)
+static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4193,7 +4197,7 @@ out:
 /*
  * Decode WRITE response
  */
-static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_writeres *res)
+static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4219,7 +4223,7 @@ out:
 /*
  * Decode COMMIT response
  */
-static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_writeres *res)
+static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4243,7 +4247,7 @@ out:
 /*
  * FSINFO request
  */
-static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo)
+static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4263,7 +4267,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsi
 /*
  * PATHCONF request
  */
-static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_pathconf *pathconf)
+static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *pathconf)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4281,7 +4285,7 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_p
 /*
  * STATFS request
  */
-static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fsstat *fsstat)
+static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *fsstat)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4299,7 +4303,7 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fss
 /*
  * GETATTR_BITMAP request
  */
-static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, uint32_t *p, struct nfs4_server_caps_res *res)
+static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4318,7 +4322,7 @@ out:
 /*
  * Decode RENEW response
  */
-static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
+static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4334,7 +4338,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
 /*
  * a SETCLIENTID request
  */
-static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
+static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
 		struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
@@ -4353,7 +4357,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
 /*
  * a SETCLIENTID_CONFIRM request
  */
-static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo)
+static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4375,7 +4379,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s
 /*
  * DELEGRETURN request
  */
-static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_delegreturnres *res)
+static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4397,7 +4401,7 @@ out:
 /*
  * FS_LOCATIONS request
  */
-static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations *res)
+static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
-- 
cgit v1.2.3


From 0dbb4c6799cf8fa8c5ba1926153a30960117477d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:49 -0700
Subject: [PATCH] xdr annotations: NFS readdir entries

on-the-wire data is big-endian

[in large part pulled from Alexey's patch]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/dir.c      | 6 +++---
 fs/nfs/internal.h | 6 +++---
 fs/nfs/nfs2xdr.c  | 4 ++--
 fs/nfs/nfs3xdr.c  | 4 ++--
 fs/nfs/nfs4_fs.h  | 2 +-
 fs/nfs/nfs4proc.c | 4 ++--
 fs/nfs/nfs4xdr.c  | 2 +-
 7 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c86a1ead4772..4133ef5264e5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -142,12 +142,12 @@ nfs_opendir(struct inode *inode, struct file *filp)
 	return res;
 }
 
-typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int);
+typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int);
 typedef struct {
 	struct file	*file;
 	struct page	*page;
 	unsigned long	page_index;
-	u32		*ptr;
+	__be32		*ptr;
 	u64		*dir_cookie;
 	loff_t		current_index;
 	struct nfs_entry *entry;
@@ -220,7 +220,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 static inline
 int dir_decode(nfs_readdir_descriptor_t *desc)
 {
-	u32	*p = desc->ptr;
+	__be32	*p = desc->ptr;
 	p = desc->decode(p, desc->entry, desc->plus);
 	if (IS_ERR(p))
 		return PTR_ERR(p);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index bea0b016bd70..d205466233f6 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -93,15 +93,15 @@ extern void nfs_destroy_directcache(void);
 /* nfs2xdr.c */
 extern int nfs_stat_to_errno(int);
 extern struct rpc_procinfo nfs_procedures[];
-extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
+extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int);
 
 /* nfs3xdr.c */
 extern struct rpc_procinfo nfs3_procedures[];
-extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
+extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int);
 
 /* nfs4xdr.c */
 #ifdef CONFIG_NFS_V4
-extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
 #endif
 
 /* nfs4proc.c */
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1d801e30c40e..3be4e72a0227 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -468,8 +468,8 @@ err_unmap:
 	goto out;
 }
 
-u32 *
-nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+__be32 *
+nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
 {
 	if (!*p++) {
 		if (!*p)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b4e740e4494a..0ace092d126f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -583,8 +583,8 @@ err_unmap:
 	goto out;
 }
 
-u32 *
-nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+__be32 *
+nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
 {
 	struct nfs_entry old = *entry;
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 61095fe4b5ca..6f346677332d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -212,7 +212,7 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
 extern const nfs4_stateid zero_stateid;
 
 /* nfs4xdr.c */
-extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus);
+extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 
 struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7421bcb3b728..8d09b47c91b9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -141,7 +141,7 @@ const u32 nfs4_fs_locations_bitmap[2] = {
 static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 		struct nfs4_readdir_arg *readdir)
 {
-	u32 *start, *p;
+	__be32 *start, *p;
 
 	BUG_ON(readdir->count < 80);
 	if (cookie > 2) {
@@ -162,7 +162,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 	 * when talking to the server, we always send cookie 0
 	 * instead of 1 or 2.
 	 */
-	start = p = (u32 *)kmap_atomic(*readdir->pages, KM_USER0);
+	start = p = kmap_atomic(*readdir->pages, KM_USER0);
 	
 	if (cookie == 0) {
 		*p++ = xdr_one;                                  /* next */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e284123b9774..0cf3fa312a33 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4421,7 +4421,7 @@ out:
 	return status;
 }
 
-uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus)
+__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
 {
 	uint32_t bitmap[2] = {0};
 	uint32_t len;
-- 
cgit v1.2.3


From e6f684f6443dd37384c63d2f27571350e0b5c8aa Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:50 -0700
Subject: [PATCH] fs/nfs/callback* passes error values big-endian

[pulled from Alexey's patch]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/callback.h      |  6 ++---
 fs/nfs/callback_proc.c |  6 ++---
 fs/nfs/callback_xdr.c  | 60 +++++++++++++++++++++++++-------------------------
 3 files changed, 36 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 5676163d26e8..6921d82b850b 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -44,7 +44,7 @@ struct cb_getattrargs {
 };
 
 struct cb_getattrres {
-	uint32_t status;
+	__be32 status;
 	uint32_t bitmap[2];
 	uint64_t size;
 	uint64_t change_attr;
@@ -59,8 +59,8 @@ struct cb_recallargs {
 	uint32_t truncate;
 };
 
-extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
-extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
 
 #ifdef CONFIG_NFS_V4
 extern int nfs_callback_up(void);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 97cf8f71451f..72e55d83756d 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -14,7 +14,7 @@
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
  
-unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
 {
 	struct nfs_client *clp;
 	struct nfs_delegation *delegation;
@@ -55,11 +55,11 @@ out:
 	return res->status;
 }
 
-unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 {
 	struct nfs_client *clp;
 	struct inode *inode;
-	unsigned res;
+	__be32 res;
 	
 	res = htonl(NFS4ERR_BADHANDLE);
 	clp = nfs_find_client(args->addr, 4);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 5998d0c71757..909a1408bcab 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -22,9 +22,9 @@
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 
-typedef unsigned (*callback_process_op_t)(void *, void *);
-typedef unsigned (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
-typedef unsigned (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
+typedef __be32 (*callback_process_op_t)(void *, void *);
+typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
+typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
 
 
 struct callback_op {
@@ -61,7 +61,7 @@ static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes)
 	return p;
 }
 
-static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str)
+static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str)
 {
 	uint32_t *p;
 
@@ -81,7 +81,7 @@ static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const c
 	return 0;
 }
 
-static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
+static __be32 decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
 {
 	uint32_t *p;
 
@@ -99,7 +99,7 @@ static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
 	return 0;
 }
 
-static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
+static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
 {
 	uint32_t *p;
 	unsigned int attrlen;
@@ -118,7 +118,7 @@ static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
 	return 0;
 }
 
-static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 {
 	uint32_t *p;
 
@@ -129,11 +129,11 @@ static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 	return 0;
 }
 
-static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
+static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
 {
 	uint32_t *p;
 	unsigned int minor_version;
-	unsigned status;
+	__be32 status;
 
 	status = decode_string(xdr, &hdr->taglen, &hdr->tag);
 	if (unlikely(status != 0))
@@ -159,7 +159,7 @@ static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compou
 	return 0;
 }
 
-static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
+static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
 {
 	uint32_t *p;
 	p = read_buf(xdr, 4);
@@ -169,9 +169,9 @@ static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
 	return 0;
 }
 
-static unsigned decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
+static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
 {
-	unsigned status;
+	__be32 status;
 
 	status = decode_fh(xdr, &args->fh);
 	if (unlikely(status != 0))
@@ -183,10 +183,10 @@ out:
 	return status;
 }
 
-static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
+static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
 {
 	uint32_t *p;
-	unsigned status;
+	__be32 status;
 
 	args->addr = &rqstp->rq_addr;
 	status = decode_stateid(xdr, &args->stateid);
@@ -204,7 +204,7 @@ out:
 	return status;
 }
 
-static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
+static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
 {
 	uint32_t *p;
 
@@ -217,7 +217,7 @@ static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const ch
 
 #define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE)
 #define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY)
-static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep)
+static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep)
 {
 	uint32_t bm[2];
 	uint32_t *p;
@@ -247,7 +247,7 @@ static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitma
 	return 0;
 }
 
-static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change)
+static __be32 encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change)
 {
 	uint32_t *p;
 
@@ -260,7 +260,7 @@ static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitma
 	return 0;
 }
 
-static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size)
+static __be32 encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size)
 {
 	uint32_t *p;
 
@@ -273,7 +273,7 @@ static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap,
 	return 0;
 }
 
-static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
+static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
 {
 	uint32_t *p;
 
@@ -285,23 +285,23 @@ static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *
 	return 0;
 }
 
-static unsigned encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
 {
 	if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
 		return 0;
 	return encode_attr_time(xdr,time);
 }
 
-static unsigned encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
 {
 	if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY))
 		return 0;
 	return encode_attr_time(xdr,time);
 }
 
-static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
+static __be32 encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
 {
-	unsigned status;
+	__be32 status;
 
 	hdr->status = xdr_reserve_space(xdr, 4);
 	if (unlikely(hdr->status == NULL))
@@ -315,7 +315,7 @@ static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compou
 	return 0;
 }
 
-static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
+static __be32 encode_op_hdr(struct xdr_stream *xdr, uint32_t op, __be32 res)
 {
 	uint32_t *p;
 	
@@ -327,10 +327,10 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
 	return 0;
 }
 
-static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
+static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
 {
 	uint32_t *savep = NULL;
-	unsigned status = res->status;
+	__be32 status = res->status;
 	
 	if (unlikely(status != 0))
 		goto out;
@@ -353,15 +353,15 @@ out:
 	return status;
 }
 
-static unsigned process_op(struct svc_rqst *rqstp,
+static __be32 process_op(struct svc_rqst *rqstp,
 		struct xdr_stream *xdr_in, void *argp,
 		struct xdr_stream *xdr_out, void *resp)
 {
 	struct callback_op *op = &callback_ops[0];
 	unsigned int op_nr = OP_CB_ILLEGAL;
-	unsigned int status = 0;
+	__be32 status = 0;
 	long maxlen;
-	unsigned res;
+	__be32 res;
 
 	dprintk("%s: start\n", __FUNCTION__);
 	status = decode_op_hdr(xdr_in, &op_nr);
@@ -405,7 +405,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	struct cb_compound_hdr_res hdr_res;
 	struct xdr_stream xdr_in, xdr_out;
 	uint32_t *p;
-	unsigned int status;
+	__be32 status;
 	unsigned int nops = 1;
 
 	dprintk("%s: start\n", __FUNCTION__);
-- 
cgit v1.2.3


From 5704fdeb41c9fb282ae576516f221ea0b8f64b2b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:51 -0700
Subject: [PATCH] xdr annotations: fs/nfs/callback*

on-the-wire data is big-endian

[mostly pulled from Alexey's patch]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/callback.h     |  4 ++--
 fs/nfs/callback_xdr.c | 44 ++++++++++++++++++++++----------------------
 2 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 6921d82b850b..db3d7919c601 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -31,10 +31,10 @@ struct cb_compound_hdr_arg {
 };
 
 struct cb_compound_hdr_res {
-	uint32_t *status;
+	__be32 *status;
 	int taglen;
 	const char *tag;
-	uint32_t *nops;
+	__be32 *nops;
 };
 
 struct cb_getattrargs {
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 909a1408bcab..f8ea1f51f590 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -41,19 +41,19 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
 	return htonl(NFS4_OK);
 }
 
-static int nfs4_decode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
+static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfs4_encode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
+static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
-static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes)
+static __be32 *read_buf(struct xdr_stream *xdr, int nbytes)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	p = xdr_inline_decode(xdr, nbytes);
 	if (unlikely(p == NULL))
@@ -63,7 +63,7 @@ static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes)
 
 static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	p = read_buf(xdr, 4);
 	if (unlikely(p == NULL))
@@ -83,7 +83,7 @@ static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, const cha
 
 static __be32 decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	p = read_buf(xdr, 4);
 	if (unlikely(p == NULL))
@@ -101,7 +101,7 @@ static __be32 decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
 
 static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
 {
-	uint32_t *p;
+	__be32 *p;
 	unsigned int attrlen;
 
 	p = read_buf(xdr, 4);
@@ -120,7 +120,7 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
 
 static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	p = read_buf(xdr, 16);
 	if (unlikely(p == NULL))
@@ -131,7 +131,7 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 
 static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
 {
-	uint32_t *p;
+	__be32 *p;
 	unsigned int minor_version;
 	__be32 status;
 
@@ -161,7 +161,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
 
 static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
 {
-	uint32_t *p;
+	__be32 *p;
 	p = read_buf(xdr, 4);
 	if (unlikely(p == NULL))
 		return htonl(NFS4ERR_RESOURCE);
@@ -185,7 +185,7 @@ out:
 
 static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
 {
-	uint32_t *p;
+	__be32 *p;
 	__be32 status;
 
 	args->addr = &rqstp->rq_addr;
@@ -206,7 +206,7 @@ out:
 
 static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 4 + len);
 	if (unlikely(p == NULL))
@@ -217,10 +217,10 @@ static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char
 
 #define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE)
 #define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY)
-static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep)
+static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, __be32 **savep)
 {
-	uint32_t bm[2];
-	uint32_t *p;
+	__be32 bm[2];
+	__be32 *p;
 
 	bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0);
 	bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1);
@@ -249,7 +249,7 @@ static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap,
 
 static __be32 encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	if (!(bitmap[0] & FATTR4_WORD0_CHANGE))
 		return 0;
@@ -262,7 +262,7 @@ static __be32 encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap,
 
 static __be32 encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	if (!(bitmap[0] & FATTR4_WORD0_SIZE))
 		return 0;
@@ -275,7 +275,7 @@ static __be32 encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, u
 
 static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 12);
 	if (unlikely(p == 0))
@@ -317,7 +317,7 @@ static __be32 encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound
 
 static __be32 encode_op_hdr(struct xdr_stream *xdr, uint32_t op, __be32 res)
 {
-	uint32_t *p;
+	__be32 *p;
 	
 	p = xdr_reserve_space(xdr, 8);
 	if (unlikely(p == NULL))
@@ -329,7 +329,7 @@ static __be32 encode_op_hdr(struct xdr_stream *xdr, uint32_t op, __be32 res)
 
 static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
 {
-	uint32_t *savep = NULL;
+	__be32 *savep = NULL;
 	__be32 status = res->status;
 	
 	if (unlikely(status != 0))
@@ -404,7 +404,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	struct cb_compound_hdr_arg hdr_arg;
 	struct cb_compound_hdr_res hdr_res;
 	struct xdr_stream xdr_in, xdr_out;
-	uint32_t *p;
+	__be32 *p;
 	__be32 status;
 	unsigned int nops = 1;
 
@@ -412,7 +412,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
 
-	p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
+	p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
 	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
 
 	decode_compound_hdr_arg(&xdr_in, &hdr_arg);
-- 
cgit v1.2.3


From bc4785cd475a11ba125df7af674e16c6ea1cfc30 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:51 -0700
Subject: [PATCH] nfs: verifier is network-endian

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/nfs3proc.c | 2 +-
 fs/nfs/nfs4proc.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 3b234d4601e7..e5f128ffc32d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -668,7 +668,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 {
 	struct inode		*dir = dentry->d_inode;
 	struct nfs_fattr	dir_attr;
-	u32			*verf = NFS_COOKIEVERF(dir);
+	__be32			*verf = NFS_COOKIEVERF(dir);
 	struct nfs3_readdirargs	arg = {
 		.fh		= NFS_FH(dir),
 		.cookie		= cookie,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8d09b47c91b9..8118036cc449 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -138,7 +138,7 @@ const u32 nfs4_fs_locations_bitmap[2] = {
 	| FATTR4_WORD1_MOUNTED_ON_FILEID
 };
 
-static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
+static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
 		struct nfs4_readdir_arg *readdir)
 {
 	__be32 *start, *p;
@@ -2915,11 +2915,11 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
 		.rpc_resp = clp,
 		.rpc_cred = cred,
 	};
-	u32 *p;
+	__be32 *p;
 	int loop = 0;
 	int status;
 
-	p = (u32*)sc_verifier.data;
+	p = (__be32*)sc_verifier.data;
 	*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
 	*p = htonl((u32)clp->cl_boot_time.tv_nsec);
 
-- 
cgit v1.2.3


From d21ec0c33d0a9eb7a6f6c716008863a97797709e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:52 -0700
Subject: [PATCH] xdr annotations: mount_clnt

[pulled from Alexey's patch]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/mount_clnt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d507b021207f..f75fe72b4160 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -95,7 +95,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
  * XDR encode/decode functions for MOUNT
  */
 static int
-xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path)
+xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
 {
 	p = xdr_encode_string(p, path);
 
@@ -104,7 +104,7 @@ xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path)
 }
 
 static int
-xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
+xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
 {
 	struct nfs_fh *fh = res->fh;
 
@@ -116,7 +116,7 @@ xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
 }
 
 static int
-xdr_decode_fhstatus3(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
+xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
 {
 	struct nfs_fh *fh = res->fh;
 
-- 
cgit v1.2.3


From 83bbe2ef63ec4f6a22aaaa0c03bd918b38300127 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:53 -0700
Subject: [PATCH] nfs_common endianness annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs_common/nfsacl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 0c2be8c0307d..c11f5375d7c1 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -46,7 +46,7 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
 {
 	struct nfsacl_encode_desc *nfsacl_desc =
 		(struct nfsacl_encode_desc *) desc;
-	u32 *p = (u32 *) elem;
+	__be32 *p = elem;
 
 	struct posix_acl_entry *entry =
 		&nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
@@ -127,7 +127,7 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
 {
 	struct nfsacl_decode_desc *nfsacl_desc =
 		(struct nfsacl_decode_desc *) desc;
-	u32 *p = (u32 *) elem;
+	__be32 *p = elem;
 	struct posix_acl_entry *entry;
 
 	if (!nfsacl_desc->acl) {
-- 
cgit v1.2.3


From 63f103111fdfc3cba00e4c94921d32362f375d93 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:54 -0700
Subject: [PATCH] nfsd: nfserrno() endianness annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfsproc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 09030afd7249..03ab6822291f 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -579,11 +579,11 @@ struct svc_version	nfsd_version2 = {
 /*
  * Map errnos to NFS errnos.
  */
-int
+__be32
 nfserrno (int errno)
 {
 	static struct {
-		int	nfserr;
+		__be32	nfserr;
 		int	syserr;
 	} nfs_errtbl[] = {
 		{ nfs_ok, 0 },
@@ -615,11 +615,10 @@ nfserrno (int errno)
 		{ nfserr_badname, -ESRCH },
 		{ nfserr_io, -ETXTBSY },
 		{ nfserr_notsupp, -EOPNOTSUPP },
-		{ -1, -EIO }
 	};
 	int	i;
 
-	for (i = 0; nfs_errtbl[i].nfserr != -1; i++) {
+	for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
 		if (nfs_errtbl[i].syserr == errno)
 			return nfs_errtbl[i].nfserr;
 	}
-- 
cgit v1.2.3


From 83b11340d683a67a77e35a5ffb5ad4afbf0be4e5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:55 -0700
Subject: [PATCH] nfsfh simple endianness annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfsfh.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 501d83884530..727ab3bd450d 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -76,7 +76,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
  * comment in the NFSv3 spec says this is incorrect (implementation notes for
  * the write call).
  */
-static inline int
+static inline __be32
 nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
 {
 	/* Type can be negative when creating hardlinks - not to a dir */
@@ -110,13 +110,13 @@ nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
  * This is only called at the start of an nfsproc call, so fhp points to
  * a svc_fh which is all 0 except for the over-the-wire file handle.
  */
-u32
+__be32
 fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 {
 	struct knfsd_fh	*fh = &fhp->fh_handle;
 	struct svc_export *exp = NULL;
 	struct dentry	*dentry;
-	u32		error = 0;
+	__be32		error = 0;
 
 	dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
 
@@ -315,7 +315,7 @@ static inline void _fh_update_old(struct dentry *dentry,
 		fh->ofh_dirino = 0;
 }
 
-int
+__be32
 fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct svc_fh *ref_fh)
 {
 	/* ref_fh is a reference file handle.
@@ -451,7 +451,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st
  * Update file handle information after changing a dentry.
  * This is only called by nfsd_create, nfsd_create_v3 and nfsd_proc_create
  */
-int
+__be32
 fh_update(struct svc_fh *fhp)
 {
 	struct dentry *dentry;
-- 
cgit v1.2.3


From ad451d389f46f699832da3e9ad95f610cb8c0fd2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:55 -0700
Subject: [PATCH] xdr annotations: nfsd_dispatch()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfssvc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 013b38996e64..8067118b1c0c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -495,8 +495,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
 {
 	struct svc_procedure	*proc;
 	kxdrproc_t		xdr;
-	u32			nfserr;
-	u32			*nfserrp;
+	__be32			nfserr;
+	__be32			*nfserrp;
 
 	dprintk("nfsd_dispatch: vers %d proc %d\n",
 				rqstp->rq_vers, rqstp->rq_proc);
@@ -515,7 +515,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
 
 	/* Decode arguments */
 	xdr = proc->pc_decode;
-	if (xdr && !xdr(rqstp, (u32*)rqstp->rq_arg.head[0].iov_base,
+	if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base,
 			rqstp->rq_argp)) {
 		dprintk("nfsd: failed to decode arguments!\n");
 		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
@@ -528,7 +528,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
 	 */
 	nfserrp = rqstp->rq_res.head[0].iov_base
 		+ rqstp->rq_res.head[0].iov_len;
-	rqstp->rq_res.head[0].iov_len += sizeof(u32);
+	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
 
 	/* Now call the procedure handler, and encode NFS status. */
 	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
-- 
cgit v1.2.3


From 131a21c2177c267ab259fcd06947c6f593a7de8e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:56 -0700
Subject: [PATCH] xdr annotations: NFSv2 server

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs2acl.c | 18 +++++++-------
 fs/nfsd/nfsxdr.c  | 72 +++++++++++++++++++++++++++----------------------------
 2 files changed, 45 insertions(+), 45 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 8d48616882c1..fd5397d8c62a 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -158,7 +158,7 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg
 /*
  * XDR decode functions
  */
-static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p,
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_getaclargs *argp)
 {
 	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
@@ -169,7 +169,7 @@ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 
-static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p,
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_setaclargs *argp)
 {
 	struct kvec *head = rqstp->rq_arg.head;
@@ -194,7 +194,7 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p,
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd_fhandle *argp)
 {
 	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
@@ -202,7 +202,7 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p,
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p,
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_accessargs *argp)
 {
 	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
@@ -217,7 +217,7 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p,
  */
 
 /* GETACL */
-static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_getaclres *resp)
 {
 	struct dentry *dentry = resp->fh.fh_dentry;
@@ -259,7 +259,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
 	return 1;
 }
 
-static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, u32 *p,
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd_attrstat *resp)
 {
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
@@ -267,7 +267,7 @@ static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, u32 *p,
 }
 
 /* ACCESS */
-static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p,
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_accessres *resp)
 {
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
@@ -278,7 +278,7 @@ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p,
 /*
  * XDR release functions
  */
-static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p,
+static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_getaclres *resp)
 {
 	fh_put(&resp->fh);
@@ -287,7 +287,7 @@ static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p,
 	return 1;
 }
 
-static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
+static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd_fhandle *resp)
 {
 	fh_put(&resp->fh);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 1135c0d14557..56ebb1443e0e 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -37,8 +37,8 @@ static u32	nfs_ftypes[] = {
 /*
  * XDR functions for basic NFS types
  */
-static u32 *
-decode_fh(u32 *p, struct svc_fh *fhp)
+static __be32 *
+decode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	fh_init(fhp, NFS_FHSIZE);
 	memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE);
@@ -50,13 +50,13 @@ decode_fh(u32 *p, struct svc_fh *fhp)
 }
 
 /* Helper function for NFSv2 ACL code */
-u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp)
+__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	return decode_fh(p, fhp);
 }
 
-static inline u32 *
-encode_fh(u32 *p, struct svc_fh *fhp)
+static inline __be32 *
+encode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE);
 	return p + (NFS_FHSIZE>> 2);
@@ -66,8 +66,8 @@ encode_fh(u32 *p, struct svc_fh *fhp)
  * Decode a file name and make sure that the path contains
  * no slashes or null bytes.
  */
-static inline u32 *
-decode_filename(u32 *p, char **namp, int *lenp)
+static inline __be32 *
+decode_filename(__be32 *p, char **namp, int *lenp)
 {
 	char		*name;
 	int		i;
@@ -82,8 +82,8 @@ decode_filename(u32 *p, char **namp, int *lenp)
 	return p;
 }
 
-static inline u32 *
-decode_pathname(u32 *p, char **namp, int *lenp)
+static inline __be32 *
+decode_pathname(__be32 *p, char **namp, int *lenp)
 {
 	char		*name;
 	int		i;
@@ -98,8 +98,8 @@ decode_pathname(u32 *p, char **namp, int *lenp)
 	return p;
 }
 
-static inline u32 *
-decode_sattr(u32 *p, struct iattr *iap)
+static inline __be32 *
+decode_sattr(__be32 *p, struct iattr *iap)
 {
 	u32	tmp, tmp1;
 
@@ -151,8 +151,8 @@ decode_sattr(u32 *p, struct iattr *iap)
 	return p;
 }
 
-static u32 *
-encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp,
+static __be32 *
+encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 	     struct kstat *stat)
 {
 	struct dentry	*dentry = fhp->fh_dentry;
@@ -195,7 +195,7 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp,
 }
 
 /* Helper function for NFSv2 ACL code */
-u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	struct kstat stat;
 	vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry, &stat);
@@ -206,13 +206,13 @@ u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
  * XDR decode functions
  */
 int
-nfssvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
 int
-nfssvc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args)
+nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
@@ -220,7 +220,7 @@ nfssvc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args)
 }
 
 int
-nfssvc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p,
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_sattrargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh))
@@ -231,7 +231,7 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_decode_diropargs(struct svc_rqst *rqstp, u32 *p,
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_diropargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh))
@@ -242,7 +242,7 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
+nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_readargs *args)
 {
 	unsigned int len;
@@ -273,7 +273,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_writeargs *args)
 {
 	unsigned int len;
@@ -303,7 +303,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_decode_createargs(struct svc_rqst *rqstp, u32 *p,
+nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_createargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh))
@@ -315,7 +315,7 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_decode_renameargs(struct svc_rqst *rqstp, u32 *p,
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_renameargs *args)
 {
 	if (!(p = decode_fh(p, &args->ffh))
@@ -328,7 +328,7 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinkargs *args)
+nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
@@ -338,7 +338,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinka
 }
 
 int
-nfssvc_decode_linkargs(struct svc_rqst *rqstp, u32 *p,
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_linkargs *args)
 {
 	if (!(p = decode_fh(p, &args->ffh))
@@ -350,7 +350,7 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_symlinkargs *args)
 {
 	if (!(p = decode_fh(p, &args->ffh))
@@ -363,7 +363,7 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_readdirargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh)))
@@ -382,13 +382,13 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
  * XDR encode functions
  */
 int
-nfssvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
+nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_attrstat *resp)
 {
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
@@ -396,7 +396,7 @@ nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p,
+nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_diropres *resp)
 {
 	p = encode_fh(p, &resp->fh);
@@ -405,7 +405,7 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_readlinkres *resp)
 {
 	*p++ = htonl(resp->len);
@@ -421,7 +421,7 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
+nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_readres *resp)
 {
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
@@ -440,7 +440,7 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_readdirres *resp)
 {
 	xdr_ressize_check(rqstp, p);
@@ -453,7 +453,7 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p,
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_statfsres *resp)
 {
 	struct kstatfs	*stat = &resp->stats;
@@ -471,7 +471,7 @@ nfssvc_encode_entry(struct readdir_cd *ccd, const char *name,
 		    int namlen, loff_t offset, ino_t ino, unsigned int d_type)
 {
 	struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common);
-	u32	*p = cd->buffer;
+	__be32	*p = cd->buffer;
 	int	buflen, slen;
 
 	/*
@@ -497,7 +497,7 @@ nfssvc_encode_entry(struct readdir_cd *ccd, const char *name,
 	*p++ = htonl((u32) ino);		/* file id */
 	p    = xdr_encode_array(p, name, namlen);/* name length & name */
 	cd->offset = p;			/* remember pointer */
-	*p++ = ~(u32) 0;		/* offset of next entry */
+	*p++ = htonl(~0U);		/* offset of next entry */
 
 	cd->buflen = buflen;
 	cd->buffer = p;
@@ -509,7 +509,7 @@ nfssvc_encode_entry(struct readdir_cd *ccd, const char *name,
  * XDR release functions
  */
 int
-nfssvc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
+nfssvc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_fhandle *resp)
 {
 	fh_put(&resp->fh);
-- 
cgit v1.2.3


From 91f07168cef8e99dd16f608fbc703e7a5af0237f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:57 -0700
Subject: [PATCH] xdr annotations: NFSv3 server

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs3acl.c |  10 ++---
 fs/nfsd/nfs3xdr.c | 126 +++++++++++++++++++++++++++---------------------------
 2 files changed, 68 insertions(+), 68 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index ed6e2c27b5e8..78b2c83d00c5 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -122,7 +122,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
 /*
  * XDR decode functions
  */
-static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p,
+static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_getaclargs *args)
 {
 	if (!(p = nfs3svc_decode_fh(p, &args->fh)))
@@ -133,7 +133,7 @@ static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 
-static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p,
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_setaclargs *args)
 {
 	struct kvec *head = rqstp->rq_arg.head;
@@ -163,7 +163,7 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p,
  */
 
 /* GETACL */
-static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_getaclres *resp)
 {
 	struct dentry *dentry = resp->fh.fh_dentry;
@@ -208,7 +208,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
 }
 
 /* SETACL */
-static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p,
+static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_attrstat *resp)
 {
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
@@ -219,7 +219,7 @@ static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p,
 /*
  * XDR release functions
  */
-static int nfs3svc_release_getacl(struct svc_rqst *rqstp, u32 *p,
+static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
 		struct nfsd3_getaclres *resp)
 {
 	fh_put(&resp->fh);
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 247d518248bf..b4baca3053c3 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -42,23 +42,23 @@ static u32	nfs3_ftypes[] = {
 /*
  * XDR functions for basic NFS types
  */
-static inline u32 *
-encode_time3(u32 *p, struct timespec *time)
+static inline __be32 *
+encode_time3(__be32 *p, struct timespec *time)
 {
 	*p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec);
 	return p;
 }
 
-static inline u32 *
-decode_time3(u32 *p, struct timespec *time)
+static inline __be32 *
+decode_time3(__be32 *p, struct timespec *time)
 {
 	time->tv_sec = ntohl(*p++);
 	time->tv_nsec = ntohl(*p++);
 	return p;
 }
 
-static inline u32 *
-decode_fh(u32 *p, struct svc_fh *fhp)
+static inline __be32 *
+decode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	unsigned int size;
 	fh_init(fhp, NFS3_FHSIZE);
@@ -72,13 +72,13 @@ decode_fh(u32 *p, struct svc_fh *fhp)
 }
 
 /* Helper function for NFSv3 ACL code */
-u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp)
+__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	return decode_fh(p, fhp);
 }
 
-static inline u32 *
-encode_fh(u32 *p, struct svc_fh *fhp)
+static inline __be32 *
+encode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	unsigned int size = fhp->fh_handle.fh_size;
 	*p++ = htonl(size);
@@ -91,8 +91,8 @@ encode_fh(u32 *p, struct svc_fh *fhp)
  * Decode a file name and make sure that the path contains
  * no slashes or null bytes.
  */
-static inline u32 *
-decode_filename(u32 *p, char **namp, int *lenp)
+static inline __be32 *
+decode_filename(__be32 *p, char **namp, int *lenp)
 {
 	char		*name;
 	int		i;
@@ -107,8 +107,8 @@ decode_filename(u32 *p, char **namp, int *lenp)
 	return p;
 }
 
-static inline u32 *
-decode_sattr3(u32 *p, struct iattr *iap)
+static inline __be32 *
+decode_sattr3(__be32 *p, struct iattr *iap)
 {
 	u32	tmp;
 
@@ -153,8 +153,8 @@ decode_sattr3(u32 *p, struct iattr *iap)
 	return p;
 }
 
-static inline u32 *
-encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp,
+static inline __be32 *
+encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 	      struct kstat *stat)
 {
 	struct dentry	*dentry = fhp->fh_dentry;
@@ -186,8 +186,8 @@ encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp,
 	return p;
 }
 
-static inline u32 *
-encode_saved_post_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+static inline __be32 *
+encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	struct inode	*inode = fhp->fh_dentry->d_inode;
 
@@ -224,8 +224,8 @@ encode_saved_post_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
  * The inode may be NULL if the call failed because of a stale file
  * handle. In this case, no attributes are returned.
  */
-static u32 *
-encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+static __be32 *
+encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	struct dentry *dentry = fhp->fh_dentry;
 	if (dentry && dentry->d_inode != NULL) {
@@ -243,8 +243,8 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 }
 
 /* Helper for NFSv3 ACLs */
-u32 *
-nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+__be32 *
+nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	return encode_post_op_attr(rqstp, p, fhp);
 }
@@ -252,8 +252,8 @@ nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 /*
  * Enocde weak cache consistency data
  */
-static u32 *
-encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+static __be32 *
+encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	struct dentry	*dentry = fhp->fh_dentry;
 
@@ -278,7 +278,7 @@ encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
  * XDR decode functions
  */
 int
-nfs3svc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args)
+nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
@@ -286,7 +286,7 @@ nfs3svc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args
 }
 
 int
-nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_sattrargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh))
@@ -303,7 +303,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_diropargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_diropargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh))
@@ -314,7 +314,7 @@ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_accessargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_accessargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh)))
@@ -325,7 +325,7 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readargs *args)
 {
 	unsigned int len;
@@ -355,7 +355,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_writeargs *args)
 {
 	unsigned int len, v, hdr;
@@ -393,7 +393,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_createargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_createargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh))
@@ -417,7 +417,7 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, u32 *p,
 	return xdr_argsize_check(rqstp, p);
 }
 int
-nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_createargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh))
@@ -429,7 +429,7 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_symlinkargs *args)
 {
 	unsigned int len;
@@ -481,7 +481,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_mknodargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh))
@@ -505,7 +505,7 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_renameargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_renameargs *args)
 {
 	if (!(p = decode_fh(p, &args->ffh))
@@ -518,7 +518,7 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readlinkargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh)))
@@ -530,7 +530,7 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_linkargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_linkargs *args)
 {
 	if (!(p = decode_fh(p, &args->ffh))
@@ -542,7 +542,7 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readdirargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh)))
@@ -562,7 +562,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readdirargs *args)
 {
 	int len, pn;
@@ -590,7 +590,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_decode_commitargs(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_commitargs *args)
 {
 	if (!(p = decode_fh(p, &args->fh)))
@@ -609,14 +609,14 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, u32 *p,
  * will work properly.
  */
 int
-nfs3svc_encode_voidres(struct svc_rqst *rqstp, u32 *p, void *dummy)
+nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETATTR */
 int
-nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_attrstat *resp)
 {
 	if (resp->status == 0)
@@ -626,7 +626,7 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
 
 /* SETATTR, REMOVE, RMDIR */
 int
-nfs3svc_encode_wccstat(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_attrstat *resp)
 {
 	p = encode_wcc_data(rqstp, p, &resp->fh);
@@ -635,7 +635,7 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, u32 *p,
 
 /* LOOKUP */
 int
-nfs3svc_encode_diropres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_diropres *resp)
 {
 	if (resp->status == 0) {
@@ -648,7 +648,7 @@ nfs3svc_encode_diropres(struct svc_rqst *rqstp, u32 *p,
 
 /* ACCESS */
 int
-nfs3svc_encode_accessres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_accessres *resp)
 {
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -659,7 +659,7 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, u32 *p,
 
 /* READLINK */
 int
-nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readlinkres *resp)
 {
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -680,7 +680,7 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
 
 /* READ */
 int
-nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readres *resp)
 {
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -704,7 +704,7 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
 
 /* WRITE */
 int
-nfs3svc_encode_writeres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_writeres *resp)
 {
 	p = encode_wcc_data(rqstp, p, &resp->fh);
@@ -719,7 +719,7 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, u32 *p,
 
 /* CREATE, MKDIR, SYMLINK, MKNOD */
 int
-nfs3svc_encode_createres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_diropres *resp)
 {
 	if (resp->status == 0) {
@@ -733,7 +733,7 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, u32 *p,
 
 /* RENAME */
 int
-nfs3svc_encode_renameres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_renameres *resp)
 {
 	p = encode_wcc_data(rqstp, p, &resp->ffh);
@@ -743,7 +743,7 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, u32 *p,
 
 /* LINK */
 int
-nfs3svc_encode_linkres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_linkres *resp)
 {
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -753,7 +753,7 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, u32 *p,
 
 /* READDIR */
 int
-nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readdirres *resp)
 {
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -776,8 +776,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
 		return xdr_ressize_check(rqstp, p);
 }
 
-static inline u32 *
-encode_entry_baggage(struct nfsd3_readdirres *cd, u32 *p, const char *name,
+static inline __be32 *
+encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
 	     int namlen, ino_t ino)
 {
 	*p++ = xdr_one;				 /* mark entry present */
@@ -790,8 +790,8 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, u32 *p, const char *name,
 	return p;
 }
 
-static inline u32 *
-encode_entryplus_baggage(struct nfsd3_readdirres *cd, u32 *p,
+static inline __be32 *
+encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
 		struct svc_fh *fhp)
 {
 		p = encode_post_op_attr(cd->rqstp, p, fhp);
@@ -853,7 +853,7 @@ encode_entry(struct readdir_cd *ccd, const char *name,
 {
 	struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres,
 		       					common);
-	u32		*p = cd->buffer;
+	__be32		*p = cd->buffer;
 	caddr_t		curr_page_addr = NULL;
 	int		pn;		/* current page number */
 	int		slen;		/* string (name) length */
@@ -919,7 +919,7 @@ encode_entry(struct readdir_cd *ccd, const char *name,
 	} else if (cd->rqstp->rq_respages[pn+1] != NULL) {
 		/* temporarily encode entry into next page, then move back to
 		 * current and next page in rq_respages[] */
-		u32 *p1, *tmp;
+		__be32 *p1, *tmp;
 		int len1, len2;
 
 		/* grab next page for temporary storage of entry */
@@ -1009,7 +1009,7 @@ nfs3svc_encode_entry_plus(struct readdir_cd *cd, const char *name,
 
 /* FSSTAT */
 int
-nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_fsstatres *resp)
 {
 	struct kstatfs	*s = &resp->stats;
@@ -1031,7 +1031,7 @@ nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, u32 *p,
 
 /* FSINFO */
 int
-nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_fsinfores *resp)
 {
 	*p++ = xdr_zero;	/* no post_op_attr */
@@ -1055,7 +1055,7 @@ nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, u32 *p,
 
 /* PATHCONF */
 int
-nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_pathconfres *resp)
 {
 	*p++ = xdr_zero;	/* no post_op_attr */
@@ -1074,7 +1074,7 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, u32 *p,
 
 /* COMMIT */
 int
-nfs3svc_encode_commitres(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_commitres *resp)
 {
 	p = encode_wcc_data(rqstp, p, &resp->fh);
@@ -1090,7 +1090,7 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, u32 *p,
  * XDR release functions
  */
 int
-nfs3svc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_attrstat *resp)
 {
 	fh_put(&resp->fh);
@@ -1098,7 +1098,7 @@ nfs3svc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
 }
 
 int
-nfs3svc_release_fhandle2(struct svc_rqst *rqstp, u32 *p,
+nfs3svc_release_fhandle2(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_fhandle_pair *resp)
 {
 	fh_put(&resp->fh1);
-- 
cgit v1.2.3


From 2ebbc012a9433a252be7ab4ce54e94bf7b21e506 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:58 -0700
Subject: [PATCH] xdr annotations: NFSv4 server

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c |  2 +-
 fs/nfsd/nfs4xdr.c  | 66 +++++++++++++++++++++++++++---------------------------
 2 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 795ad6c5cb2c..ca6414248527 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -664,7 +664,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
 static int
 nfsd4_verify(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_verify *verify)
 {
-	u32 *buf, *p;
+	__be32 *buf, *p;
 	int count;
 	int status;
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 77be0c4785e6..3419d99aeb1a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -94,7 +94,7 @@ check_filename(char *str, int len, int err)
  * consistent with the style used in NFSv2/v3...
  */
 #define DECODE_HEAD				\
-	u32 *p;					\
+	__be32 *p;				\
 	int status
 #define DECODE_TAIL				\
 	status = 0;				\
@@ -144,13 +144,13 @@ xdr_error:					\
 	}					\
 } while (0)
 
-static u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
+static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
 {
 	/* We want more bytes than seem to be available.
 	 * Maybe we need a new page, maybe we have just run out
 	 */
 	int avail = (char*)argp->end - (char*)argp->p;
-	u32 *p;
+	__be32 *p;
 	if (avail + argp->pagelen < nbytes)
 		return NULL;
 	if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */
@@ -197,7 +197,7 @@ defer_free(struct nfsd4_compoundargs *argp,
 	return 0;
 }
 
-static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
+static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
 {
 	void *new = NULL;
 	if (p == argp->tmp) {
@@ -951,8 +951,8 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 			argp->pagelen -= len;
 		}
 	}
-	argp->end = (u32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len);
-	argp->p = (u32*)  (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
+	argp->end = (__be32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len);
+	argp->p = (__be32*)  (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
 	argp->rqstp->rq_vec[v].iov_len = len;
 	write->wr_vlen = v+1;
 
@@ -1179,7 +1179,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
  * task to translate them into Linux-specific versions which are more
  * consistent with the style used in NFSv2/v3...
  */
-#define ENCODE_HEAD              u32 *p
+#define ENCODE_HEAD              __be32 *p
 
 #define WRITE32(n)               *p++ = htonl(n)
 #define WRITE64(n)               do {				\
@@ -1209,8 +1209,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
  * Header routine to setup seqid operation replay cache
  */
 #define ENCODE_SEQID_OP_HEAD					\
-	u32 *p;							\
-	u32 *save;						\
+	__be32 *p;						\
+	__be32 *save;						\
 								\
 	save = resp->p;
 
@@ -1235,10 +1235,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
  * seperated @sep.
  */
 static int nfsd4_encode_components(char sep, char *components,
-				   u32 **pp, int *buflen)
+				   __be32 **pp, int *buflen)
 {
-	u32 *p = *pp;
-	u32 *countp = p;
+	__be32 *p = *pp;
+	__be32 *countp = p;
 	int strlen, count=0;
 	char *str, *end;
 
@@ -1272,10 +1272,10 @@ static int nfsd4_encode_components(char sep, char *components,
  * encode a location element of a fs_locations structure
  */
 static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
-				    u32 **pp, int *buflen)
+				    __be32 **pp, int *buflen)
 {
 	int status;
-	u32 *p = *pp;
+	__be32 *p = *pp;
 
 	status = nfsd4_encode_components(':', location->hosts, &p, buflen);
 	if (status)
@@ -1320,11 +1320,11 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, u32 *sta
  */
 static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
 				     struct svc_export *exp,
-				     u32 **pp, int *buflen)
+				     __be32 **pp, int *buflen)
 {
 	u32 status;
 	int i;
-	u32 *p = *pp;
+	__be32 *p = *pp;
 	struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
 	char *root = nfsd4_path(rqstp, exp, &status);
 
@@ -1355,7 +1355,7 @@ static u32 nfs4_ftypes[16] = {
 
 static int
 nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
-			u32 **p, int *buflen)
+			__be32 **p, int *buflen)
 {
 	int status;
 
@@ -1376,20 +1376,20 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
 }
 
 static inline int
-nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, u32 **p, int *buflen)
+nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen)
 {
 	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen);
 }
 
 static inline int
-nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, u32 **p, int *buflen)
+nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen)
 {
 	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen);
 }
 
 static inline int
 nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
-		u32 **p, int *buflen)
+		__be32 **p, int *buflen)
 {
 	return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen);
 }
@@ -1423,7 +1423,7 @@ static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
  */
 int
 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
-		struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval,
+		struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval,
 		struct svc_rqst *rqstp)
 {
 	u32 bmval0 = bmval[0];
@@ -1432,11 +1432,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	struct svc_fh tempfh;
 	struct kstatfs statfs;
 	int buflen = *countp << 2;
-	u32 *attrlenp;
+	__be32 *attrlenp;
 	u32 dummy;
 	u64 dummy64;
 	u32 rdattr_err = 0;
-	u32 *p = buffer;
+	__be32 *p = buffer;
 	int status;
 	int aclsupport = 0;
 	struct nfs4_acl *acl = NULL;
@@ -1831,7 +1831,7 @@ out_serverfault:
 
 static int
 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
-		const char *name, int namlen, u32 *p, int *buflen)
+		const char *name, int namlen, __be32 *p, int *buflen)
 {
 	struct svc_export *exp = cd->rd_fhp->fh_export;
 	struct dentry *dentry;
@@ -1864,10 +1864,10 @@ out_put:
 	return nfserr;
 }
 
-static u32 *
-nfsd4_encode_rdattr_error(u32 *p, int buflen, int nfserr)
+static __be32 *
+nfsd4_encode_rdattr_error(__be32 *p, int buflen, int nfserr)
 {
-	u32 *attrlenp;
+	__be32 *attrlenp;
 
 	if (buflen < 6)
 		return NULL;
@@ -1887,7 +1887,7 @@ nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen,
 {
 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
 	int buflen;
-	u32 *p = cd->buffer;
+	__be32 *p = cd->buffer;
 	int nfserr = nfserr_toosmall;
 
 	/* In nfsv4, "." and ".." never make it onto the wire.. */
@@ -2321,7 +2321,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
 {
 	int maxcount;
 	loff_t offset;
-	u32 *page, *savep, *tailbase;
+	__be32 *page, *savep, *tailbase;
 	ENCODE_HEAD;
 
 	if (nfserr)
@@ -2479,7 +2479,7 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_writ
 void
 nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 {
-	u32 *statp;
+	__be32 *statp;
 	ENCODE_HEAD;
 
 	RESERVE_SPACE(8);
@@ -2617,7 +2617,7 @@ nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
  */
 
 int
-nfs4svc_encode_voidres(struct svc_rqst *rqstp, u32 *p, void *dummy)
+nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
         return xdr_ressize_check(rqstp, p);
 }
@@ -2639,7 +2639,7 @@ void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args)
 }
 
 int
-nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundargs *args)
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
 {
 	int status;
 
@@ -2660,7 +2660,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoun
 }
 
 int
-nfs4svc_encode_compoundres(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundres *resp)
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundres *resp)
 {
 	/*
 	 * All that remains is to write the tag and operation count...
-- 
cgit v1.2.3


From 6264d69d7df654ca64f625e9409189a0e50734e9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:58 -0700
Subject: [PATCH] nfsd: vfs.c endianness annotations

don't use the same variable to store NFS and host error values

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/vfs.c | 299 ++++++++++++++++++++++++++++++----------------------------
 1 file changed, 157 insertions(+), 142 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 1141bd29e4e3..f21e917bb8ed 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -110,7 +110,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 	struct dentry *dentry = *dpp;
 	struct vfsmount *mnt = mntget(exp->ex_mnt);
 	struct dentry *mounts = dget(dentry);
-	int err = nfs_ok;
+	int err = 0;
 
 	while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
 
@@ -148,14 +148,15 @@ out:
  *   clients and is explicitly disallowed for NFSv3
  *      NeilBrown <neilb@cse.unsw.edu.au>
  */
-int
+__be32
 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
 					int len, struct svc_fh *resfh)
 {
 	struct svc_export	*exp;
 	struct dentry		*dparent;
 	struct dentry		*dentry;
-	int			err;
+	__be32			err;
+	int			host_err;
 
 	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
 
@@ -193,7 +194,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
 			exp2 = exp_parent(exp->ex_client, mnt, dentry,
 					  &rqstp->rq_chandle);
 			if (IS_ERR(exp2)) {
-				err = PTR_ERR(exp2);
+				host_err = PTR_ERR(exp2);
 				dput(dentry);
 				mntput(mnt);
 				goto out_nfserr;
@@ -210,14 +211,14 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
 	} else {
 		fh_lock(fhp);
 		dentry = lookup_one_len(name, dparent, len);
-		err = PTR_ERR(dentry);
+		host_err = PTR_ERR(dentry);
 		if (IS_ERR(dentry))
 			goto out_nfserr;
 		/*
 		 * check if we have crossed a mount point ...
 		 */
 		if (d_mountpoint(dentry)) {
-			if ((err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
+			if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
 				dput(dentry);
 				goto out_nfserr;
 			}
@@ -236,7 +237,7 @@ out:
 	return err;
 
 out_nfserr:
-	err = nfserrno(err);
+	err = nfserrno(host_err);
 	goto out;
 }
 
@@ -244,7 +245,7 @@ out_nfserr:
  * Set various file attributes.
  * N.B. After this call fhp needs an fh_put
  */
-int
+__be32
 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 	     int check_guard, time_t guardtime)
 {
@@ -253,7 +254,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 	int		accmode = MAY_SATTR;
 	int		ftype = 0;
 	int		imode;
-	int		err;
+	__be32		err;
+	int		host_err;
 	int		size_change = 0;
 
 	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
@@ -319,19 +321,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 		 * If we are changing the size of the file, then
 		 * we need to break all leases.
 		 */
-		err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
-		if (err == -EWOULDBLOCK)
-			err = -ETIMEDOUT;
-		if (err) /* ENOMEM or EWOULDBLOCK */
+		host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
+		if (host_err == -EWOULDBLOCK)
+			host_err = -ETIMEDOUT;
+		if (host_err) /* ENOMEM or EWOULDBLOCK */
 			goto out_nfserr;
 
-		err = get_write_access(inode);
-		if (err)
+		host_err = get_write_access(inode);
+		if (host_err)
 			goto out_nfserr;
 
 		size_change = 1;
-		err = locks_verify_truncate(inode, NULL, iap->ia_size);
-		if (err) {
+		host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
+		if (host_err) {
 			put_write_access(inode);
 			goto out_nfserr;
 		}
@@ -357,8 +359,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 	err = nfserr_notsync;
 	if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
 		fh_lock(fhp);
-		err = notify_change(dentry, iap);
-		err = nfserrno(err);
+		host_err = notify_change(dentry, iap);
+		err = nfserrno(host_err);
 		fh_unlock(fhp);
 	}
 	if (size_change)
@@ -370,7 +372,7 @@ out:
 	return err;
 
 out_nfserr:
-	err = nfserrno(err);
+	err = nfserrno(host_err);
 	goto out;
 }
 
@@ -420,11 +422,12 @@ out:
 	return error;
 }
 
-int
+__be32
 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
     struct nfs4_acl *acl)
 {
-	int error;
+	__be32 error;
+	int host_error;
 	struct dentry *dentry;
 	struct inode *inode;
 	struct posix_acl *pacl = NULL, *dpacl = NULL;
@@ -440,20 +443,20 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (S_ISDIR(inode->i_mode))
 		flags = NFS4_ACL_DIR;
 
-	error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
-	if (error == -EINVAL) {
+	host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
+	if (host_error == -EINVAL) {
 		error = nfserr_attrnotsupp;
 		goto out;
-	} else if (error < 0)
+	} else if (host_error < 0)
 		goto out_nfserr;
 
-	error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
-	if (error < 0)
+	host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
+	if (host_error < 0)
 		goto out_nfserr;
 
 	if (S_ISDIR(inode->i_mode)) {
-		error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
-		if (error < 0)
+		host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
+		if (host_error < 0)
 			goto out_nfserr;
 	}
 
@@ -464,7 +467,7 @@ out:
 	posix_acl_release(dpacl);
 	return (error);
 out_nfserr:
-	error = nfserrno(error);
+	error = nfserrno(host_error);
 	goto out;
 }
 
@@ -571,14 +574,14 @@ static struct accessmap	nfs3_anyaccess[] = {
     {	0,			0				}
 };
 
-int
+__be32
 nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported)
 {
 	struct accessmap	*map;
 	struct svc_export	*export;
 	struct dentry		*dentry;
 	u32			query, result = 0, sresult = 0;
-	unsigned int		error;
+	__be32			error;
 
 	error = fh_verify(rqstp, fhp, 0, MAY_NOP);
 	if (error)
@@ -598,7 +601,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
 	query = *access;
 	for  (; map->access; map++) {
 		if (map->access & query) {
-			unsigned int err2;
+			__be32 err2;
 
 			sresult |= map->access;
 
@@ -637,13 +640,15 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
  * The access argument indicates the type of open (read/write/lock)
  * N.B. After this call fhp needs an fh_put
  */
-int
+__be32
 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			int access, struct file **filp)
 {
 	struct dentry	*dentry;
 	struct inode	*inode;
-	int		flags = O_RDONLY|O_LARGEFILE, err;
+	int		flags = O_RDONLY|O_LARGEFILE;
+	__be32		err;
+	int		host_err;
 
 	/*
 	 * If we get here, then the client has already done an "open",
@@ -673,10 +678,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	 * Check to see if there are any leases on this file.
 	 * This may block while leases are broken.
 	 */
-	err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
-	if (err == -EWOULDBLOCK)
-		err = -ETIMEDOUT;
-	if (err) /* NOMEM or WOULDBLOCK */
+	host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
+	if (host_err == -EWOULDBLOCK)
+		host_err = -ETIMEDOUT;
+	if (host_err) /* NOMEM or WOULDBLOCK */
 		goto out_nfserr;
 
 	if (access & MAY_WRITE) {
@@ -689,10 +694,9 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	}
 	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags);
 	if (IS_ERR(*filp))
-		err = PTR_ERR(*filp);
+		host_err = PTR_ERR(*filp);
 out_nfserr:
-	if (err)
-		err = nfserrno(err);
+	err = nfserrno(host_err);
 out:
 	return err;
 }
@@ -830,14 +834,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
 	return size;
 }
 
-static int
+static __be32
 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
               loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
 {
 	struct inode *inode;
 	struct raparms	*ra;
 	mm_segment_t	oldfs;
-	int		err;
+	__be32		err;
+	int		host_err;
 
 	err = nfserr_perm;
 	inode = file->f_dentry->d_inode;
@@ -855,12 +860,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 
 	if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
 		rqstp->rq_resused = 1;
-		err = file->f_op->sendfile(file, &offset, *count,
+		host_err = file->f_op->sendfile(file, &offset, *count,
 						 nfsd_read_actor, rqstp);
 	} else {
 		oldfs = get_fs();
 		set_fs(KERNEL_DS);
-		err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
+		host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
 		set_fs(oldfs);
 	}
 
@@ -874,13 +879,13 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		spin_unlock(&rab->pb_lock);
 	}
 
-	if (err >= 0) {
-		nfsdstats.io_read += err;
-		*count = err;
+	if (host_err >= 0) {
+		nfsdstats.io_read += host_err;
+		*count = host_err;
 		err = 0;
 		fsnotify_access(file->f_dentry);
 	} else 
-		err = nfserrno(err);
+		err = nfserrno(host_err);
 out:
 	return err;
 }
@@ -895,7 +900,7 @@ static void kill_suid(struct dentry *dentry)
 	mutex_unlock(&dentry->d_inode->i_mutex);
 }
 
-static int
+static __be32
 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 				loff_t offset, struct kvec *vec, int vlen,
 	   			unsigned long cnt, int *stablep)
@@ -904,7 +909,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	struct dentry		*dentry;
 	struct inode		*inode;
 	mm_segment_t		oldfs;
-	int			err = 0;
+	__be32			err = 0;
+	int			host_err;
 	int			stable = *stablep;
 
 #ifdef MSNFS
@@ -940,18 +946,18 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 
 	/* Write the data. */
 	oldfs = get_fs(); set_fs(KERNEL_DS);
-	err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
+	host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
 	set_fs(oldfs);
-	if (err >= 0) {
+	if (host_err >= 0) {
 		nfsdstats.io_write += cnt;
 		fsnotify_modify(file->f_dentry);
 	}
 
 	/* clear setuid/setgid flag after write */
-	if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
+	if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
 		kill_suid(dentry);
 
-	if (err >= 0 && stable) {
+	if (host_err >= 0 && stable) {
 		static ino_t	last_ino;
 		static dev_t	last_dev;
 
@@ -977,7 +983,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 
 			if (inode->i_state & I_DIRTY) {
 				dprintk("nfsd: write sync %d\n", current->pid);
-				err=nfsd_sync(file);
+				host_err=nfsd_sync(file);
 			}
 #if 0
 			wake_up(&inode->i_wait);
@@ -987,11 +993,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		last_dev = inode->i_sb->s_dev;
 	}
 
-	dprintk("nfsd: write complete err=%d\n", err);
-	if (err >= 0)
+	dprintk("nfsd: write complete host_err=%d\n", host_err);
+	if (host_err >= 0)
 		err = 0;
 	else 
-		err = nfserrno(err);
+		err = nfserrno(host_err);
 out:
 	return err;
 }
@@ -1001,12 +1007,12 @@ out:
  * on entry. On return, *count contains the number of bytes actually read.
  * N.B. After this call fhp needs an fh_put
  */
-int
+__be32
 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		loff_t offset, struct kvec *vec, int vlen,
 		unsigned long *count)
 {
-	int		err;
+	__be32		err;
 
 	if (file) {
 		err = nfsd_permission(fhp->fh_export, fhp->fh_dentry,
@@ -1030,12 +1036,12 @@ out:
  * The stable flag requests synchronous writes.
  * N.B. After this call fhp needs an fh_put
  */
-int
+__be32
 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		loff_t offset, struct kvec *vec, int vlen, unsigned long cnt,
 		int *stablep)
 {
-	int			err = 0;
+	__be32			err = 0;
 
 	if (file) {
 		err = nfsd_permission(fhp->fh_export, fhp->fh_dentry,
@@ -1067,12 +1073,12 @@ out:
  * Unfortunately we cannot lock the file to make sure we return full WCC
  * data to the client, as locking happens lower down in the filesystem.
  */
-int
+__be32
 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                loff_t offset, unsigned long count)
 {
 	struct file	*file;
-	int		err;
+	__be32		err;
 
 	if ((u64)count > ~(u64)offset)
 		return nfserr_inval;
@@ -1100,14 +1106,15 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
  *
  * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
  */
-int
+__be32
 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		char *fname, int flen, struct iattr *iap,
 		int type, dev_t rdev, struct svc_fh *resfhp)
 {
 	struct dentry	*dentry, *dchild = NULL;
 	struct inode	*dirp;
-	int		err;
+	__be32		err;
+	int		host_err;
 
 	err = nfserr_perm;
 	if (!flen)
@@ -1134,7 +1141,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
 		fh_lock_nested(fhp, I_MUTEX_PARENT);
 		dchild = lookup_one_len(fname, dentry, flen);
-		err = PTR_ERR(dchild);
+		host_err = PTR_ERR(dchild);
 		if (IS_ERR(dchild))
 			goto out_nfserr;
 		err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
@@ -1173,22 +1180,22 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	err = nfserr_perm;
 	switch (type) {
 	case S_IFREG:
-		err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+		host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
 		break;
 	case S_IFDIR:
-		err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+		host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
 		break;
 	case S_IFCHR:
 	case S_IFBLK:
 	case S_IFIFO:
 	case S_IFSOCK:
-		err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
+		host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
 		break;
 	default:
 	        printk("nfsd: bad file type %o in nfsd_create\n", type);
-		err = -EINVAL;
+		host_err = -EINVAL;
 	}
-	if (err < 0)
+	if (host_err < 0)
 		goto out_nfserr;
 
 	if (EX_ISSYNC(fhp->fh_export)) {
@@ -1203,7 +1210,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	 * directories via NFS.
 	 */
 	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
-		int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+		__be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
 		if (err2)
 			err = err2;
 	}
@@ -1218,7 +1225,7 @@ out:
 	return err;
 
 out_nfserr:
-	err = nfserrno(err);
+	err = nfserrno(host_err);
 	goto out;
 }
 
@@ -1226,7 +1233,7 @@ out_nfserr:
 /*
  * NFSv3 version of nfsd_create
  */
-int
+__be32
 nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		char *fname, int flen, struct iattr *iap,
 		struct svc_fh *resfhp, int createmode, u32 *verifier,
@@ -1234,7 +1241,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 {
 	struct dentry	*dentry, *dchild = NULL;
 	struct inode	*dirp;
-	int		err;
+	__be32		err;
+	int		host_err;
 	__u32		v_mtime=0, v_atime=0;
 	int		v_mode=0;
 
@@ -1264,7 +1272,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	 * Compose the response file handle.
 	 */
 	dchild = lookup_one_len(fname, dentry, flen);
-	err = PTR_ERR(dchild);
+	host_err = PTR_ERR(dchild);
 	if (IS_ERR(dchild))
 		goto out_nfserr;
 
@@ -1320,8 +1328,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out;
 	}
 
-	err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
-	if (err < 0)
+	host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+	if (host_err < 0)
 		goto out_nfserr;
 
 	if (EX_ISSYNC(fhp->fh_export)) {
@@ -1350,7 +1358,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	 */
  set_attr:
 	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) {
- 		int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+ 		__be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
 		if (err2)
 			err = err2;
 	}
@@ -1368,7 +1376,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
  	return err;
  
  out_nfserr:
-	err = nfserrno(err);
+	err = nfserrno(host_err);
 	goto out;
 }
 #endif /* CONFIG_NFSD_V3 */
@@ -1378,13 +1386,14 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
  * fits into the buffer. On return, it contains the true length.
  * N.B. After this call fhp needs an fh_put
  */
-int
+__be32
 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
 {
 	struct dentry	*dentry;
 	struct inode	*inode;
 	mm_segment_t	oldfs;
-	int		err;
+	__be32		err;
+	int		host_err;
 
 	err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP);
 	if (err)
@@ -1403,18 +1412,18 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
 	 */
 
 	oldfs = get_fs(); set_fs(KERNEL_DS);
-	err = inode->i_op->readlink(dentry, buf, *lenp);
+	host_err = inode->i_op->readlink(dentry, buf, *lenp);
 	set_fs(oldfs);
 
-	if (err < 0)
+	if (host_err < 0)
 		goto out_nfserr;
-	*lenp = err;
+	*lenp = host_err;
 	err = 0;
 out:
 	return err;
 
 out_nfserr:
-	err = nfserrno(err);
+	err = nfserrno(host_err);
 	goto out;
 }
 
@@ -1422,7 +1431,7 @@ out_nfserr:
  * Create a symlink and look up its inode
  * N.B. After this call _both_ fhp and resfhp need an fh_put
  */
-int
+__be32
 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 				char *fname, int flen,
 				char *path,  int plen,
@@ -1430,7 +1439,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 				struct iattr *iap)
 {
 	struct dentry	*dentry, *dnew;
-	int		err, cerr;
+	__be32		err, cerr;
+	int		host_err;
 	umode_t		mode;
 
 	err = nfserr_noent;
@@ -1446,7 +1456,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	fh_lock(fhp);
 	dentry = fhp->fh_dentry;
 	dnew = lookup_one_len(fname, dentry, flen);
-	err = PTR_ERR(dnew);
+	host_err = PTR_ERR(dnew);
 	if (IS_ERR(dnew))
 		goto out_nfserr;
 
@@ -1458,21 +1468,21 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (unlikely(path[plen] != 0)) {
 		char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
 		if (path_alloced == NULL)
-			err = -ENOMEM;
+			host_err = -ENOMEM;
 		else {
 			strncpy(path_alloced, path, plen);
 			path_alloced[plen] = 0;
-			err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
+			host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
 			kfree(path_alloced);
 		}
 	} else
-		err = vfs_symlink(dentry->d_inode, dnew, path, mode);
+		host_err = vfs_symlink(dentry->d_inode, dnew, path, mode);
 
-	if (!err)
+	if (!host_err) {
 		if (EX_ISSYNC(fhp->fh_export))
-			err = nfsd_sync_dir(dentry);
-	if (err)
-		err = nfserrno(err);
+			host_err = nfsd_sync_dir(dentry);
+	}
+	err = nfserrno(host_err);
 	fh_unlock(fhp);
 
 	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
@@ -1482,7 +1492,7 @@ out:
 	return err;
 
 out_nfserr:
-	err = nfserrno(err);
+	err = nfserrno(host_err);
 	goto out;
 }
 
@@ -1490,13 +1500,14 @@ out_nfserr:
  * Create a hardlink
  * N.B. After this call _both_ ffhp and tfhp need an fh_put
  */
-int
+__be32
 nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 				char *name, int len, struct svc_fh *tfhp)
 {
 	struct dentry	*ddir, *dnew, *dold;
 	struct inode	*dirp, *dest;
-	int		err;
+	__be32		err;
+	int		host_err;
 
 	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
 	if (err)
@@ -1517,24 +1528,25 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 	dirp = ddir->d_inode;
 
 	dnew = lookup_one_len(name, ddir, len);
-	err = PTR_ERR(dnew);
+	host_err = PTR_ERR(dnew);
 	if (IS_ERR(dnew))
 		goto out_nfserr;
 
 	dold = tfhp->fh_dentry;
 	dest = dold->d_inode;
 
-	err = vfs_link(dold, dirp, dnew);
-	if (!err) {
+	host_err = vfs_link(dold, dirp, dnew);
+	if (!host_err) {
+		err = 0;	/* success, unless the directory sync below fails */
 		if (EX_ISSYNC(ffhp->fh_export)) {
 			err = nfserrno(nfsd_sync_dir(ddir));
 			write_inode_now(dest, 1);
 		}
 	} else {
-		if (err == -EXDEV && rqstp->rq_vers == 2)
+		if (host_err == -EXDEV && rqstp->rq_vers == 2)
 			err = nfserr_acces;
 		else
-			err = nfserrno(err);
+			err = nfserrno(host_err);
 	}
 
 	dput(dnew);
@@ -1544,7 +1556,7 @@ out:
 	return err;
 
 out_nfserr:
-	err = nfserrno(err);
+	err = nfserrno(host_err);
 	goto out_unlock;
 }
 
@@ -1552,13 +1564,14 @@ out_nfserr:
  * Rename a file
  * N.B. After this call _both_ ffhp and tfhp need an fh_put
  */
-int
+__be32
 nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 			    struct svc_fh *tfhp, char *tname, int tlen)
 {
 	struct dentry	*fdentry, *tdentry, *odentry, *ndentry, *trap;
 	struct inode	*fdir, *tdir;
-	int		err;
+	__be32		err;
+	int		host_err;
 
 	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE);
 	if (err)
@@ -1589,22 +1602,22 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 	fill_pre_wcc(tfhp);
 
 	odentry = lookup_one_len(fname, fdentry, flen);
-	err = PTR_ERR(odentry);
+	host_err = PTR_ERR(odentry);
 	if (IS_ERR(odentry))
 		goto out_nfserr;
 
-	err = -ENOENT;
+	host_err = -ENOENT;
 	if (!odentry->d_inode)
 		goto out_dput_old;
-	err = -EINVAL;
+	host_err = -EINVAL;
 	if (odentry == trap)
 		goto out_dput_old;
 
 	ndentry = lookup_one_len(tname, tdentry, tlen);
-	err = PTR_ERR(ndentry);
+	host_err = PTR_ERR(ndentry);
 	if (IS_ERR(ndentry))
 		goto out_dput_old;
-	err = -ENOTEMPTY;
+	host_err = -ENOTEMPTY;
 	if (ndentry == trap)
 		goto out_dput_new;
 
@@ -1612,14 +1625,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 	if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
 		((atomic_read(&odentry->d_count) > 1)
 		 || (atomic_read(&ndentry->d_count) > 1))) {
-			err = -EPERM;
+			host_err = -EPERM;
 	} else
 #endif
-	err = vfs_rename(fdir, odentry, tdir, ndentry);
-	if (!err && EX_ISSYNC(tfhp->fh_export)) {
-		err = nfsd_sync_dir(tdentry);
-		if (!err)
-			err = nfsd_sync_dir(fdentry);
+	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
+	if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
+		host_err = nfsd_sync_dir(tdentry);
+		if (!host_err)
+			host_err = nfsd_sync_dir(fdentry);
 	}
 
  out_dput_new:
@@ -1627,8 +1640,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
  out_dput_old:
 	dput(odentry);
  out_nfserr:
-	if (err)
-		err = nfserrno(err);
+	err = nfserrno(host_err);
 
 	/* we cannot reply on fh_unlock on the two filehandles,
 	 * as that would do the wrong thing if the two directories
@@ -1647,13 +1659,14 @@ out:
  * Unlink a file or directory
  * N.B. After this call fhp needs an fh_put
  */
-int
+__be32
 nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 				char *fname, int flen)
 {
 	struct dentry	*dentry, *rdentry;
 	struct inode	*dirp;
-	int		err;
+	__be32		err;
+	int		host_err;
 
 	err = nfserr_acces;
 	if (!flen || isdotent(fname, flen))
@@ -1667,7 +1680,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	dirp = dentry->d_inode;
 
 	rdentry = lookup_one_len(fname, dentry, flen);
-	err = PTR_ERR(rdentry);
+	host_err = PTR_ERR(rdentry);
 	if (IS_ERR(rdentry))
 		goto out_nfserr;
 
@@ -1684,22 +1697,23 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 #ifdef MSNFS
 		if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
 			(atomic_read(&rdentry->d_count) > 1)) {
-			err = -EPERM;
+			host_err = -EPERM;
 		} else
 #endif
-		err = vfs_unlink(dirp, rdentry);
+		host_err = vfs_unlink(dirp, rdentry);
 	} else { /* It's RMDIR */
-		err = vfs_rmdir(dirp, rdentry);
+		host_err = vfs_rmdir(dirp, rdentry);
 	}
 
 	dput(rdentry);
 
-	if (err == 0 &&
-	    EX_ISSYNC(fhp->fh_export))
-			err = nfsd_sync_dir(dentry);
+	if (host_err)
+		goto out_nfserr;
+	if (EX_ISSYNC(fhp->fh_export))
+		host_err = nfsd_sync_dir(dentry);
 
 out_nfserr:
-	err = nfserrno(err);
+	err = nfserrno(host_err);
 out:
 	return err;
 }
@@ -1708,11 +1722,12 @@ out:
  * Read entries from a directory.
  * The  NFSv3/4 verifier we ignore for now.
  */
-int
+__be32
 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, 
 	     struct readdir_cd *cdp, encode_dent_fn func)
 {
-	int		err;
+	__be32		err;
+	int 		host_err;
 	struct file	*file;
 	loff_t		offset = *offsetp;
 
@@ -1734,10 +1749,10 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
 
 	do {
 		cdp->err = nfserr_eof; /* will be cleared on successful read */
-		err = vfs_readdir(file, (filldir_t) func, cdp);
-	} while (err >=0 && cdp->err == nfs_ok);
-	if (err)
-		err = nfserrno(err);
+		host_err = vfs_readdir(file, (filldir_t) func, cdp);
+	} while (host_err >=0 && cdp->err == nfs_ok);
+	if (host_err)
+		err = nfserrno(host_err);
 	else
 		err = cdp->err;
 	*offsetp = vfs_llseek(file, 0, 1);
@@ -1754,10 +1769,10 @@ out:
  * Get file system stats
  * N.B. After this call fhp needs an fh_put
  */
-int
+__be32
 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
 {
-	int err = fh_verify(rqstp, fhp, 0, MAY_NOP);
+	__be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP);
 	if (!err && vfs_statfs(fhp->fh_dentry,stat))
 		err = nfserr_io;
 	return err;
@@ -1766,7 +1781,7 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
 /*
  * Check for a user's access permissions to this inode.
  */
-int
+__be32
 nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
 {
 	struct inode	*inode = dentry->d_inode;
-- 
cgit v1.2.3


From b37ad28bcaa7c486a4ff0fb6c3bdaaacd67b86ce Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:28:59 -0700
Subject: [PATCH] nfsd: nfs4 code returns error values in net-endian

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c    |  82 +++++++++++++--------------
 fs/nfsd/nfs4recover.c |  14 ++---
 fs/nfsd/nfs4state.c   |  96 ++++++++++++++++----------------
 fs/nfsd/nfs4xdr.c     | 150 +++++++++++++++++++++++++-------------------------
 4 files changed, 171 insertions(+), 171 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ca6414248527..63823945f972 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -67,10 +67,10 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
 	*dst = *src;
 }
 
-static int
+static __be32
 do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode)
 {
-	int status;
+	__be32 status;
 
 	if (open->op_truncate &&
 		!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
@@ -88,11 +88,11 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
 	return status;
 }
 
-static int
+static __be32
 do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
 {
 	struct svc_fh resfh;
-	int status;
+	__be32 status;
 
 	fh_init(&resfh, NFS4_FHSIZE);
 	open->op_truncate = 0;
@@ -131,10 +131,10 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 	return status;
 }
 
-static int
+static __be32
 do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
 {
-	int status;
+	__be32 status;
 
 	/* Only reclaims from previously confirmed clients are valid */
 	if ((status = nfs4_check_open_reclaim(&open->op_clientid)))
@@ -161,10 +161,10 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
 }
 
 
-static inline int
+static inline __be32
 nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, struct nfs4_stateowner **replay_owner)
 {
-	int status;
+	__be32 status;
 	dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n",
 		(int)open->op_fname.len, open->op_fname.data,
 		open->op_stateowner);
@@ -261,7 +261,7 @@ out:
 /*
  * filehandle-manipulating ops.
  */
-static inline int
+static inline __be32
 nfsd4_getfh(struct svc_fh *current_fh, struct svc_fh **getfh)
 {
 	if (!current_fh->fh_dentry)
@@ -271,7 +271,7 @@ nfsd4_getfh(struct svc_fh *current_fh, struct svc_fh **getfh)
 	return nfs_ok;
 }
 
-static inline int
+static inline __be32
 nfsd4_putfh(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_putfh *putfh)
 {
 	fh_put(current_fh);
@@ -280,10 +280,10 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_putf
 	return fh_verify(rqstp, current_fh, 0, MAY_NOP);
 }
 
-static inline int
+static inline __be32
 nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh)
 {
-	int status;
+	__be32 status;
 
 	fh_put(current_fh);
 	status = exp_pseudoroot(rqstp->rq_client, current_fh,
@@ -291,7 +291,7 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh)
 	return status;
 }
 
-static inline int
+static inline __be32
 nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
 {
 	if (!save_fh->fh_dentry)
@@ -301,7 +301,7 @@ nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
 	return nfs_ok;
 }
 
-static inline int
+static inline __be32
 nfsd4_savefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
 {
 	if (!current_fh->fh_dentry)
@@ -314,7 +314,7 @@ nfsd4_savefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
 /*
  * misc nfsv4 ops
  */
-static inline int
+static inline __be32
 nfsd4_access(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_access *access)
 {
 	if (access->ac_req_access & ~NFS3_ACCESS_FULL)
@@ -324,10 +324,10 @@ nfsd4_access(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_acc
 	return nfsd_access(rqstp, current_fh, &access->ac_resp_access, &access->ac_supported);
 }
 
-static inline int
+static inline __be32
 nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_commit *commit)
 {
-	int status;
+	__be32 status;
 
 	u32 *p = (u32 *)commit->co_verf.data;
 	*p++ = nfssvc_boot.tv_sec;
@@ -339,11 +339,11 @@ nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_com
 	return status;
 }
 
-static int
+static __be32
 nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_create *create)
 {
 	struct svc_fh resfh;
-	int status;
+	__be32 status;
 	dev_t rdev;
 
 	fh_init(&resfh, NFS4_FHSIZE);
@@ -423,10 +423,10 @@ nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_cre
 	return status;
 }
 
-static inline int
+static inline __be32
 nfsd4_getattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_getattr *getattr)
 {
-	int status;
+	__be32 status;
 
 	status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
 	if (status)
@@ -442,11 +442,11 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ge
 	return nfs_ok;
 }
 
-static inline int
+static inline __be32
 nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh,
 	   struct svc_fh *save_fh, struct nfsd4_link *link)
 {
-	int status = nfserr_nofilehandle;
+	__be32 status = nfserr_nofilehandle;
 
 	if (!save_fh->fh_dentry)
 		return status;
@@ -456,11 +456,11 @@ nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh,
 	return status;
 }
 
-static int
+static __be32
 nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh)
 {
 	struct svc_fh tmp_fh;
-	int ret;
+	__be32 ret;
 
 	fh_init(&tmp_fh, NFS4_FHSIZE);
 	if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh,
@@ -474,16 +474,16 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh)
 	return nfsd_lookup(rqstp, current_fh, "..", 2, current_fh);
 }
 
-static inline int
+static inline __be32
 nfsd4_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lookup *lookup)
 {
 	return nfsd_lookup(rqstp, current_fh, lookup->lo_name, lookup->lo_len, current_fh);
 }
 
-static inline int
+static inline __be32
 nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
 {
-	int status;
+	__be32 status;
 
 	/* no need to check permission - this will be done in nfsd_read() */
 
@@ -508,7 +508,7 @@ out:
 	return status;
 }
 
-static inline int
+static inline __be32
 nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readdir *readdir)
 {
 	u64 cookie = readdir->rd_cookie;
@@ -531,7 +531,7 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_re
 	return nfs_ok;
 }
 
-static inline int
+static inline __be32
 nfsd4_readlink(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readlink *readlink)
 {
 	readlink->rl_rqstp = rqstp;
@@ -539,10 +539,10 @@ nfsd4_readlink(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_r
 	return nfs_ok;
 }
 
-static inline int
+static inline __be32
 nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_remove *remove)
 {
-	int status;
+	__be32 status;
 
 	if (nfs4_in_grace())
 		return nfserr_grace;
@@ -556,11 +556,11 @@ nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_rem
 	return status;
 }
 
-static inline int
+static inline __be32
 nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
 	     struct svc_fh *save_fh, struct nfsd4_rename *rename)
 {
-	int status = nfserr_nofilehandle;
+	__be32 status = nfserr_nofilehandle;
 
 	if (!save_fh->fh_dentry)
 		return status;
@@ -589,10 +589,10 @@ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
 	return status;
 }
 
-static inline int
+static inline __be32
 nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr)
 {
-	int status = nfs_ok;
+	__be32 status = nfs_ok;
 
 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
 		nfs4_lock_state();
@@ -614,13 +614,13 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_se
 	return status;
 }
 
-static inline int
+static inline __be32
 nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_write *write)
 {
 	stateid_t *stateid = &write->wr_stateid;
 	struct file *filp = NULL;
 	u32 *p;
-	int status = nfs_ok;
+	__be32 status = nfs_ok;
 
 	/* no need to check permission - this will be done in nfsd_write() */
 
@@ -661,12 +661,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
  * attributes matched.  VERIFY is implemented by mapping NFSERR_SAME
  * to NFS_OK after the call; NVERIFY by mapping NFSERR_NOT_SAME to NFS_OK.
  */
-static int
+static __be32
 nfsd4_verify(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_verify *verify)
 {
 	__be32 *buf, *p;
 	int count;
-	int status;
+	__be32 status;
 
 	status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
 	if (status)
@@ -741,7 +741,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 	struct svc_fh	*save_fh = NULL;
 	struct nfs4_stateowner *replay_owner = NULL;
 	int		slack_space;    /* in words, not bytes! */
-	int		status;
+	__be32		status;
 
 	status = nfserr_resource;
 	current_fh = kmalloc(sizeof(*current_fh), GFP_KERNEL);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 1cbd2e4ee122..e9d07704680e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -83,13 +83,13 @@ md5_to_hex(char *out, char *md5)
 	*out = '\0';
 }
 
-int
+__be32
 nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
 {
 	struct xdr_netobj cksum;
 	struct hash_desc desc;
 	struct scatterlist sg[1];
-	int status = nfserr_resource;
+	__be32 status = nfserr_resource;
 
 	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
 			clname->len, clname->data);
@@ -193,7 +193,7 @@ nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
 	struct dentry_list *child;
 
 	if (name && isdotent(name, namlen))
-		return nfs_ok;
+		return 0;
 	dentry = lookup_one_len(name, parent, namlen);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
@@ -333,14 +333,14 @@ purge_old(struct dentry *parent, struct dentry *child)
 	int status;
 
 	if (nfs4_has_reclaimed_state(child->d_name.name))
-		return nfs_ok;
+		return 0;
 
 	status = nfsd4_clear_clid_dir(parent, child);
 	if (status)
 		printk("failed to remove client recovery directory %s\n",
 				child->d_name.name);
 	/* Keep trying, success or failure: */
-	return nfs_ok;
+	return 0;
 }
 
 void
@@ -365,10 +365,10 @@ load_recdir(struct dentry *parent, struct dentry *child)
 		printk("nfsd4: illegal name %s in recovery directory\n",
 				child->d_name.name);
 		/* Keep trying; maybe the others are OK: */
-		return nfs_ok;
+		return 0;
 	}
 	nfs4_client_to_reclaim(child->d_name.name);
-	return nfs_ok;
+	return 0;
 }
 
 int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ebcf226a9e4a..e5ca6d7028df 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -710,7 +710,7 @@ out_err:
  *		as described above.
  *
  */
-int
+__be32
 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 {
 	u32 			ip_addr = rqstp->rq_addr.sin_addr.s_addr;
@@ -721,7 +721,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 	nfs4_verifier		clverifier = setclid->se_verf;
 	unsigned int 		strhashval;
 	struct nfs4_client	*conf, *unconf, *new;
-	int 			status;
+	__be32 			status;
 	char                    dname[HEXDIR_LEN];
 	
 	if (!check_name(clname))
@@ -875,14 +875,14 @@ out:
  *
  * NOTE: callback information will be processed here in a future patch
  */
-int
+__be32
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
 {
 	u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
 	struct nfs4_client *conf, *unconf;
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
 	clientid_t * clid = &setclientid_confirm->sc_clientid;
-	int status;
+	__be32 status;
 
 	if (STALE_CLIENTID(clid))
 		return nfserr_stale_clientid;
@@ -1280,13 +1280,13 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
  * Called to check deny when READ with all zero stateid or
  * WRITE with all zero or all one stateid
  */
-static int
+static __be32
 nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 {
 	struct inode *ino = current_fh->fh_dentry->d_inode;
 	struct nfs4_file *fp;
 	struct nfs4_stateid *stp;
-	int ret;
+	__be32 ret;
 
 	dprintk("NFSD: nfs4_share_conflict\n");
 
@@ -1444,7 +1444,7 @@ static struct lock_manager_operations nfsd_lease_mng_ops = {
 };
 
 
-int
+__be32
 nfsd4_process_open1(struct nfsd4_open *open)
 {
 	clientid_t *clientid = &open->op_clientid;
@@ -1501,7 +1501,7 @@ renew:
 	return nfs_ok;
 }
 
-static inline int
+static inline __be32
 nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
 {
 	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
@@ -1522,12 +1522,12 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 	return NULL;
 }
 
-static int
+static __be32
 nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
 		struct nfs4_delegation **dp)
 {
 	int flags;
-	int status = nfserr_bad_stateid;
+	__be32 status = nfserr_bad_stateid;
 
 	*dp = find_delegation_file(fp, &open->op_delegate_stateid);
 	if (*dp == NULL)
@@ -1546,11 +1546,11 @@ out:
 	return nfs_ok;
 }
 
-static int
+static __be32
 nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
 {
 	struct nfs4_stateid *local;
-	int status = nfserr_share_denied;
+	__be32 status = nfserr_share_denied;
 	struct nfs4_stateowner *sop = open->op_stateowner;
 
 	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
@@ -1575,7 +1575,7 @@ nfs4_alloc_stateid(void)
 	return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
 }
 
-static int
+static __be32
 nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 		struct nfs4_delegation *dp,
 		struct svc_fh *cur_fh, int flags)
@@ -1590,7 +1590,7 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 		get_file(dp->dl_vfs_file);
 		stp->st_vfs_file = dp->dl_vfs_file;
 	} else {
-		int status;
+		__be32 status;
 		status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
 				&stp->st_vfs_file);
 		if (status) {
@@ -1604,7 +1604,7 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 	return 0;
 }
 
-static inline int
+static inline __be32
 nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
 		struct nfsd4_open *open)
 {
@@ -1619,22 +1619,22 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
 	return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
 }
 
-static int
+static __be32
 nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
 {
 	struct file *filp = stp->st_vfs_file;
 	struct inode *inode = filp->f_dentry->d_inode;
 	unsigned int share_access, new_writer;
-	int status;
+	__be32 status;
 
 	set_access(&share_access, stp->st_access_bmap);
 	new_writer = (~share_access) & open->op_share_access
 			& NFS4_SHARE_ACCESS_WRITE;
 
 	if (new_writer) {
-		status = get_write_access(inode);
-		if (status)
-			return nfserrno(status);
+		int err = get_write_access(inode);
+		if (err)
+			return nfserrno(err);
 	}
 	status = nfsd4_truncate(rqstp, cur_fh, open);
 	if (status) {
@@ -1738,14 +1738,14 @@ out:
 /*
  * called with nfs4_lock_state() held.
  */
-int
+__be32
 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
 {
 	struct nfs4_file *fp = NULL;
 	struct inode *ino = current_fh->fh_dentry->d_inode;
 	struct nfs4_stateid *stp = NULL;
 	struct nfs4_delegation *dp = NULL;
-	int status;
+	__be32 status;
 
 	status = nfserr_inval;
 	if (!access_valid(open->op_share_access)
@@ -1833,11 +1833,11 @@ static struct work_struct laundromat_work;
 static void laundromat_main(void *);
 static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
 
-int 
+__be32
 nfsd4_renew(clientid_t *clid)
 {
 	struct nfs4_client *clp;
-	int status;
+	__be32 status;
 
 	nfs4_lock_state();
 	dprintk("process_renew(%08x/%08x): starting\n", 
@@ -1996,9 +1996,9 @@ access_permit_write(unsigned long access_bmap)
 }
 
 static
-int nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
+__be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
 {
-        int status = nfserr_openmode;
+        __be32 status = nfserr_openmode;
 
 	if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap)))
                 goto out;
@@ -2009,7 +2009,7 @@ out:
 	return status;
 }
 
-static inline int
+static inline __be32
 check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
 {
 	/* Trying to call delegreturn with a special stateid? Yuch: */
@@ -2043,14 +2043,14 @@ io_during_grace_disallowed(struct inode *inode, int flags)
 /*
 * Checks for stateid operations
 */
-int
+__be32
 nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp)
 {
 	struct nfs4_stateid *stp = NULL;
 	struct nfs4_delegation *dp = NULL;
 	stateid_t *stidp;
 	struct inode *ino = current_fh->fh_dentry->d_inode;
-	int status;
+	__be32 status;
 
 	dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n",
 		stateid->si_boot, stateid->si_stateownerid, 
@@ -2125,7 +2125,7 @@ setlkflg (int type)
 /* 
  * Checks for sequence id mutating operations. 
  */
-static int
+static __be32
 nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock)
 {
 	struct nfs4_stateid *stp;
@@ -2169,7 +2169,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
 		clientid_t *lockclid = &lock->v.new.clientid;
 		struct nfs4_client *clp = sop->so_client;
 		int lkflg = 0;
-		int status;
+		__be32 status;
 
 		lkflg = setlkflg(lock->lk_type);
 
@@ -2241,10 +2241,10 @@ check_replay:
 	return nfserr_bad_seqid;
 }
 
-int
+__be32
 nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc, struct nfs4_stateowner **replay_owner)
 {
-	int status;
+	__be32 status;
 	struct nfs4_stateowner *sop;
 	struct nfs4_stateid *stp;
 
@@ -2310,10 +2310,10 @@ reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
 	}
 }
 
-int
+__be32
 nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od, struct nfs4_stateowner **replay_owner)
 {
-	int status;
+	__be32 status;
 	struct nfs4_stateid *stp;
 	unsigned int share_access;
 
@@ -2365,10 +2365,10 @@ out:
 /*
  * nfs4_unlock_state() called after encode
  */
-int
+__be32
 nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close, struct nfs4_stateowner **replay_owner)
 {
-	int status;
+	__be32 status;
 	struct nfs4_stateid *stp;
 
 	dprintk("NFSD: nfsd4_close on file %.*s\n", 
@@ -2404,10 +2404,10 @@ out:
 	return status;
 }
 
-int
+__be32
 nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr)
 {
-	int status;
+	__be32 status;
 
 	if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0)))
 		goto out;
@@ -2635,7 +2635,7 @@ check_lock_length(u64 offset, u64 length)
 /*
  *  LOCK operation 
  */
-int
+__be32
 nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock, struct nfs4_stateowner **replay_owner)
 {
 	struct nfs4_stateowner *open_sop = NULL;
@@ -2644,7 +2644,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	struct file *filp;
 	struct file_lock file_lock;
 	struct file_lock conflock;
-	int status = 0;
+	__be32 status = 0;
 	unsigned int strhashval;
 
 	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
@@ -2793,14 +2793,14 @@ out:
 /*
  * LOCKT operation
  */
-int
+__be32
 nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lockt *lockt)
 {
 	struct inode *inode;
 	struct file file;
 	struct file_lock file_lock;
 	struct file_lock conflock;
-	int status;
+	__be32 status;
 
 	if (nfs4_in_grace())
 		return nfserr_grace;
@@ -2873,13 +2873,13 @@ out:
 	return status;
 }
 
-int
+__be32
 nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku, struct nfs4_stateowner **replay_owner)
 {
 	struct nfs4_stateid *stp;
 	struct file *filp = NULL;
 	struct file_lock file_lock;
-	int status;
+	__be32 status;
 						        
 	dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
 		(long long) locku->lu_offset,
@@ -2965,7 +2965,7 @@ out:
 	return status;
 }
 
-int
+__be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
 {
 	clientid_t *clid = &rlockowner->rl_clientid;
@@ -2974,7 +2974,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
 	struct xdr_netobj *owner = &rlockowner->rl_owner;
 	struct list_head matches;
 	int i;
-	int status;
+	__be32 status;
 
 	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
 		clid->cl_boot, clid->cl_id);
@@ -3111,7 +3111,7 @@ nfs4_find_reclaim_client(clientid_t *clid)
 /*
 * Called from OPEN. Look for clientid in reclaim list.
 */
-int
+__be32
 nfs4_check_open_reclaim(clientid_t *clid)
 {
 	return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3419d99aeb1a..d7b630f1a9ae 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -68,8 +68,8 @@
 #define NFS4_REFERRAL_FSID_MAJOR	0x8000000ULL
 #define NFS4_REFERRAL_FSID_MINOR	0x8000000ULL
 
-static int
-check_filename(char *str, int len, int err)
+static __be32
+check_filename(char *str, int len, __be32 err)
 {
 	int i;
 
@@ -95,7 +95,7 @@ check_filename(char *str, int len, int err)
  */
 #define DECODE_HEAD				\
 	__be32 *p;				\
-	int status
+	__be32 status
 #define DECODE_TAIL				\
 	status = 0;				\
 out:						\
@@ -217,7 +217,7 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
 }
 
 
-static int
+static __be32
 nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
 {
 	u32 bmlen;
@@ -240,7 +240,7 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr,
     struct nfs4_acl **acl)
 {
@@ -418,7 +418,7 @@ out_nfserr:
 	goto out;
 }
 
-static int
+static __be32
 nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access)
 {
 	DECODE_HEAD;
@@ -429,7 +429,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
 {
 	DECODE_HEAD;
@@ -444,7 +444,7 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
 }
 
 
-static int
+static __be32
 nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
 {
 	DECODE_HEAD;
@@ -456,7 +456,7 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
 {
 	DECODE_HEAD;
@@ -496,7 +496,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
 	DECODE_TAIL;
 }
 
-static inline int
+static inline __be32
 nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
 {
 	DECODE_HEAD;
@@ -508,13 +508,13 @@ nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegretu
 	DECODE_TAIL;
 }
 
-static inline int
+static inline __be32
 nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
 {
 	return nfsd4_decode_bitmap(argp, getattr->ga_bmval);
 }
 
-static int
+static __be32
 nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
 {
 	DECODE_HEAD;
@@ -529,7 +529,7 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
 {
 	DECODE_HEAD;
@@ -568,7 +568,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
 {
 	DECODE_HEAD;
@@ -587,7 +587,7 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
 {
 	DECODE_HEAD;
@@ -606,7 +606,7 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup)
 {
 	DECODE_HEAD;
@@ -621,7 +621,7 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 {
 	DECODE_HEAD;
@@ -699,7 +699,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
 {
 	DECODE_HEAD;
@@ -713,7 +713,7 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down)
 {
 	DECODE_HEAD;
@@ -729,7 +729,7 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
 {
 	DECODE_HEAD;
@@ -744,7 +744,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
 {
 	DECODE_HEAD;
@@ -758,7 +758,7 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir)
 {
 	DECODE_HEAD;
@@ -774,7 +774,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
 {
 	DECODE_HEAD;
@@ -789,7 +789,7 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename)
 {
 	DECODE_HEAD;
@@ -809,7 +809,7 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
 {
 	DECODE_HEAD;
@@ -820,7 +820,7 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
 {
 	DECODE_HEAD;
@@ -834,7 +834,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid)
 {
 	DECODE_HEAD;
@@ -859,7 +859,7 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c)
 {
 	DECODE_HEAD;
@@ -872,7 +872,7 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s
 }
 
 /* Also used for NVERIFY */
-static int
+static __be32
 nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
 {
 #if 0
@@ -908,7 +908,7 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 {
 	int avail;
@@ -959,7 +959,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
 {
 	DECODE_HEAD;
@@ -973,7 +973,7 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
 	DECODE_TAIL;
 }
 
-static int
+static __be32
 nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 {
 	DECODE_HEAD;
@@ -1234,7 +1234,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 /* Encode as an array of strings the string given with components
  * seperated @sep.
  */
-static int nfsd4_encode_components(char sep, char *components,
+static __be32 nfsd4_encode_components(char sep, char *components,
 				   __be32 **pp, int *buflen)
 {
 	__be32 *p = *pp;
@@ -1271,10 +1271,10 @@ static int nfsd4_encode_components(char sep, char *components,
 /*
  * encode a location element of a fs_locations structure
  */
-static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
+static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
 				    __be32 **pp, int *buflen)
 {
-	int status;
+	__be32 status;
 	__be32 *p = *pp;
 
 	status = nfsd4_encode_components(':', location->hosts, &p, buflen);
@@ -1292,7 +1292,7 @@ static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
  * Returned string is safe to use as long as the caller holds a reference
  * to @exp.
  */
-static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, u32 *stat)
+static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat)
 {
 	struct svc_fh tmp_fh;
 	char *path, *rootpath;
@@ -1318,11 +1318,11 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, u32 *sta
 /*
  *  encode a fs_locations structure
  */
-static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
+static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
 				     struct svc_export *exp,
 				     __be32 **pp, int *buflen)
 {
-	u32 status;
+	__be32 status;
 	int i;
 	__be32 *p = *pp;
 	struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
@@ -1353,7 +1353,7 @@ static u32 nfs4_ftypes[16] = {
         NF4SOCK, NF4BAD,  NF4LNK, NF4BAD,
 };
 
-static int
+static __be32
 nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
 			__be32 **p, int *buflen)
 {
@@ -1375,19 +1375,19 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
 	return 0;
 }
 
-static inline int
+static inline __be32
 nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen)
 {
 	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen);
 }
 
-static inline int
+static inline __be32
 nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen)
 {
 	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen);
 }
 
-static inline int
+static inline __be32
 nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
 		__be32 **p, int *buflen)
 {
@@ -1398,7 +1398,7 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
 			      FATTR4_WORD0_RDATTR_ERROR)
 #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
 
-static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
+static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
 {
 	/* As per referral draft:  */
 	if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS ||
@@ -1421,7 +1421,7 @@ static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
  * @countp is the buffer size in _words_; upon successful return this becomes
  * replaced with the number of words written.
  */
-int
+__be32
 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 		struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval,
 		struct svc_rqst *rqstp)
@@ -1437,7 +1437,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	u64 dummy64;
 	u32 rdattr_err = 0;
 	__be32 *p = buffer;
-	int status;
+	__be32 status;
 	int aclsupport = 0;
 	struct nfs4_acl *acl = NULL;
 
@@ -1829,13 +1829,13 @@ out_serverfault:
 	goto out;
 }
 
-static int
+static __be32
 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
 		const char *name, int namlen, __be32 *p, int *buflen)
 {
 	struct svc_export *exp = cd->rd_fhp->fh_export;
 	struct dentry *dentry;
-	int nfserr;
+	__be32 nfserr;
 
 	dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
 	if (IS_ERR(dentry))
@@ -1865,7 +1865,7 @@ out_put:
 }
 
 static __be32 *
-nfsd4_encode_rdattr_error(__be32 *p, int buflen, int nfserr)
+nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
 {
 	__be32 *attrlenp;
 
@@ -1888,7 +1888,7 @@ nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen,
 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
 	int buflen;
 	__be32 *p = cd->buffer;
-	int nfserr = nfserr_toosmall;
+	__be32 nfserr = nfserr_toosmall;
 
 	/* In nfsv4, "." and ".." never make it onto the wire.. */
 	if (name && isdotent(name, namlen)) {
@@ -1944,7 +1944,7 @@ fail:
 }
 
 static void
-nfsd4_encode_access(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_access *access)
+nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
 {
 	ENCODE_HEAD;
 
@@ -1957,7 +1957,7 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_acc
 }
 
 static void
-nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_close *close)
+nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
 {
 	ENCODE_SEQID_OP_HEAD;
 
@@ -1972,7 +1972,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_clos
 
 
 static void
-nfsd4_encode_commit(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_commit *commit)
+nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
 {
 	ENCODE_HEAD;
 
@@ -1984,7 +1984,7 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_com
 }
 
 static void
-nfsd4_encode_create(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_create *create)
+nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
 {
 	ENCODE_HEAD;
 
@@ -1998,8 +1998,8 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_cre
 	}
 }
 
-static int
-nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_getattr *getattr)
+static __be32
+nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr)
 {
 	struct svc_fh *fhp = getattr->ga_fhp;
 	int buflen;
@@ -2017,7 +2017,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ge
 }
 
 static void
-nfsd4_encode_getfh(struct nfsd4_compoundres *resp, int nfserr, struct svc_fh *fhp)
+nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh *fhp)
 {
 	unsigned int len;
 	ENCODE_HEAD;
@@ -2057,7 +2057,7 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie
 }
 
 static void
-nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock *lock)
+nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
 {
 	ENCODE_SEQID_OP_HEAD;
 
@@ -2073,14 +2073,14 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock
 }
 
 static void
-nfsd4_encode_lockt(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lockt *lockt)
+nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
 {
 	if (nfserr == nfserr_denied)
 		nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
 }
 
 static void
-nfsd4_encode_locku(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_locku *locku)
+nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
 {
 	ENCODE_SEQID_OP_HEAD;
 
@@ -2096,7 +2096,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock
 
 
 static void
-nfsd4_encode_link(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_link *link)
+nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
 {
 	ENCODE_HEAD;
 
@@ -2109,7 +2109,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_link
 
 
 static void
-nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open *open)
+nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
 {
 	ENCODE_SEQID_OP_HEAD;
 
@@ -2174,7 +2174,7 @@ out:
 }
 
 static void
-nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_confirm *oc)
+nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
 {
 	ENCODE_SEQID_OP_HEAD;
 				        
@@ -2189,7 +2189,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfs
 }
 
 static void
-nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_downgrade *od)
+nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
 {
 	ENCODE_SEQID_OP_HEAD;
 				        
@@ -2203,8 +2203,8 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct n
 	ENCODE_SEQID_OP_TAIL(od->od_stateowner);
 }
 
-static int
-nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr,
+static __be32
+nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_read *read)
 {
 	u32 eof;
@@ -2268,8 +2268,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr,
 	return 0;
 }
 
-static int
-nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_readlink *readlink)
+static __be32
+nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
 {
 	int maxcount;
 	char *page;
@@ -2316,8 +2316,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
 	return 0;
 }
 
-static int
-nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_readdir *readdir)
+static __be32
+nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
 {
 	int maxcount;
 	loff_t offset;
@@ -2396,7 +2396,7 @@ err_no_verf:
 }
 
 static void
-nfsd4_encode_remove(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_remove *remove)
+nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
 {
 	ENCODE_HEAD;
 
@@ -2408,7 +2408,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_rem
 }
 
 static void
-nfsd4_encode_rename(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_rename *rename)
+nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
 {
 	ENCODE_HEAD;
 
@@ -2425,7 +2425,7 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ren
  * regardless of the error status.
  */
 static void
-nfsd4_encode_setattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_setattr *setattr)
+nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
 {
 	ENCODE_HEAD;
 
@@ -2444,7 +2444,7 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_se
 }
 
 static void
-nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_setclientid *scd)
+nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
 {
 	ENCODE_HEAD;
 
@@ -2463,7 +2463,7 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, int nfserr, struct nfsd
 }
 
 static void
-nfsd4_encode_write(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_write *write)
+nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
 {
 	ENCODE_HEAD;
 
@@ -2641,7 +2641,7 @@ void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args)
 int
 nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
 {
-	int status;
+	__be32 status;
 
 	args->p = p;
 	args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
-- 
cgit v1.2.3


From c4d987ba841dff4b2fc768e52d1d95af83f9f157 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:29:00 -0700
Subject: [PATCH] nfsd: NFSv{2,3} trivial endianness annotations for error
 values

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs2acl.c  |  6 +++---
 fs/nfsd/nfs3acl.c  |  4 ++--
 fs/nfsd/nfs3proc.c | 46 +++++++++++++++++++++++++---------------------
 fs/nfsd/nfsproc.c  | 40 +++++++++++++++++++++-------------------
 4 files changed, 51 insertions(+), 45 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index fd5397d8c62a..e3eca0816986 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -35,7 +35,7 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
 {
 	svc_fh *fh;
 	struct posix_acl *acl;
-	int nfserr = 0;
+	__be32 nfserr = 0;
 
 	dprintk("nfsd: GETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
 
@@ -102,7 +102,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
 		struct nfsd_attrstat *resp)
 {
 	svc_fh *fh;
-	int nfserr = 0;
+	__be32 nfserr = 0;
 
 	dprintk("nfsd: SETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
 
@@ -143,7 +143,7 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
 static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
 		struct nfsd3_accessres *resp)
 {
-	int nfserr;
+	__be32 nfserr;
 
 	dprintk("nfsd: ACCESS(2acl)   %s 0x%x\n",
 			SVCFH_fmt(&argp->fh),
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 78b2c83d00c5..fcad2895ddb0 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -33,7 +33,7 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
 {
 	svc_fh *fh;
 	struct posix_acl *acl;
-	int nfserr = 0;
+	__be32 nfserr = 0;
 
 	fh = fh_copy(&resp->fh, &argp->fh);
 	if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
@@ -98,7 +98,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
 		struct nfsd3_attrstat *resp)
 {
 	svc_fh *fh;
-	int nfserr = 0;
+	__be32 nfserr = 0;
 
 	fh = fh_copy(&resp->fh, &argp->fh);
 	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a12663fdfe16..64db601c2bd2 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -56,7 +56,8 @@ static __be32
 nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
 					   struct nfsd3_attrstat *resp)
 {
-	int	err, nfserr;
+	int	err;
+	__be32	nfserr;
 
 	dprintk("nfsd: GETATTR(3)  %s\n",
 		SVCFH_fmt(&argp->fh));
@@ -80,7 +81,7 @@ static __be32
 nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
 					   struct nfsd3_attrstat  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: SETATTR(3)  %s\n",
 				SVCFH_fmt(&argp->fh));
@@ -98,7 +99,7 @@ static __be32
 nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 					  struct nfsd3_diropres  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP(3)   %s %.*s\n",
 				SVCFH_fmt(&argp->fh),
@@ -122,7 +123,7 @@ static __be32
 nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
 					  struct nfsd3_accessres *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: ACCESS(3)   %s 0x%x\n",
 				SVCFH_fmt(&argp->fh),
@@ -141,7 +142,7 @@ static __be32
 nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
 					   struct nfsd3_readlinkres *resp)
 {
-	int nfserr;
+	__be32 nfserr;
 
 	dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
 
@@ -159,7 +160,7 @@ static __be32
 nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 				        struct nfsd3_readres  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 
 	dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
@@ -199,7 +200,7 @@ static __be32
 nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
 					 struct nfsd3_writeres  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: WRITE(3)    %s %d bytes at %ld%s\n",
 				SVCFH_fmt(&argp->fh),
@@ -229,7 +230,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 {
 	svc_fh		*dirfhp, *newfhp = NULL;
 	struct iattr	*attr;
-	u32		nfserr;
+	__be32		nfserr;
 
 	dprintk("nfsd: CREATE(3)   %s %.*s\n",
 				SVCFH_fmt(&argp->fh),
@@ -269,7 +270,7 @@ static __be32
 nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 					 struct nfsd3_diropres   *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR(3)    %s %.*s\n",
 				SVCFH_fmt(&argp->fh),
@@ -289,7 +290,7 @@ static __be32
 nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
 					   struct nfsd3_diropres    *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
 				SVCFH_fmt(&argp->ffh),
@@ -311,7 +312,8 @@ static __be32
 nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
 					 struct nfsd3_diropres  *resp)
 {
-	int	nfserr, type;
+	__be32	nfserr;
+	int type;
 	dev_t	rdev = 0;
 
 	dprintk("nfsd: MKNOD(3)    %s %.*s\n",
@@ -347,7 +349,7 @@ static __be32
 nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 					  struct nfsd3_attrstat  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE(3)   %s %.*s\n",
 				SVCFH_fmt(&argp->fh),
@@ -367,7 +369,7 @@ static __be32
 nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 					 struct nfsd3_attrstat  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR(3)    %s %.*s\n",
 				SVCFH_fmt(&argp->fh),
@@ -383,7 +385,7 @@ static __be32
 nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
 					  struct nfsd3_renameres  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: RENAME(3)   %s %.*s ->\n",
 				SVCFH_fmt(&argp->ffh),
@@ -405,7 +407,7 @@ static __be32
 nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
 					struct nfsd3_linkres  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: LINK(3)     %s ->\n",
 				SVCFH_fmt(&argp->ffh));
@@ -428,7 +430,8 @@ static __be32
 nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 					   struct nfsd3_readdirres  *resp)
 {
-	int		nfserr, count;
+	__be32		nfserr;
+	int		count;
 
 	dprintk("nfsd: READDIR(3)  %s %d bytes at %d\n",
 				SVCFH_fmt(&argp->fh),
@@ -463,7 +466,8 @@ static __be32
 nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 					       struct nfsd3_readdirres  *resp)
 {
-	int	nfserr, count = 0;
+	__be32	nfserr;
+	int	count = 0;
 	loff_t	offset;
 	int	i;
 	caddr_t	page_addr = NULL;
@@ -521,7 +525,7 @@ static __be32
 nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
 					   struct nfsd3_fsstatres *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: FSSTAT(3)   %s\n",
 				SVCFH_fmt(&argp->fh));
@@ -538,7 +542,7 @@ static __be32
 nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
 					   struct nfsd3_fsinfores *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 
 	dprintk("nfsd: FSINFO(3)   %s\n",
@@ -580,7 +584,7 @@ static __be32
 nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
 					     struct nfsd3_pathconfres *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: PATHCONF(3) %s\n",
 				SVCFH_fmt(&argp->fh));
@@ -623,7 +627,7 @@ static __be32
 nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
 					   struct nfsd3_commitres  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: COMMIT(3)   %s %u@%Lu\n",
 				SVCFH_fmt(&argp->fh),
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 03ab6822291f..ec983b777680 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -36,16 +36,16 @@ nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 	return nfs_ok;
 }
 
-static int
-nfsd_return_attrs(int err, struct nfsd_attrstat *resp)
+static __be32
+nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp)
 {
 	if (err) return err;
 	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt,
 				    resp->fh.fh_dentry,
 				    &resp->stat));
 }
-static int
-nfsd_return_dirop(int err, struct nfsd_diropres *resp)
+static __be32
+nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
 {
 	if (err) return err;
 	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt,
@@ -60,7 +60,7 @@ static __be32
 nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
 					  struct nfsd_attrstat *resp)
 {
-	int nfserr;
+	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
 	fh_copy(&resp->fh, &argp->fh);
@@ -76,7 +76,7 @@ static __be32
 nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
 					  struct nfsd_attrstat  *resp)
 {
-	int nfserr;
+	__be32 nfserr;
 	dprintk("nfsd: SETATTR  %s, valid=%x, size=%ld\n",
 		SVCFH_fmt(&argp->fh),
 		argp->attrs.ia_valid, (long) argp->attrs.ia_size);
@@ -96,7 +96,7 @@ static __be32
 nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 					 struct nfsd_diropres  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP   %s %.*s\n",
 		SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -116,7 +116,7 @@ static __be32
 nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
 					   struct nfsd_readlinkres *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
 
@@ -136,7 +136,7 @@ static __be32
 nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
 				       struct nfsd_readres  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: READ    %s %d bytes at %d\n",
 		SVCFH_fmt(&argp->fh),
@@ -176,7 +176,7 @@ static __be32
 nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
 					struct nfsd_attrstat  *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 	int	stable = 1;
 
 	dprintk("nfsd: WRITE    %s %d bytes at %d\n",
@@ -206,7 +206,8 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 	struct iattr	*attr = &argp->attrs;
 	struct inode	*inode;
 	struct dentry	*dchild;
-	int		nfserr, type, mode;
+	int		type, mode;
+	__be32		nfserr;
 	dev_t		rdev = 0, wanted = new_decode_dev(attr->ia_size);
 
 	dprintk("nfsd: CREATE   %s %.*s\n",
@@ -352,7 +353,7 @@ static __be32
 nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 					 void		       *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE   %s %.*s\n", SVCFH_fmt(&argp->fh),
 		argp->len, argp->name);
@@ -367,7 +368,7 @@ static __be32
 nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
 				  	 void		        *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: RENAME   %s %.*s -> \n",
 		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname);
@@ -385,7 +386,7 @@ static __be32
 nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
 				void			    *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: LINK     %s ->\n",
 		SVCFH_fmt(&argp->ffh));
@@ -406,7 +407,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
 				          void			  *resp)
 {
 	struct svc_fh	newfh;
-	int		nfserr;
+	__be32		nfserr;
 
 	dprintk("nfsd: SYMLINK  %s %.*s -> %.*s\n",
 		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
@@ -434,7 +435,7 @@ static __be32
 nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 					struct nfsd_diropres   *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
 
@@ -458,7 +459,7 @@ static __be32
 nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 				 	void		      *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
 
@@ -474,7 +475,8 @@ static __be32
 nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
 					  struct nfsd_readdirres  *resp)
 {
-	int		nfserr, count;
+	int		count;
+	__be32		nfserr;
 	loff_t		offset;
 
 	dprintk("nfsd: READDIR  %s %d bytes at %d\n",
@@ -513,7 +515,7 @@ static __be32
 nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle   *argp,
 					  struct nfsd_statfsres *resp)
 {
-	int	nfserr;
+	__be32	nfserr;
 
 	dprintk("nfsd: STATFS   %s\n", SVCFH_fmt(&argp->fh));
 
-- 
cgit v1.2.3


From b8dd7b9ab194d9ab322881f49fde42954757efae Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:29:01 -0700
Subject: [PATCH] nfsd: NFSv4 errno endianness annotations

Don't use the same variable to store both NFS protocol (__be32) and host (int errno) error values.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 15 +++++++++------
 fs/nfsd/nfs4xdr.c   | 42 ++++++++++++++++++++++--------------------
 2 files changed, 31 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e5ca6d7028df..ae1d47715b90 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2646,6 +2646,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	struct file_lock conflock;
 	__be32 status = 0;
 	unsigned int strhashval;
+	int err;
 
 	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
 		(long long) lock->lk_offset,
@@ -2758,13 +2759,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	 * locks_copy_lock: */
 	conflock.fl_ops = NULL;
 	conflock.fl_lmops = NULL;
-	status = posix_lock_file_conf(filp, &file_lock, &conflock);
+	err = posix_lock_file_conf(filp, &file_lock, &conflock);
 	dprintk("NFSD: nfsd4_lock: posix_lock_file_conf status %d\n",status);
-	switch (-status) {
+	switch (-err) {
 	case 0: /* success! */
 		update_stateid(&lock_stp->st_stateid);
 		memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid, 
 				sizeof(stateid_t));
+		status = 0;
 		break;
 	case (EAGAIN):		/* conflock holds conflicting lock */
 		status = nfserr_denied;
@@ -2775,7 +2777,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 		status = nfserr_deadlock;
 		break;
 	default:        
-		dprintk("NFSD: nfsd4_lock: posix_lock_file_conf() failed! status %d\n",status);
+		dprintk("NFSD: nfsd4_lock: posix_lock_file_conf() failed! status %d\n",err);
 		status = nfserr_resource;
 		break;
 	}
@@ -2880,6 +2882,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	struct file *filp = NULL;
 	struct file_lock file_lock;
 	__be32 status;
+	int err;
 						        
 	dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
 		(long long) locku->lu_offset,
@@ -2917,8 +2920,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	/*
 	*  Try to unlock the file in the VFS.
 	*/
-	status = posix_lock_file(filp, &file_lock); 
-	if (status) {
+	err = posix_lock_file(filp, &file_lock);
+	if (err) {
 		dprintk("NFSD: nfs4_locku: posix_lock_file failed!\n");
 		goto out_nfserr;
 	}
@@ -2937,7 +2940,7 @@ out:
 	return status;
 
 out_nfserr:
-	status = nfserrno(status);
+	status = nfserrno(err);
 	goto out;
 }
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d7b630f1a9ae..f3f239db04bb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -247,6 +247,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
 	int expected_len, len = 0;
 	u32 dummy32;
 	char *buf;
+	int host_err;
 
 	DECODE_HEAD;
 	iattr->ia_valid = 0;
@@ -280,7 +281,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
 
 		*acl = nfs4_acl_new();
 		if (*acl == NULL) {
-			status = -ENOMEM;
+			host_err = -ENOMEM;
 			goto out_nfserr;
 		}
 		defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl);
@@ -295,20 +296,20 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
 			len += XDR_QUADLEN(dummy32) << 2;
 			READMEM(buf, dummy32);
 			ace.whotype = nfs4_acl_get_whotype(buf, dummy32);
-			status = 0;
+			host_err = 0;
 			if (ace.whotype != NFS4_ACL_WHO_NAMED)
 				ace.who = 0;
 			else if (ace.flag & NFS4_ACE_IDENTIFIER_GROUP)
-				status = nfsd_map_name_to_gid(argp->rqstp,
+				host_err = nfsd_map_name_to_gid(argp->rqstp,
 						buf, dummy32, &ace.who);
 			else
-				status = nfsd_map_name_to_uid(argp->rqstp,
+				host_err = nfsd_map_name_to_uid(argp->rqstp,
 						buf, dummy32, &ace.who);
-			if (status)
+			if (host_err)
 				goto out_nfserr;
-			status = nfs4_acl_add_ace(*acl, ace.type, ace.flag,
+			host_err = nfs4_acl_add_ace(*acl, ace.type, ace.flag,
 				 ace.access_mask, ace.whotype, ace.who);
-			if (status)
+			if (host_err)
 				goto out_nfserr;
 		}
 	} else
@@ -327,7 +328,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
 		READ_BUF(dummy32);
 		len += (XDR_QUADLEN(dummy32) << 2);
 		READMEM(buf, dummy32);
-		if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+		if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
 			goto out_nfserr;
 		iattr->ia_valid |= ATTR_UID;
 	}
@@ -338,7 +339,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
 		READ_BUF(dummy32);
 		len += (XDR_QUADLEN(dummy32) << 2);
 		READMEM(buf, dummy32);
-		if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+		if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
 			goto out_nfserr;
 		iattr->ia_valid |= ATTR_GID;
 	}
@@ -414,7 +415,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
 	DECODE_TAIL;
 
 out_nfserr:
-	status = nfserrno(status);
+	status = nfserrno(host_err);
 	goto out;
 }
 
@@ -1438,6 +1439,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	u32 rdattr_err = 0;
 	__be32 *p = buffer;
 	__be32 status;
+	int err;
 	int aclsupport = 0;
 	struct nfs4_acl *acl = NULL;
 
@@ -1451,14 +1453,14 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 			goto out;
 	}
 
-	status = vfs_getattr(exp->ex_mnt, dentry, &stat);
-	if (status)
+	err = vfs_getattr(exp->ex_mnt, dentry, &stat);
+	if (err)
 		goto out_nfserr;
 	if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL)) ||
 	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
 		       FATTR4_WORD1_SPACE_TOTAL))) {
-		status = vfs_statfs(dentry, &statfs);
-		if (status)
+		err = vfs_statfs(dentry, &statfs);
+		if (err)
 			goto out_nfserr;
 	}
 	if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) {
@@ -1470,15 +1472,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	}
 	if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
 			| FATTR4_WORD0_SUPPORTED_ATTRS)) {
-		status = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
-		aclsupport = (status == 0);
+		err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
+		aclsupport = (err == 0);
 		if (bmval0 & FATTR4_WORD0_ACL) {
-			if (status == -EOPNOTSUPP)
+			if (err == -EOPNOTSUPP)
 				bmval0 &= ~FATTR4_WORD0_ACL;
-			else if (status == -EINVAL) {
+			else if (err == -EINVAL) {
 				status = nfserr_attrnotsupp;
 				goto out;
-			} else if (status != 0)
+			} else if (err != 0)
 				goto out_nfserr;
 		}
 	}
@@ -1818,7 +1820,7 @@ out:
 		fh_put(&tempfh);
 	return status;
 out_nfserr:
-	status = nfserrno(status);
+	status = nfserrno(err);
 	goto out;
 out_resource:
 	*countp = 0;
-- 
cgit v1.2.3


From f00f328fda1eeec575cd0f360da81b66bf4133a1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:29:01 -0700
Subject: [PATCH] xdr annotations: nfsd callback*

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4callback.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 324a278f2808..8497ed4862b2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -85,8 +85,8 @@ enum nfs_cb_opnum4 {
 /*
 * Generic encode routines from fs/nfs/nfs4xdr.c
 */
-static inline u32 *
-xdr_writemem(u32 *p, const void *ptr, int nbytes)
+static inline __be32 *
+xdr_writemem(__be32 *p, const void *ptr, int nbytes)
 {
 	int tmp = XDR_QUADLEN(nbytes);
 	if (!tmp)
@@ -205,7 +205,7 @@ nfs_cb_stat_to_errno(int stat)
 static int
 encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
 {
-	u32 * p;
+	__be32 * p;
 
 	RESERVE_SPACE(16);
 	WRITE32(0);            /* tag length is always 0 */
@@ -218,7 +218,7 @@ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
 static int
 encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
 {
-	u32 *p;
+	__be32 *p;
 	int len = cb_rec->cbr_fhlen;
 
 	RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len);
@@ -231,7 +231,7 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
 }
 
 static int
-nfs4_xdr_enc_cb_null(struct rpc_rqst *req, u32 *p)
+nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
 {
 	struct xdr_stream xdrs, *xdr = &xdrs;
 
@@ -241,7 +241,7 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, u32 *p)
 }
 
 static int
-nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, u32 *p, struct nfs4_cb_recall *args)
+nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_cb_recall *args)
 {
 	struct xdr_stream xdr;
 	struct nfs4_cb_compound_hdr hdr = {
@@ -257,7 +257,7 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, u32 *p, struct nfs4_cb_recall *args
 
 static int
 decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
-        u32 *p;
+        __be32 *p;
 
         READ_BUF(8);
         READ32(hdr->status);
@@ -272,7 +272,7 @@ decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
 static int
 decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 {
-	u32 *p;
+	__be32 *p;
 	u32 op;
 	int32_t nfserr;
 
@@ -291,13 +291,13 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 }
 
 static int
-nfs4_xdr_dec_cb_null(struct rpc_rqst *req, u32 *p)
+nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
 {
 	return 0;
 }
 
 static int
-nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, u32 *p)
+nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p)
 {
 	struct xdr_stream xdr;
 	struct nfs4_cb_compound_hdr hdr;
-- 
cgit v1.2.3


From c7afef1f963bec198b186cc34b9e8c9b9ce2e266 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:29:02 -0700
Subject: [PATCH] nfsd: misc endianness annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c       | 4 ++--
 fs/nfsd/lockd.c        | 2 +-
 fs/nfsd/nfs4callback.c | 2 +-
 fs/nfsd/nfs4state.c    | 4 ++--
 fs/nfsd/nfscache.c     | 8 ++++----
 fs/nfsd/nfssvc.c       | 2 +-
 6 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index e13fa23bd108..f37df46d2eaa 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1148,12 +1148,12 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
  * for a given NFSv4 client.   The root is defined to be the
  * export point with fsid==0
  */
-int
+__be32
 exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
 	       struct cache_req *creq)
 {
 	struct svc_export *exp;
-	int rv;
+	__be32 rv;
 	u32 fsidv[2];
 
 	mk_fsid_v1(fsidv, 0);
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 9b9e7e127c03..11fdaf7721b4 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -25,7 +25,7 @@
 static u32
 nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
 {
-	u32		nfserr;
+	__be32		nfserr;
 	struct svc_fh	fh;
 
 	/* must initialize before using! but maxsize doesn't matter */
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 8497ed4862b2..f57655a7a2b6 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -461,7 +461,7 @@ nfs4_cb_null(struct rpc_task *task, void *dummy)
 {
 	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
 	struct nfs4_callback *cb = &clp->cl_callback;
-	u32 addr = htonl(cb->cb_addr);
+	__be32 addr = htonl(cb->cb_addr);
 
 	dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status);
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ae1d47715b90..2e468c9e64d9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -713,7 +713,7 @@ out_err:
 __be32
 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 {
-	u32 			ip_addr = rqstp->rq_addr.sin_addr.s_addr;
+	__be32 			ip_addr = rqstp->rq_addr.sin_addr.s_addr;
 	struct xdr_netobj 	clname = { 
 		.len = setclid->se_namelen,
 		.data = setclid->se_name,
@@ -878,7 +878,7 @@ out:
 __be32
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
 {
-	u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
+	__be32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
 	struct nfs4_client *conf, *unconf;
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
 	clientid_t * clid = &setclientid_confirm->sc_clientid;
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index fdf7cf3dfadc..6100bbe27432 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -29,7 +29,7 @@
  */
 #define CACHESIZE		1024
 #define HASHSIZE		64
-#define REQHASH(xid)		((((xid) >> 24) ^ (xid)) & (HASHSIZE-1))
+#define REQHASH(xid)		(((((__force __u32)xid) >> 24) ^ ((__force __u32)xid)) & (HASHSIZE-1))
 
 static struct hlist_head *	hash_list;
 static struct list_head 	lru_head;
@@ -127,8 +127,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
 	struct hlist_node	*hn;
 	struct hlist_head 	*rh;
 	struct svc_cacherep	*rp;
-	u32			xid = rqstp->rq_xid,
-				proto =  rqstp->rq_prot,
+	__be32			xid = rqstp->rq_xid;
+	u32			proto =  rqstp->rq_prot,
 				vers = rqstp->rq_vers,
 				proc = rqstp->rq_proc;
 	unsigned long		age;
@@ -258,7 +258,7 @@ found_entry:
  * In this case, nfsd_cache_update is called with statp == NULL.
  */
 void
-nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp)
+nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 {
 	struct svc_cacherep *rp;
 	struct kvec	*resv = &rqstp->rq_res.head[0], *cachv;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 8067118b1c0c..0aaccb03bf76 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -491,7 +491,7 @@ out:
 }
 
 int
-nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
+nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
 	struct svc_procedure	*proc;
 	kxdrproc_t		xdr;
-- 
cgit v1.2.3


From a90b061c0bf712961cea40d9c916b300073d12e5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 19 Oct 2006 23:29:03 -0700
Subject: [PATCH] nfsd: nfs_replay_me

We are using NFSERR_REPLAY_ME as a special error value that is never leaked
to clients.  That works fine; the only problem is mixing host- and network-
endian values in the same objects.  A network-endian equivalent
(nfserr_replay_me) would work just as well; switch to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c  | 6 +++---
 fs/nfsd/nfs4state.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 63823945f972..0a7bbdc4a10a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -177,7 +177,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 
 	/* check seqid for replay. set nfs4_owner */
 	status = nfsd4_process_open1(open);
-	if (status == NFSERR_REPLAY_ME) {
+	if (status == nfserr_replay_me) {
 		struct nfs4_replay *rp = &open->op_stateowner->so_replay;
 		fh_put(current_fh);
 		current_fh->fh_handle.fh_size = rp->rp_openfh_len;
@@ -188,7 +188,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 			dprintk("nfsd4_open: replay failed"
 				" restoring previous filehandle\n");
 		else
-			status = NFSERR_REPLAY_ME;
+			status = nfserr_replay_me;
 	}
 	if (status)
 		goto out;
@@ -937,7 +937,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 		}
 
 encode_op:
-		if (op->status == NFSERR_REPLAY_ME) {
+		if (op->status == nfserr_replay_me) {
 			op->replay = &replay_owner->so_replay;
 			nfsd4_encode_replay(resp, op);
 			status = op->status = op->replay->rp_status;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2e468c9e64d9..293b6495829f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1477,7 +1477,7 @@ nfsd4_process_open1(struct nfsd4_open *open)
 	}
 	if (open->op_seqid == sop->so_seqid - 1) {
 		if (sop->so_replay.rp_buflen)
-			return NFSERR_REPLAY_ME;
+			return nfserr_replay_me;
 		/* The original OPEN failed so spectacularly
 		 * that we don't even have replay data saved!
 		 * Therefore, we have no choice but to continue
@@ -2233,7 +2233,7 @@ check_replay:
 	if (seqid == sop->so_seqid - 1) {
 		dprintk("NFSD: preprocess_seqid_op: retransmission?\n");
 		/* indicate replay to calling function */
-		return NFSERR_REPLAY_ME;
+		return nfserr_replay_me;
 	}
 	printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n",
 			sop->so_seqid, seqid);
-- 
cgit v1.2.3


From 3e2a532b26b491706bd8b5c7cfc8d767b43b8f36 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 19 Oct 2006 23:29:11 -0700
Subject: [PATCH] ext3/4: fix J_ASSERT(transaction->t_updates > 0) in
 journal_stop()

A disk generated an I/O error; after that, I hit
J_ASSERT(transaction->t_updates > 0) in journal_stop().

From the stack trace, it seems to have happened on the ext3_truncate() path.
The following case may then trigger J_ASSERT(transaction->t_updates > 0).

ext3_truncate()
    -> ext3_free_branches()
        -> ext3_journal_test_restart()
	    -> ext3_journal_restart()
                -> journal_restart()
                transaction->t_updates--;
                /* another process aborted journal */
                    -> start_this_handle()
		    returns -EROFS without transaction->t_updates++;

    -> ext3_journal_stop()
        -> journal_stop()
	J_ASSERT(transaction->t_updates > 0)

If the journal is aborted in the middle of journal_restart(), ext3_truncate()
may trigger the J_ASSERT().

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/transaction.c  | 5 +++--
 fs/jbd2/transaction.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index e1b3c8af4d17..d5c63047a8b3 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1314,13 +1314,14 @@ int journal_stop(handle_t *handle)
 	int old_handle_count, err;
 	pid_t pid;
 
-	J_ASSERT(transaction->t_updates > 0);
 	J_ASSERT(journal_current_handle() == handle);
 
 	if (is_handle_aborted(handle))
 		err = -EIO;
-	else
+	else {
+		J_ASSERT(transaction->t_updates > 0);
 		err = 0;
+	}
 
 	if (--handle->h_ref > 0) {
 		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 149957bef907..b6cf2be845a1 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1314,13 +1314,14 @@ int jbd2_journal_stop(handle_t *handle)
 	int old_handle_count, err;
 	pid_t pid;
 
-	J_ASSERT(transaction->t_updates > 0);
 	J_ASSERT(journal_current_handle() == handle);
 
 	if (is_handle_aborted(handle))
 		err = -EIO;
-	else
+	else {
+		J_ASSERT(transaction->t_updates > 0);
 		err = 0;
+	}
 
 	if (--handle->h_ref > 0) {
 		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
-- 
cgit v1.2.3


From 79cd22d3ac921b9209bf813c7e75e6b69e74896c Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Thu, 12 Oct 2006 14:29:33 +0900
Subject: ocfs2: delete redundant memcmp()

This patch removes the redundant second memcmp() call when looking up a node
in the rb tree; the result of a single comparison is reused instead.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/nodemanager.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index e1fceb8aa32d..d11753c50bc1 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -152,14 +152,16 @@ static struct o2nm_node *o2nm_node_ip_tree_lookup(struct o2nm_cluster *cluster,
 	struct o2nm_node *node, *ret = NULL;
 
 	while (*p) {
+		int cmp;
+
 		parent = *p;
 		node = rb_entry(parent, struct o2nm_node, nd_ip_node);
 
-		if (memcmp(&ip_needle, &node->nd_ipv4_address,
-		           sizeof(ip_needle)) < 0)
+		cmp = memcmp(&ip_needle, &node->nd_ipv4_address,
+				sizeof(ip_needle));
+		if (cmp < 0)
 			p = &(*p)->rb_left;
-		else if (memcmp(&ip_needle, &node->nd_ipv4_address,
-			        sizeof(ip_needle)) > 0)
+		else if (cmp > 0)
 			p = &(*p)->rb_right;
 		else {
 			ret = node;
-- 
cgit v1.2.3


From 711a40fcaa83bfad87736544b69f6fdd6527482d Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 11 Oct 2006 12:23:02 -0700
Subject: ocfs2: remove spurious d_count check in ocfs2_rename()

This was causing some folks to incorrectly get -EBUSY during rename.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 259155f0eb2e..a57b751d4f40 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1085,14 +1085,6 @@ static int ocfs2_rename(struct inode *old_dir,
 			BUG();
 	}
 
-	if (atomic_read(&old_dentry->d_count) > 2) {
-		shrink_dcache_parent(old_dentry);
-		if (atomic_read(&old_dentry->d_count) > 2) {
-			status = -EBUSY;
-			goto bail;
-		}
-	}
-
 	/* Assume a directory heirarchy thusly:
 	 * a/b/c
 	 * a/d
-- 
cgit v1.2.3


From 0effef776ff95b7a6d6e48a2ef407ecaa8c21f96 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 3 Oct 2006 17:44:42 -0700
Subject: ocfs2: fix page zeroing during simple extends

The page zeroing code was missing the region between old i_size and new
i_size for those extends that didn't actually require a change in space
allocation.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/file.c | 44 ++++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d9ba0a931a03..b499c329257b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -728,31 +728,36 @@ static int ocfs2_extend_file(struct inode *inode,
 	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) - 
 		OCFS2_I(inode)->ip_clusters;
 
-	if (clusters_to_add) {
-		/* 
-		 * protect the pages that ocfs2_zero_extend is going to
-		 * be pulling into the page cache.. we do this before the
-		 * metadata extend so that we don't get into the situation
-		 * where we've extended the metadata but can't get the data
-		 * lock to zero.
-		 */
-		ret = ocfs2_data_lock(inode, 1);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
+	/* 
+	 * protect the pages that ocfs2_zero_extend is going to be
+	 * pulling into the page cache.. we do this before the
+	 * metadata extend so that we don't get into the situation
+	 * where we've extended the metadata but can't get the data
+	 * lock to zero.
+	 */
+	ret = ocfs2_data_lock(inode, 1);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
 
+	if (clusters_to_add) {
 		ret = ocfs2_extend_allocation(inode, clusters_to_add);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out_unlock;
 		}
+	}
 
-		ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out_unlock;
-		}
+	/*
+	 * Call this even if we don't add any clusters to the tree. We
+	 * still need to zero the area between the old i_size and the
+	 * new i_size.
+	 */
+	ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_unlock;
 	}
 
 	if (!tail_to_skip) {
@@ -764,8 +769,7 @@ static int ocfs2_extend_file(struct inode *inode,
 	}
 
 out_unlock:
-	if (clusters_to_add) /* this is the only case in which we lock */
-		ocfs2_data_unlock(inode, 1);
+	ocfs2_data_unlock(inode, 1);
 
 out:
 	return ret;
-- 
cgit v1.2.3


From e2057c5a63821e17c8a54dab6db680c77ce7ee6c Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 3 Oct 2006 17:53:05 -0700
Subject: ocfs2: cond_resched() in ocfs2_zero_extend()

The loop within ocfs2_zero_extend() can execute for a long time, causing
spurious soft lockup warnings.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/file.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index b499c329257b..1be74c4e7814 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -30,6 +30,7 @@
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/uio.h>
+#include <linux/sched.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
@@ -691,6 +692,12 @@ static int ocfs2_zero_extend(struct inode *inode,
 		}
 
 		start_off += sb->s_blocksize;
+
+		/*
+		 * Very large extends have the potential to lock up
+		 * the cpu for extended periods of time.
+		 */
+		cond_resched();
 	}
 
 out:
-- 
cgit v1.2.3


From 559c9ac391c046710bdeee5581dc5d9dda794881 Mon Sep 17 00:00:00 2001
From: Chandra Seetharaman <sekharan@us.ibm.com>
Date: Tue, 10 Oct 2006 15:15:55 -0700
Subject: configfs: handle kzalloc() failure in check_perm()

check_perm() does not drop its reference to the module when kzalloc()
fails.

Signed-Off-By: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/file.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index e6d5754a715e..cf33fac68c84 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -275,13 +275,14 @@ static int check_perm(struct inode * inode, struct file * file)
 	 * it in file->private_data for easy access.
 	 */
 	buffer = kzalloc(sizeof(struct configfs_buffer),GFP_KERNEL);
-	if (buffer) {
-		init_MUTEX(&buffer->sem);
-		buffer->needs_read_fill = 1;
-		buffer->ops = ops;
-		file->private_data = buffer;
-	} else
+	if (!buffer) {
 		error = -ENOMEM;
+		goto Enomem;
+	}
+	init_MUTEX(&buffer->sem);
+	buffer->needs_read_fill = 1;
+	buffer->ops = ops;
+	file->private_data = buffer;
 	goto Done;
 
  Einval:
@@ -289,6 +290,7 @@ static int check_perm(struct inode * inode, struct file * file)
 	goto Done;
  Eaccess:
 	error = -EACCES;
+ Enomem:
 	module_put(attr->ca_owner);
  Done:
 	if (error && item)
-- 
cgit v1.2.3


From 9eaef27b36a6b716384948da94b8fc5bfba7b712 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 21 Oct 2006 10:24:20 -0700
Subject: [PATCH] VFS: Make d_materialise_unique() enforce directory uniqueness

If the caller tries to instantiate a directory using an inode that already
has a dentry alias, then we attempt to rename the existing dentry instead
of instantiating a new one.  Fail with an ELOOP error if the rename would
affect one of our parent directories.

This behaviour is needed in order to avoid issues such as

  http://bugzilla.kernel.org/show_bug.cgi?id=7178

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c  | 137 +++++++++++++++++++++++++++++++++++++++++++----------------
 fs/nfs/dir.c |   7 ++-
 2 files changed, 106 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 2bac4ba1d1d3..a1ff91eef108 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1469,23 +1469,21 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
  * deleted it.
  */
  
-/**
- * d_move - move a dentry
+/*
+ * d_move_locked - move a dentry
  * @dentry: entry to move
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
  * dcache entries should not be moved in this way.
  */
-
-void d_move(struct dentry * dentry, struct dentry * target)
+static void d_move_locked(struct dentry * dentry, struct dentry * target)
 {
 	struct hlist_head *list;
 
 	if (!dentry->d_inode)
 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
-	spin_lock(&dcache_lock);
 	write_seqlock(&rename_lock);
 	/*
 	 * XXXX: do we really need to take target->d_lock?
@@ -1536,9 +1534,83 @@ already_unhashed:
 	fsnotify_d_move(dentry);
 	spin_unlock(&dentry->d_lock);
 	write_sequnlock(&rename_lock);
+}
+
+/**
+ * d_move - move a dentry
+ * @dentry: entry to move
+ * @target: new dentry
+ *
+ * Update the dcache to reflect the move of a file name. Negative
+ * dcache entries should not be moved in this way.
+ */
+
+void d_move(struct dentry * dentry, struct dentry * target)
+{
+	spin_lock(&dcache_lock);
+	d_move_locked(dentry, target);
 	spin_unlock(&dcache_lock);
 }
 
+/*
+ * Helper that returns 1 if p1 is a parent of p2, else 0
+ */
+static int d_isparent(struct dentry *p1, struct dentry *p2)
+{
+	struct dentry *p;
+
+	for (p = p2; p->d_parent != p; p = p->d_parent) {
+		if (p->d_parent == p1)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * This helper attempts to cope with remotely renamed directories
+ *
+ * It assumes that the caller is already holding
+ * dentry->d_parent->d_inode->i_mutex and the dcache_lock
+ *
+ * Note: If ever the locking in lock_rename() changes, then please
+ * remember to update this too...
+ *
+ * On return, dcache_lock will have been unlocked.
+ */
+static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
+{
+	struct mutex *m1 = NULL, *m2 = NULL;
+	struct dentry *ret;
+
+	/* If alias and dentry share a parent, then no extra locks required */
+	if (alias->d_parent == dentry->d_parent)
+		goto out_unalias;
+
+	/* Check for loops */
+	ret = ERR_PTR(-ELOOP);
+	if (d_isparent(alias, dentry))
+		goto out_err;
+
+	/* See lock_rename() */
+	ret = ERR_PTR(-EBUSY);
+	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
+		goto out_err;
+	m1 = &dentry->d_sb->s_vfs_rename_mutex;
+	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+		goto out_err;
+	m2 = &alias->d_parent->d_inode->i_mutex;
+out_unalias:
+	d_move_locked(alias, dentry);
+	ret = alias;
+out_err:
+	spin_unlock(&dcache_lock);
+	if (m2)
+		mutex_unlock(m2);
+	if (m1)
+		mutex_unlock(m1);
+	return ret;
+}
+
 /*
  * Prepare an anonymous dentry for life in the superblock's dentry tree as a
  * named dentry in place of the dentry to be replaced.
@@ -1581,7 +1653,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
  */
 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 {
-	struct dentry *alias, *actual;
+	struct dentry *actual;
 
 	BUG_ON(!d_unhashed(dentry));
 
@@ -1593,26 +1665,27 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 		goto found_lock;
 	}
 
-	/* See if a disconnected directory already exists as an anonymous root
-	 * that we should splice into the tree instead */
-	if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
-		spin_lock(&alias->d_lock);
-
-		/* Is this a mountpoint that we could splice into our tree? */
-		if (IS_ROOT(alias))
-			goto connect_mountpoint;
-
-		if (alias->d_name.len == dentry->d_name.len &&
-		    alias->d_parent == dentry->d_parent &&
-		    memcmp(alias->d_name.name,
-			   dentry->d_name.name,
-			   dentry->d_name.len) == 0)
-			goto replace_with_alias;
-
-		spin_unlock(&alias->d_lock);
-
-		/* Doh! Seem to be aliasing directories for some reason... */
-		dput(alias);
+	if (S_ISDIR(inode->i_mode)) {
+		struct dentry *alias;
+
+		/* Does an aliased dentry already exist? */
+		alias = __d_find_alias(inode, 0);
+		if (alias) {
+			actual = alias;
+			/* Is this an anonymous mountpoint that we could splice
+			 * into our tree? */
+			if (IS_ROOT(alias)) {
+				spin_lock(&alias->d_lock);
+				__d_materialise_dentry(dentry, alias);
+				__d_drop(alias);
+				goto found;
+			}
+			/* Nope, but we must(!) avoid directory aliasing */
+			actual = __d_unalias(dentry, alias);
+			if (IS_ERR(actual))
+				dput(alias);
+			goto out_nolock;
+		}
 	}
 
 	/* Add a unique reference */
@@ -1628,7 +1701,7 @@ found:
 	_d_rehash(actual);
 	spin_unlock(&actual->d_lock);
 	spin_unlock(&dcache_lock);
-
+out_nolock:
 	if (actual == dentry) {
 		security_d_instantiate(dentry, inode);
 		return NULL;
@@ -1637,16 +1710,6 @@ found:
 	iput(inode);
 	return actual;
 
-	/* Convert the anonymous/root alias into an ordinary dentry */
-connect_mountpoint:
-	__d_materialise_dentry(dentry, alias);
-
-	/* Replace the candidate dentry with the alias in the tree */
-replace_with_alias:
-	__d_drop(alias);
-	actual = alias;
-	goto found;
-
 shouldnt_be_hashed:
 	spin_unlock(&dcache_lock);
 	BUG();
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4133ef5264e5..27b5a1051b1c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -935,8 +935,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 
 no_entry:
 	res = d_materialise_unique(dentry, inode);
-	if (res != NULL)
+	if (res != NULL) {
+		if (IS_ERR(res))
+			goto out_unlock;
 		dentry = res;
+	}
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out_unlock:
@@ -1132,6 +1135,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 	alias = d_materialise_unique(dentry, inode);
 	if (alias != NULL) {
 		dput(dentry);
+		if (IS_ERR(alias))
+			return NULL;
 		dentry = alias;
 	}
 
-- 
cgit v1.2.3


From fc22617e451f23b466d4d63bb016f5f6111b69e4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 21 Oct 2006 10:24:24 -0700
Subject: [PATCH] NFS: Cache invalidation fixup

If someone has renamed a directory on the server, triggering the d_move
code in d_materialise_unique(), then we need to invalidate the cached
directory information in the source parent directory.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/dir.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 27b5a1051b1c..b34cd16f472f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -936,8 +936,14 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 no_entry:
 	res = d_materialise_unique(dentry, inode);
 	if (res != NULL) {
+		struct dentry *parent;
 		if (IS_ERR(res))
 			goto out_unlock;
+		/* Was a directory renamed! */
+		parent = dget_parent(res);
+		if (!IS_ROOT(parent))
+			nfs_mark_for_revalidate(parent->d_inode);
+		dput(parent);
 		dentry = res;
 	}
 	nfs_renew_times(dentry);
-- 
cgit v1.2.3


From bcbaecbb9968750d4bfb2686a97e396f681f88ef Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Oct 2006 16:49:36 +1000
Subject: [CRYPTO] users: Select ECB/CBC where needed

CRYPTO_MANAGER is selected automatically by CONFIG_CRYPTO_ECB and
CONFIG_CRYPTO_CBC.

config CRYPTO_ECB
        tristate "ECB support"
        select CRYPTO_BLKCIPHER
        select CRYPTO_MANAGER


I've added CONFIG_CRYPTO_ECB to the ones you mentioned and CONFIG_CRYPTO_CBC
to gssapi.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 fs/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index fee318e6f4bb..133dcc8a4150 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1777,6 +1777,7 @@ config RPCSEC_GSS_KRB5
 	select CRYPTO
 	select CRYPTO_MD5
 	select CRYPTO_DES
+	select CRYPTO_CBC
 	help
 	  Provides for secure RPC calls by means of a gss-api
 	  mechanism based on Kerberos V5. This is required for
@@ -1795,6 +1796,7 @@ config RPCSEC_GSS_SPKM3
 	select CRYPTO_MD5
 	select CRYPTO_DES
 	select CRYPTO_CAST5
+	select CRYPTO_CBC
 	help
 	  Provides for secure RPC calls by means of a gss-api
 	  mechanism based on the SPKM3 public-key mechanism.
-- 
cgit v1.2.3


From 2ae88149a27cadf2840e0ab8155bef13be285c03 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 28 Oct 2006 10:38:23 -0700
Subject: [PATCH] mm: clean up pagecache allocation

- Consolidate page_cache_alloc

- Fix splice: only the pagecache pages and filesystem data need to use
  mapping_gfp_mask.

- Fix grab_cache_page_nowait: same as splice, also honour NUMA placement.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/splice.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 49fb9f129938..8d705954d294 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -74,7 +74,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
 		wait_on_page_writeback(page);
 
 		if (PagePrivate(page))
-			try_to_release_page(page, mapping_gfp_mask(mapping));
+			try_to_release_page(page, GFP_KERNEL);
 
 		/*
 		 * If we succeeded in removing the mapping, set LRU flag
@@ -333,7 +333,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 				break;
 
 			error = add_to_page_cache_lru(page, mapping, index,
-					      mapping_gfp_mask(mapping));
+					      GFP_KERNEL);
 			if (unlikely(error)) {
 				page_cache_release(page);
 				if (error == -EEXIST)
@@ -557,7 +557,6 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 {
 	struct file *file = sd->file;
 	struct address_space *mapping = file->f_mapping;
-	gfp_t gfp_mask = mapping_gfp_mask(mapping);
 	unsigned int offset, this_len;
 	struct page *page;
 	pgoff_t index;
@@ -591,7 +590,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 			goto find_page;
 
 		page = buf->page;
-		if (add_to_page_cache(page, mapping, index, gfp_mask)) {
+		if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) {
 			unlock_page(page);
 			goto find_page;
 		}
@@ -613,7 +612,7 @@ find_page:
 			 * This will also lock the page
 			 */
 			ret = add_to_page_cache_lru(page, mapping, index,
-						    gfp_mask);
+						    GFP_KERNEL);
 			if (unlikely(ret))
 				goto out;
 		}
-- 
cgit v1.2.3


From 1939e49a0cb9d73785857bf312f4f65661b4b513 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sat, 28 Oct 2006 10:38:26 -0700
Subject: [PATCH] ext4: fix printk format warnings

fs/ext4/resize.c:72: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:76: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:81: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:85: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:89: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:89: warning: long long unsigned int format, __u64 arg (arg 5)
fs/ext4/resize.c:93: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:93: warning: long long unsigned int format, __u64 arg (arg 5)
fs/ext4/resize.c:98: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:103: warning: long long unsigned int format, __u64 arg (arg 4)
fs/ext4/resize.c:109: warning: long long unsigned int format, __u64 arg (arg 4)

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/resize.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 1e9578052cd3..4fe49c3661b2 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -69,44 +69,49 @@ static int verify_group_input(struct super_block *sb,
 	else if (outside(input->block_bitmap, start, end))
 		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap not in group (block %llu)",
-			     input->block_bitmap);
+			     (unsigned long long)input->block_bitmap);
 	else if (outside(input->inode_bitmap, start, end))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode bitmap not in group (block %llu)",
-			     input->inode_bitmap);
+			     (unsigned long long)input->inode_bitmap);
 	else if (outside(input->inode_table, start, end) ||
 	         outside(itend - 1, start, end))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode table not in group (blocks %llu-%llu)",
-			     input->inode_table, itend - 1);
+			     (unsigned long long)input->inode_table, itend - 1);
 	else if (input->inode_bitmap == input->block_bitmap)
 		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap same as inode bitmap (%llu)",
-			     input->block_bitmap);
+			     (unsigned long long)input->block_bitmap);
 	else if (inside(input->block_bitmap, input->inode_table, itend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap (%llu) in inode table (%llu-%llu)",
-			     input->block_bitmap, input->inode_table, itend-1);
+			     (unsigned long long)input->block_bitmap,
+			     (unsigned long long)input->inode_table, itend - 1);
 	else if (inside(input->inode_bitmap, input->inode_table, itend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode bitmap (%llu) in inode table (%llu-%llu)",
-			     input->inode_bitmap, input->inode_table, itend-1);
+			     (unsigned long long)input->inode_bitmap,
+			     (unsigned long long)input->inode_table, itend - 1);
 	else if (inside(input->block_bitmap, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Block bitmap (%llu) in GDT table"
 			     " (%llu-%llu)",
-			     input->block_bitmap, start, metaend - 1);
+			     (unsigned long long)input->block_bitmap,
+			     start, metaend - 1);
 	else if (inside(input->inode_bitmap, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode bitmap (%llu) in GDT table"
 			     " (%llu-%llu)",
-			     input->inode_bitmap, start, metaend - 1);
+			     (unsigned long long)input->inode_bitmap,
+			     start, metaend - 1);
 	else if (inside(input->inode_table, start, metaend) ||
 	         inside(itend - 1, start, metaend))
 		ext4_warning(sb, __FUNCTION__,
 			     "Inode table (%llu-%llu) overlaps"
 			     "GDT table (%llu-%llu)",
-			     input->inode_table, itend - 1, start, metaend - 1);
+			     (unsigned long long)input->inode_table,
+			     itend - 1, start, metaend - 1);
 	else
 		err = 0;
 	brelse(bh);
-- 
cgit v1.2.3


From f58a74dca88d48b0669609b4957f3dd757bdc898 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Sat, 28 Oct 2006 10:38:27 -0700
Subject: [PATCH] jbd: journal_dirty_data re-check for unmapped buffers

When running several fsx's and other filesystem stress tests, we found
cases where an unmapped buffer was still being sent to submit_bh by the
ext3 dirty data journaling code.

I saw this happen in two ways, both related to another thread doing a
truncate which would unmap the buffer in question.

Either we would get into journal_dirty_data with a bh which was already
unmapped (although journal_dirty_data_fn had checked for this earlier, the
state was not locked at that point), or it would get unmapped in the middle
of journal_dirty_data when we dropped locks to call sync_dirty_buffer.

By re-checking for mapped state after we've acquired the bh state lock, we
should avoid these races.  If we find a buffer which is no longer mapped,
we essentially ignore it, because journal_unmap_buffer has already decided
that this buffer can go away.

I've also added tracepoints in these two cases, and made a couple other
tracepoint changes that I found useful in debugging this.

Signed-off-by: Eric Sandeen <esandeen@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/transaction.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index d5c63047a8b3..4f82bcd63e48 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -967,6 +967,13 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	 */
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
+
+	/* Now that we have bh_state locked, are we really still mapped? */
+	if (!buffer_mapped(bh)) {
+		JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
+		goto no_journal;
+	}
+
 	if (jh->b_transaction) {
 		JBUFFER_TRACE(jh, "has transaction");
 		if (jh->b_transaction != handle->h_transaction) {
@@ -1028,6 +1035,11 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 				sync_dirty_buffer(bh);
 				jbd_lock_bh_state(bh);
 				spin_lock(&journal->j_list_lock);
+				/* Since we dropped the lock... */
+				if (!buffer_mapped(bh)) {
+					JBUFFER_TRACE(jh, "buffer got unmapped");
+					goto no_journal;
+				}
 				/* The buffer may become locked again at any
 				   time if it is redirtied */
 			}
@@ -1824,6 +1836,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 			}
 		}
 	} else if (transaction == journal->j_committing_transaction) {
+		JBUFFER_TRACE(jh, "on committing transaction");
 		if (jh->b_jlist == BJ_Locked) {
 			/*
 			 * The buffer is on the committing transaction's locked
@@ -1838,7 +1851,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		 * can remove it's next_transaction pointer from the
 		 * running transaction if that is set, but nothing
 		 * else. */
-		JBUFFER_TRACE(jh, "on committing transaction");
 		set_buffer_freed(bh);
 		if (jh->b_next_transaction) {
 			J_ASSERT(jh->b_next_transaction ==
@@ -1858,6 +1870,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		 * i_size already for this truncate so recovery will not
 		 * expose the disk blocks we are discarding here.) */
 		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
+		JBUFFER_TRACE(jh, "on running transaction");
 		may_free = __dispose_buffer(jh, transaction);
 	}
 
-- 
cgit v1.2.3


From 9b57988db9b2c81794546cb792133f0cfd064ea8 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Sat, 28 Oct 2006 10:38:28 -0700
Subject: [PATCH] jbd2: journal_dirty_data re-check for unmapped buffers

When running several fsx's and other filesystem stress tests, we found
cases where an unmapped buffer was still being sent to submit_bh by the
ext3 dirty data journaling code.

I saw this happen in two ways, both related to another thread doing a
truncate which would unmap the buffer in question.

Either we would get into journal_dirty_data with a bh which was already
unmapped (although journal_dirty_data_fn had checked for this earlier, the
state was not locked at that point), or it would get unmapped in the middle
of journal_dirty_data when we dropped locks to call sync_dirty_buffer.

By re-checking for mapped state after we've acquired the bh state lock, we
should avoid these races.  If we find a buffer which is no longer mapped,
we essentially ignore it, because journal_unmap_buffer has already decided
that this buffer can go away.

I've also added tracepoints in these two cases, and made a couple other
tracepoint changes that I found useful in debugging this.

Signed-off-by: Eric Sandeen <esandeen@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/transaction.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b6cf2be845a1..c051a94c8a97 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -967,6 +967,13 @@ int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	 */
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
+
+	/* Now that we have bh_state locked, are we really still mapped? */
+	if (!buffer_mapped(bh)) {
+		JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
+		goto no_journal;
+	}
+
 	if (jh->b_transaction) {
 		JBUFFER_TRACE(jh, "has transaction");
 		if (jh->b_transaction != handle->h_transaction) {
@@ -1028,6 +1035,11 @@ int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 				sync_dirty_buffer(bh);
 				jbd_lock_bh_state(bh);
 				spin_lock(&journal->j_list_lock);
+				/* Since we dropped the lock... */
+				if (!buffer_mapped(bh)) {
+					JBUFFER_TRACE(jh, "buffer got unmapped");
+					goto no_journal;
+				}
 				/* The buffer may become locked again at any
 				   time if it is redirtied */
 			}
@@ -1824,6 +1836,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 			}
 		}
 	} else if (transaction == journal->j_committing_transaction) {
+		JBUFFER_TRACE(jh, "on committing transaction");
 		if (jh->b_jlist == BJ_Locked) {
 			/*
 			 * The buffer is on the committing transaction's locked
@@ -1838,7 +1851,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		 * can remove it's next_transaction pointer from the
 		 * running transaction if that is set, but nothing
 		 * else. */
-		JBUFFER_TRACE(jh, "on committing transaction");
 		set_buffer_freed(bh);
 		if (jh->b_next_transaction) {
 			J_ASSERT(jh->b_next_transaction ==
@@ -1858,6 +1870,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		 * i_size already for this truncate so recovery will not
 		 * expose the disk blocks we are discarding here.) */
 		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
+		JBUFFER_TRACE(jh, "on running transaction");
 		may_free = __dispose_buffer(jh, transaction);
 	}
 
-- 
cgit v1.2.3


From 6a2aae06cc1e87e9712a26a639f6a2f3442e2027 Mon Sep 17 00:00:00 2001
From: Pavel Emelianov <xemul@openvz.org>
Date: Sat, 28 Oct 2006 10:38:33 -0700
Subject: [PATCH] Fix potential OOPs in blkdev_open()

blkdev_open() calls bd_acquire() to get a struct block_device.  Since
bd_acquire() may return NULL when the system is out of memory, an appropriate
check is required.

Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index bc8f27cc4483..702b88cbd91d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1131,6 +1131,8 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	filp->f_flags |= O_LARGEFILE;
 
 	bdev = bd_acquire(inode);
+	if (bdev == NULL)
+		return -ENOMEM;
 
 	res = do_open(bdev, filp, BD_MUTEX_NORMAL);
 	if (res)
-- 
cgit v1.2.3


From b9d7e6ae82da124dc9c579fe1061264ef2a69407 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 28 Oct 2006 10:38:41 -0700
Subject: [PATCH] hugetlb: fix size=4G parsing

On 32-bit machines, mount -t hugetlbfs -o size=4G gave a 0GB filesystem,
size=5G gave a 1GB filesystem etc: there's no point in masking size with
HPAGE_MASK just before shifting its lower bits away, and since HPAGE_MASK is a
UL, that removed all the higher bits of the unsigned long long size.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4ee3f006b861..0b23b963bb44 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -624,7 +624,6 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 				do_div(size, 100);
 				rest++;
 			}
-			size &= HPAGE_MASK;
 			pconfig->nr_blocks = (size >> HPAGE_SHIFT);
 			value = rest;
 		} else if (!strcmp(opt,"nr_inodes")) {
-- 
cgit v1.2.3


From 856fc29505556cf263f3dcda2533cf3766c14ab6 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 28 Oct 2006 10:38:43 -0700
Subject: [PATCH] hugetlb: fix prio_tree unit

hugetlb_vmtruncate_list was misconverted to prio_tree: its prio_tree is in
units of PAGE_SIZE (PAGE_CACHE_SIZE) like any other, not HPAGE_SIZE (whereas
its radix_tree is kept in units of HPAGE_SIZE, otherwise slots would be
absurdly sparse).

At first I thought the error benign, just calling __unmap_hugepage_range on
more vmas than necessary; but on 32-bit machines, when the prio_tree is
searched correctly, it happens to ensure the v_offset calculation won't
overflow.  As it stood, when truncating at or beyond 4GB, it was liable to
discard pages COWed from lower offsets; or even to clear pmd entries of
preceding vmas, triggering exit_mmap's BUG_ON(nr_ptes).

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0b23b963bb44..0bea6a619e10 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -271,26 +271,24 @@ static void hugetlbfs_drop_inode(struct inode *inode)
 		hugetlbfs_forget_inode(inode);
 }
 
-/*
- * h_pgoff is in HPAGE_SIZE units.
- * vma->vm_pgoff is in PAGE_SIZE units.
- */
 static inline void
-hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
+hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
 {
 	struct vm_area_struct *vma;
 	struct prio_tree_iter iter;
 
-	vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
-		unsigned long h_vm_pgoff;
+	vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
 		unsigned long v_offset;
 
-		h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
-		v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT;
 		/*
-		 * Is this VMA fully outside the truncation point?
+		 * Can the expression below overflow on 32-bit arches?
+		 * No, because the prio_tree returns us only those vmas
+		 * which overlap the truncated area starting at pgoff,
+		 * and no vma on a 32-bit arch can span beyond the 4GB.
 		 */
-		if (h_vm_pgoff >= h_pgoff)
+		if (vma->vm_pgoff < pgoff)
+			v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT;
+		else
 			v_offset = 0;
 
 		__unmap_hugepage_range(vma,
@@ -303,14 +301,14 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
  */
 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
-	unsigned long pgoff;
+	pgoff_t pgoff;
 	struct address_space *mapping = inode->i_mapping;
 
 	if (offset > inode->i_size)
 		return -EINVAL;
 
 	BUG_ON(offset & ~HPAGE_MASK);
-	pgoff = offset >> HPAGE_SHIFT;
+	pgoff = offset >> PAGE_SHIFT;
 
 	inode->i_size = offset;
 	spin_lock(&mapping->i_mmap_lock);
-- 
cgit v1.2.3


From 6eac3f93f5e6b7256fb20b7608d62ec192da12be Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@sw.ru>
Date: Sat, 28 Oct 2006 10:38:44 -0700
Subject: [PATCH] missing unused dentry in prune_dcache()?

On the following patch:
http://linux.bkbits.net:8080/linux-2.6/gnupatch@449b144ecSF1rYskg3q-SeR2vf88zg

# ChangeSet
#   2006/06/22 15:05:57-07:00 neilb@suse.de
#   [PATCH] Fix dcache race during umount

#   If prune_dcache finds a dentry that it cannot free, it leaves it where it
#   is (at the tail of the list) and exits, on the assumption that some other
#   thread will be removing that dentry soon.

However, as far as I can see, this comment is not correct: when we cannot take
the s_umount rw_semaphore (for example because it was taken in do_remount), this
dentry has already been removed from the dentry_unused list and we do not add it
to the list again.  Therefore the dentry will not be found by prune_dcache()
or shrink_dcache_sb(), and will remain in memory for a very long time, until the
partition is unmounted.

The patch adds this dentry back onto the dentry_unused list (at the head,
since inserting at the tail leads to a cycle).

Signed-off-by: Vasily Averin <vvs@sw.ru>
Cc: Neil Brown <neilb@suse.de>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index a1ff91eef108..a5b76b647c6d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -478,11 +478,12 @@ static void prune_dcache(int count, struct super_block *sb)
 			up_read(s_umount);
 		}
 		spin_unlock(&dentry->d_lock);
-		/* Cannot remove the first dentry, and it isn't appropriate
-		 * to move it to the head of the list, so give up, and try
-		 * later
+		/*
+		 * Insert dentry at the head of the list as inserting at the
+		 * tail leads to a cycle.
 		 */
-		break;
+ 		list_add(&dentry->d_lru, &dentry_unused);
+		dentry_stat.nr_unused++;
 	}
 	spin_unlock(&dcache_lock);
 }
-- 
cgit v1.2.3


From f87135762de4328c6f17897e803e6909bc056feb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 28 Oct 2006 10:38:46 -0700
Subject: [PATCH] VFS: Fix an error in unused dentry counting

With Vasily Averin <vvs@sw.ru>

Fix an error in unused dentry counting in shrink_dcache_for_umount_subtree()
in which the count is modified without the dcache_lock held.

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Vasily Averin <vvs@sw.ru>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index a5b76b647c6d..fd4a428998ef 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -557,6 +557,7 @@ repeat:
 static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 {
 	struct dentry *parent;
+	unsigned detached = 0;
 
 	BUG_ON(!IS_ROOT(dentry));
 
@@ -621,7 +622,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 				atomic_dec(&parent->d_count);
 
 			list_del(&dentry->d_u.d_child);
-			dentry_stat.nr_dentry--;	/* For d_free, below */
+			detached++;
 
 			inode = dentry->d_inode;
 			if (inode) {
@@ -639,7 +640,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 			 * otherwise we ascend to the parent and move to the
 			 * next sibling if there is one */
 			if (!parent)
-				return;
+				goto out;
 
 			dentry = parent;
 
@@ -648,6 +649,11 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 		dentry = list_entry(dentry->d_subdirs.next,
 				    struct dentry, d_u.d_child);
 	}
+out:
+	/* several dentries were freed, need to correct nr_dentry */
+	spin_lock(&dcache_lock);
+	dentry_stat.nr_dentry -= detached;
+	spin_unlock(&dcache_lock);
 }
 
 /*
-- 
cgit v1.2.3


From 89f68225876db7df638de2884b561facb1870239 Mon Sep 17 00:00:00 2001
From: Daniel Drake <ddrake@brontes3d.com>
Date: Mon, 30 Oct 2006 11:47:02 -0600
Subject: jfs: Add splice support

This allows the splice() and tee() syscalls to be used with JFS.

Signed-off-by: Daniel Drake <ddrake@brontes3d.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/file.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 34181b8f5a0a..aa9132d04920 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -109,6 +109,8 @@ const struct file_operations jfs_file_operations = {
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
 	.fsync		= jfs_fsync,
 	.release	= jfs_release,
 	.ioctl		= jfs_ioctl,
-- 
cgit v1.2.3


From 7ca85ba752e521f1b5ead1f3b91c562cc3910c7b Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Mon, 30 Oct 2006 21:42:57 +0000
Subject: [CIFS] Fix readdir breakage when blocksize set too small

Do not treat filldir running out of space as an error that needs
to be returned.

Fixes Redhat bugzilla bug # 211070

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/readdir.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b5b0a2a41bef..ed18c3965f7b 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -896,6 +896,10 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 		     tmp_inode->i_ino,obj_type);
 	if(rc) {
 		cFYI(1,("filldir rc = %d",rc));
+		/* we can not return filldir errors to the caller
+		since they are "normal" when the stat blocksize
+		is too small - we return remapped error instead */
+		rc = -EOVERFLOW;
 	}
 
 	dput(tmp_dentry);
@@ -1074,6 +1078,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 			we want to check for that here? */
 			rc = cifs_filldir(current_entry, file,
 					filldir, direntry, tmp_buf, max_len);
+			if(rc == -EOVERFLOW) {
+				rc = 0;
+				break;
+			}
+
 			file->f_pos++;
 			if(file->f_pos == 
 				cifsFile->srch_inf.index_of_last_entry) {
-- 
cgit v1.2.3


From 4b952a9b0877dbe8f0f69b2747abe79e3bbd2865 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Mon, 30 Oct 2006 21:46:13 +0000
Subject: [CIFS] Allow null user connections

Some servers are configured to only allow null user mounts for
guest access.  Allow null user (anonymous) mounts, e.g.
	mount -t cifs //server/share /mnt -o username=

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES   | 3 +++
 fs/cifs/connect.c | 7 ++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1eb9a2ec0a3b..50afab81a59b 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,6 +1,9 @@
 Version 1.46
 ------------
 Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
+Allow null user to be specified on mount ("username="). Do not return
+EINVAL on readdir when filldir fails due to overwritten blocksize
+(fixes FC problem)
 
 Version 1.45
 ------------
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4093d5332930..71f77914ce93 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -822,10 +822,13 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 		} else if (strnicmp(data, "nouser_xattr",12) == 0) {
 			vol->no_xattr = 1;
 		} else if (strnicmp(data, "user", 4) == 0) {
-			if (!value || !*value) {
+			if (!value) {
 				printk(KERN_WARNING
 				       "CIFS: invalid or missing username\n");
 				return 1;	/* needs_arg; */
+			} else if(!*value) {
+				/* null user, ie anonymous, authentication */
+				vol->nullauth = 1;
 			}
 			if (strnlen(value, 200) < 200) {
 				vol->username = value;
@@ -1642,6 +1645,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		/* BB fixme parse for domain name here */
 		cFYI(1, ("Username: %s ", volume_info.username));
 
+	} else if (volume_info.nullauth) {
+		cFYI(1,("null user"));
 	} else {
 		cifserror("No username specified");
         /* In userspace mount helper we can get user name from alternate
-- 
cgit v1.2.3


From bcb55165d3d1ae3ec95807d118fd6d5956cd127b Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 30 Oct 2006 16:23:45 -0500
Subject: [PATCH] fix bd_claim_by_kobject error handling

This fixes bd_claim_by_kobject to release bdev correctly in the case where
bd_claim succeeds but the subsequent add_bd_holder fails.

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 702b88cbd91d..b54b0a1b7c68 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -751,8 +751,11 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 
 	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	res = bd_claim(bdev, holder);
-	if (res == 0)
+	if (res == 0) {
 		res = add_bd_holder(bdev, bo);
+		if (res)
+			bd_release(bdev);
+	}
 	if (res)
 		free_bd_holder(bo);
 	mutex_unlock(&bdev->bd_mutex);
-- 
cgit v1.2.3


From df6c0cd9a872ebf2298f5d66d8c789f62dbe35fc Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 30 Oct 2006 16:23:56 -0500
Subject: [PATCH] clean up add_bd_holder()

add_bd_holder() is called from bd_claim_by_kobject to put a given struct
bd_holder in the list if there is no matching entry.

There are 3 possible results of add_bd_holder():
  1. there is no matching entry, so the given one is added to the list
  2. there is a matching entry, so the reference count of the existing
     one is just incremented
  3. something failed along the way

1 and 2 are successful cases.  But for case 2, someone has to free the
unused struct bd_holder.

The current code frees it inside of add_bd_holder and returns same value
0 for both cases 1 and 2.  However, it's natural and less error-prone if
caller frees it since it's allocated by the caller.

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 53 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index b54b0a1b7c68..aaa8301f43f1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -641,17 +641,39 @@ static void free_bd_holder(struct bd_holder *bo)
 	kfree(bo);
 }
 
+/**
+ * find_bd_holder - find matching struct bd_holder from the block device
+ *
+ * @bdev:	struct block device to be searched
+ * @bo:		target struct bd_holder
+ *
+ * Returns matching entry with @bo in @bdev->bd_holder_list.
+ * If found, increment the reference count and return the pointer.
+ * If not found, returns NULL.
+ */
+static int find_bd_holder(struct block_device *bdev, struct bd_holder *bo)
+{
+	struct bd_holder *tmp;
+
+	list_for_each_entry(tmp, &bdev->bd_holder_list, list)
+		if (tmp->sdir == bo->sdir) {
+			tmp->count++;
+			return tmp;
+		}
+
+	return NULL;
+}
+
 /**
  * add_bd_holder - create sysfs symlinks for bd_claim() relationship
  *
  * @bdev:	block device to be bd_claimed
  * @bo:		preallocated and initialized by alloc_bd_holder()
  *
- * If there is no matching entry with @bo in @bdev->bd_holder_list,
- * add @bo to the list, create symlinks.
+ * Add @bo to @bdev->bd_holder_list, create symlinks.
  *
- * Returns 0 if symlinks are created or already there.
- * Returns -ve if something fails and @bo can be freed.
+ * Returns 0 if symlinks are created.
+ * Returns -ve if something fails.
  */
 static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
 {
@@ -661,15 +683,6 @@ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
 	if (!bo)
 		return -EINVAL;
 
-	list_for_each_entry(tmp, &bdev->bd_holder_list, list) {
-		if (tmp->sdir == bo->sdir) {
-			tmp->count++;
-			/* We've already done what we need to do here. */
-			free_bd_holder(bo);
-			return 0;
-		}
-	}
-
 	if (!bd_holder_grab_dirs(bdev, bo))
 		return -EBUSY;
 
@@ -740,7 +753,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 				struct kobject *kobj)
 {
 	int res;
-	struct bd_holder *bo;
+	struct bd_holder *bo, *found;
 
 	if (!kobj)
 		return -EINVAL;
@@ -752,11 +765,15 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	res = bd_claim(bdev, holder);
 	if (res == 0) {
-		res = add_bd_holder(bdev, bo);
-		if (res)
-			bd_release(bdev);
+		found = find_bd_holder(bdev, bo);
+		if (found == NULL) {
+			res = add_bd_holder(bdev, bo);
+			if (res)
+				bd_release(bdev);
+		}
 	}
-	if (res)
+
+	if (res || found)
 		free_bd_holder(bo);
 	mutex_unlock(&bdev->bd_mutex);
 
-- 
cgit v1.2.3


From 36a561d6a95c4b89ae4845bf91456b4f784b6eec Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 30 Oct 2006 22:07:03 -0800
Subject: [PATCH] find_bd_holder() fix

Fix the compiler warnings introduced with find_bd_holder(): make it return
struct bd_holder * instead of int, and remove the now-unused 'tmp' local
from add_bd_holder().

fs/block_dev.c: In function 'find_bd_holder':
fs/block_dev.c:666: warning: return makes integer from pointer without a cast
fs/block_dev.c:669: warning: return makes integer from pointer without a cast
fs/block_dev.c: In function 'add_bd_holder':
fs/block_dev.c:685: warning: unused variable 'tmp'
fs/block_dev.c: In function 'bd_claim_by_kobject':
fs/block_dev.c:773: warning: assignment makes pointer from integer without a cast

Acked-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index aaa8301f43f1..36c0e7af9d0f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -651,7 +651,8 @@ static void free_bd_holder(struct bd_holder *bo)
  * If found, increment the reference count and return the pointer.
  * If not found, returns NULL.
  */
-static int find_bd_holder(struct block_device *bdev, struct bd_holder *bo)
+static struct bd_holder *find_bd_holder(struct block_device *bdev,
+					struct bd_holder *bo)
 {
 	struct bd_holder *tmp;
 
@@ -677,7 +678,6 @@ static int find_bd_holder(struct block_device *bdev, struct bd_holder *bo)
  */
 static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
 {
-	struct bd_holder *tmp;
 	int ret;
 
 	if (!bo)
-- 
cgit v1.2.3


From e5d9cbde6ce0001e49994df5fcdcbeff8be8037b Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:16 -0800
Subject: [PATCH] eCryptfs: Clean up crypto initialization

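Clean up the crypto initialization code; let the crypto API take care of the
key size checks.  ecryptfs_process_cipher() now allocates only the key tfm and
takes the key size by pointer: when no key size was specified (0), it is set
to the cipher's maximum key size.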

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c          | 66 ++++++++-----------------------------------
 fs/ecryptfs/ecryptfs_kernel.h |  4 +--
 fs/ecryptfs/keystore.c        | 19 ++++++-------
 fs/ecryptfs/main.c            | 13 ++-------
 4 files changed, 24 insertions(+), 78 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index ed35a9712fa1..82e7d02cefae 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1573,35 +1573,26 @@ out:
 
 /**
  * ecryptfs_process_cipher - Perform cipher initialization.
- * @tfm: Crypto context set by this function
  * @key_tfm: Crypto context for key material, set by this function
- * @cipher_name: Name of the cipher.
- * @key_size: Size of the key in bytes.
+ * @cipher_name: Name of the cipher
+ * @key_size: Size of the key in bytes
  *
  * Returns zero on success. Any crypto_tfm structs allocated here
  * should be released by other functions, such as on a superblock put
  * event, regardless of whether this function succeeds for fails.
  */
 int
-ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
-			char *cipher_name, size_t key_size)
+ecryptfs_process_cipher(struct crypto_tfm **key_tfm, char *cipher_name,
+			size_t *key_size)
 {
 	char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
 	int rc;
 
-	*tfm = *key_tfm = NULL;
-	if (key_size > ECRYPTFS_MAX_KEY_BYTES) {
+	*key_tfm = NULL;
+	if (*key_size > ECRYPTFS_MAX_KEY_BYTES) {
 		rc = -EINVAL;
 		printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum "
-		       "allowable is [%d]\n", key_size, ECRYPTFS_MAX_KEY_BYTES);
-		goto out;
-	}
-	*tfm = crypto_alloc_tfm(cipher_name, (ECRYPTFS_DEFAULT_CHAINING_MODE
-					      | CRYPTO_TFM_REQ_WEAK_KEY));
-	if (!(*tfm)) {
-		rc = -EINVAL;
-		printk(KERN_ERR "Unable to allocate crypto cipher with name "
-		       "[%s]\n", cipher_name);
+		      "allowable is [%d]\n", *key_size, ECRYPTFS_MAX_KEY_BYTES);
 		goto out;
 	}
 	*key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY);
@@ -1611,46 +1602,13 @@ ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
 		       "[%s]\n", cipher_name);
 		goto out;
 	}
-	if (key_size < crypto_tfm_alg_min_keysize(*tfm)) {
-		rc = -EINVAL;
-		printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
-		       "supported by cipher [%s] is [%d]\n", key_size,
-		       cipher_name, crypto_tfm_alg_min_keysize(*tfm));
-		goto out;
-	}
-	if (key_size < crypto_tfm_alg_min_keysize(*key_tfm)) {
-		rc = -EINVAL;
-		printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
-		       "supported by cipher [%s] is [%d]\n", key_size,
-		       cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
-		goto out;
-	}
-	if (key_size > crypto_tfm_alg_max_keysize(*tfm)) {
-		rc = -EINVAL;
-		printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
-		       "supported by cipher [%s] is [%d]\n", key_size,
-		       cipher_name, crypto_tfm_alg_min_keysize(*tfm));
-		goto out;
-	}
-	if (key_size > crypto_tfm_alg_max_keysize(*key_tfm)) {
-		rc = -EINVAL;
-		printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
-		       "supported by cipher [%s] is [%d]\n", key_size,
-		       cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
-		goto out;
-	}
-	get_random_bytes(dummy_key, key_size);
-	rc = crypto_cipher_setkey(*tfm, dummy_key, key_size);
-	if (rc) {
-		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
-		       "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
-		rc = -EINVAL;
-		goto out;
-	}
-	rc = crypto_cipher_setkey(*key_tfm, dummy_key, key_size);
+	if (*key_size == 0)
+		*key_size = crypto_tfm_alg_max_keysize(*key_tfm);
+	get_random_bytes(dummy_key, *key_size);
+	rc = crypto_cipher_setkey(*key_tfm, dummy_key, *key_size);
 	if (rc) {
 		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
-		       "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
+		       "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc);
 		rc = -EINVAL;
 		goto out;
 	}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 872c9958531a..4112df9dec50 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -473,8 +473,8 @@ ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 			  unsigned char *src, struct dentry *ecryptfs_dentry);
 int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
 int
-ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
-			char *cipher_name, size_t key_size);
+ecryptfs_process_cipher(struct crypto_tfm **key_tfm, char *cipher_name,
+			size_t *key_size);
 int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
 int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
 void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index ba454785a0c5..bc706d33559a 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -493,19 +493,16 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 			goto out;
 		}
 	}
-	if (password_s_ptr->session_key_encryption_key_bytes
-	    < crypto_tfm_alg_min_keysize(tfm)) {
-		printk(KERN_WARNING "Session key encryption key is [%d] bytes; "
-		       "minimum keysize for selected cipher is [%d] bytes.\n",
-		       password_s_ptr->session_key_encryption_key_bytes,
-		       crypto_tfm_alg_min_keysize(tfm));
-		rc = -EINVAL;
-		goto out;
-	}
 	if (tfm_mutex)
 		mutex_lock(tfm_mutex);
-	crypto_cipher_setkey(tfm, password_s_ptr->session_key_encryption_key,
-			     crypt_stat->key_size);
+	rc = crypto_cipher_setkey(tfm,
+				  password_s_ptr->session_key_encryption_key,
+				  crypt_stat->key_size);
+	if (rc < 0) {
+		printk(KERN_ERR "Error setting key for crypto context\n");
+		rc = -EINVAL;
+		goto out_free_tfm;
+	}
 	/* TODO: virt_to_scatterlist */
 	encrypted_session_key = (char *)__get_free_page(GFP_KERNEL);
 	if (!encrypted_session_key) {
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 5938a232d11b..a65f4865182c 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -208,7 +208,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 	char *cipher_name_dst;
 	char *cipher_name_src;
 	char *cipher_key_bytes_src;
-	struct crypto_tfm *tmp_tfm;
 	int cipher_name_len;
 
 	if (!options) {
@@ -305,20 +304,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 		    = '\0';
 	}
 	if (!cipher_key_bytes_set) {
-		mount_crypt_stat->global_default_cipher_key_size =
-			ECRYPTFS_DEFAULT_KEY_BYTES;
-		ecryptfs_printk(KERN_DEBUG, "Cipher key size was not "
-				"specified.  Defaulting to [%d]\n",
-				mount_crypt_stat->
-				global_default_cipher_key_size);
+		mount_crypt_stat->global_default_cipher_key_size = 0;
 	}
 	rc = ecryptfs_process_cipher(
-		&tmp_tfm,
 		&mount_crypt_stat->global_key_tfm,
 		mount_crypt_stat->global_default_cipher_name,
-		mount_crypt_stat->global_default_cipher_key_size);
-	if (tmp_tfm)
-		crypto_free_tfm(tmp_tfm);
+		&mount_crypt_stat->global_default_cipher_key_size);
 	if (rc) {
 		printk(KERN_ERR "Error attempting to initialize cipher [%s] "
 		       "with key size [%Zd] bytes; rc = [%d]\n",
-- 
cgit v1.2.3


From 565d9724b8ce49b530287de34aa17f45f21624d5 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:17 -0800
Subject: [PATCH] eCryptfs: Hash code to new crypto API

Update eCryptfs hash code to the new kernel crypto API.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c          | 36 +++++++++++++++++++++---------------
 fs/ecryptfs/ecryptfs_kernel.h |  7 ++++---
 2 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 82e7d02cefae..f14c5a38215e 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -94,25 +94,31 @@ static int ecryptfs_calculate_md5(char *dst,
 				  struct ecryptfs_crypt_stat *crypt_stat,
 				  char *src, int len)
 {
-	int rc = 0;
 	struct scatterlist sg;
+	struct hash_desc desc = {
+		.tfm = crypt_stat->hash_tfm,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
+	};
+	int rc = 0;
 
-	mutex_lock(&crypt_stat->cs_md5_tfm_mutex);
+	mutex_lock(&crypt_stat->cs_hash_tfm_mutex);
 	sg_init_one(&sg, (u8 *)src, len);
-	if (!crypt_stat->md5_tfm) {
-		crypt_stat->md5_tfm =
-			crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
-		if (!crypt_stat->md5_tfm) {
-			rc = -ENOMEM;
+	if (!desc.tfm) {
+		desc.tfm = crypto_alloc_hash(ECRYPTFS_DEFAULT_HASH, 0,
+					     CRYPTO_ALG_ASYNC);
+		if (IS_ERR(desc.tfm)) {
+			rc = PTR_ERR(desc.tfm);
 			ecryptfs_printk(KERN_ERR, "Error attempting to "
-					"allocate crypto context\n");
+					"allocate crypto context; rc = [%d]\n",
+					rc);
 			goto out;
 		}
+		crypt_stat->hash_tfm = desc.tfm;
 	}
-	crypto_digest_init(crypt_stat->md5_tfm);
-	crypto_digest_update(crypt_stat->md5_tfm, &sg, 1);
-	crypto_digest_final(crypt_stat->md5_tfm, dst);
-	mutex_unlock(&crypt_stat->cs_md5_tfm_mutex);
+	crypto_hash_init(&desc);
+	crypto_hash_update(&desc, &sg, len);
+	crypto_hash_final(&desc, dst);
+	mutex_unlock(&crypt_stat->cs_hash_tfm_mutex);
 out:
 	return rc;
 }
@@ -178,7 +184,7 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 	memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
 	mutex_init(&crypt_stat->cs_mutex);
 	mutex_init(&crypt_stat->cs_tfm_mutex);
-	mutex_init(&crypt_stat->cs_md5_tfm_mutex);
+	mutex_init(&crypt_stat->cs_hash_tfm_mutex);
 	ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED);
 }
 
@@ -192,8 +198,8 @@ void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 {
 	if (crypt_stat->tfm)
 		crypto_free_tfm(crypt_stat->tfm);
-	if (crypt_stat->md5_tfm)
-		crypto_free_tfm(crypt_stat->md5_tfm);
+	if (crypt_stat->hash_tfm)
+		crypto_free_hash(crypt_stat->hash_tfm);
 	memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
 }
 
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 4112df9dec50..840aa010e0d3 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -175,6 +175,7 @@ ecryptfs_get_key_payload_data(struct key *key)
 #define ECRYPTFS_DEFAULT_CIPHER "aes"
 #define ECRYPTFS_DEFAULT_KEY_BYTES 16
 #define ECRYPTFS_DEFAULT_CHAINING_MODE CRYPTO_TFM_MODE_CBC
+#define ECRYPTFS_DEFAULT_HASH "md5"
 #define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
 #define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
 #define MD5_DIGEST_SIZE 16
@@ -205,14 +206,14 @@ struct ecryptfs_crypt_stat {
 	unsigned int extent_mask;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
 	struct crypto_tfm *tfm;
-	struct crypto_tfm *md5_tfm; /* Crypto context for generating
-				     * the initialization vectors */
+	struct crypto_hash *hash_tfm; /* Crypto context for generating
+				       * the initialization vectors */
 	unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
 	unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
 	unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
 	unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX];
 	struct mutex cs_tfm_mutex;
-	struct mutex cs_md5_tfm_mutex;
+	struct mutex cs_hash_tfm_mutex;
 	struct mutex cs_mutex;
 };
 
-- 
cgit v1.2.3


From 8bba066f4e3854755a303cee37ea37bd080a46b3 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:18 -0800
Subject: [PATCH] eCryptfs: Cipher code to new crypto API

Update cipher block encryption code to the new crypto API.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c          |  92 +++++++++++++++++++++++++++++---------
 fs/ecryptfs/ecryptfs_kernel.h |   9 ++--
 fs/ecryptfs/keystore.c        | 101 ++++++++++++++++++++++++++++--------------
 fs/ecryptfs/main.c            |   2 +
 4 files changed, 146 insertions(+), 58 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f14c5a38215e..2a1b6aa1a4a1 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -123,6 +123,28 @@ out:
 	return rc;
 }
 
+int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
+					   char *cipher_name,
+					   char *chaining_modifier)
+{
+	int cipher_name_len = strlen(cipher_name);
+	int chaining_modifier_len = strlen(chaining_modifier);
+	int algified_name_len;
+	int rc;
+
+	algified_name_len = (chaining_modifier_len + cipher_name_len + 3);
+	(*algified_name) = kmalloc(algified_name_len, GFP_KERNEL);
+	if (!(algified_name)) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	snprintf((*algified_name), algified_name_len, "%s(%s)",
+		 chaining_modifier, cipher_name);
+	rc = 0;
+out:
+	return rc;
+}
+
 /**
  * ecryptfs_derive_iv
  * @iv: destination for the derived iv vale
@@ -197,7 +219,7 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 {
 	if (crypt_stat->tfm)
-		crypto_free_tfm(crypt_stat->tfm);
+		crypto_free_blkcipher(crypt_stat->tfm);
 	if (crypt_stat->hash_tfm)
 		crypto_free_hash(crypt_stat->hash_tfm);
 	memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
@@ -209,7 +231,7 @@ void ecryptfs_destruct_mount_crypt_stat(
 	if (mount_crypt_stat->global_auth_tok_key)
 		key_put(mount_crypt_stat->global_auth_tok_key);
 	if (mount_crypt_stat->global_key_tfm)
-		crypto_free_tfm(mount_crypt_stat->global_key_tfm);
+		crypto_free_blkcipher(mount_crypt_stat->global_key_tfm);
 	memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat));
 }
 
@@ -275,6 +297,11 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 			       struct scatterlist *src_sg, int size,
 			       unsigned char *iv)
 {
+	struct blkcipher_desc desc = {
+		.tfm = crypt_stat->tfm,
+		.info = iv,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
+	};
 	int rc = 0;
 
 	BUG_ON(!crypt_stat || !crypt_stat->tfm
@@ -288,8 +315,8 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 	}
 	/* Consider doing this once, when the file is opened */
 	mutex_lock(&crypt_stat->cs_tfm_mutex);
-	rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
-				  crypt_stat->key_size);
+	rc = crypto_blkcipher_setkey(crypt_stat->tfm, crypt_stat->key,
+				     crypt_stat->key_size);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
 				rc);
@@ -298,7 +325,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 		goto out;
 	}
 	ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size);
-	crypto_cipher_encrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, iv);
+	crypto_blkcipher_encrypt_iv(&desc, dest_sg, src_sg, size);
 	mutex_unlock(&crypt_stat->cs_tfm_mutex);
 out:
 	return rc;
@@ -681,12 +708,17 @@ static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 			       struct scatterlist *src_sg, int size,
 			       unsigned char *iv)
 {
+	struct blkcipher_desc desc = {
+		.tfm = crypt_stat->tfm,
+		.info = iv,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
+	};
 	int rc = 0;
 
 	/* Consider doing this once, when the file is opened */
 	mutex_lock(&crypt_stat->cs_tfm_mutex);
-	rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
-				  crypt_stat->key_size);
+	rc = crypto_blkcipher_setkey(crypt_stat->tfm, crypt_stat->key,
+				     crypt_stat->key_size);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
 				rc);
@@ -695,8 +727,7 @@ static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 		goto out;
 	}
 	ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size);
-	rc = crypto_cipher_decrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size,
-				      iv);
+	rc = crypto_blkcipher_decrypt_iv(&desc, dest_sg, src_sg, size);
 	mutex_unlock(&crypt_stat->cs_tfm_mutex);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error decrypting; rc = [%d]\n",
@@ -765,6 +796,7 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
  */
 int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
 {
+	char *full_alg_name;
 	int rc = -EINVAL;
 
 	if (!crypt_stat->cipher) {
@@ -781,16 +813,24 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
 		goto out;
 	}
 	mutex_lock(&crypt_stat->cs_tfm_mutex);
-	crypt_stat->tfm = crypto_alloc_tfm(crypt_stat->cipher,
-					   ECRYPTFS_DEFAULT_CHAINING_MODE
-					   | CRYPTO_TFM_REQ_WEAK_KEY);
-	mutex_unlock(&crypt_stat->cs_tfm_mutex);
+	rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
+						    crypt_stat->cipher, "cbc");
+	if (rc)
+		goto out;
+	crypt_stat->tfm = crypto_alloc_blkcipher(full_alg_name, 0,
+						 CRYPTO_ALG_ASYNC);
+	kfree(full_alg_name);
 	if (!crypt_stat->tfm) {
 		ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
 				"Error initializing cipher [%s]\n",
 				crypt_stat->cipher);
+		mutex_unlock(&crypt_stat->cs_tfm_mutex);
 		goto out;
 	}
+	crypto_blkcipher_set_flags(crypt_stat->tfm,
+				   (ECRYPTFS_DEFAULT_CHAINING_MODE
+				    | CRYPTO_TFM_REQ_WEAK_KEY));
+	mutex_unlock(&crypt_stat->cs_tfm_mutex);
 	rc = 0;
 out:
 	return rc;
@@ -1588,10 +1628,11 @@ out:
  * event, regardless of whether this function succeeds for fails.
  */
 int
-ecryptfs_process_cipher(struct crypto_tfm **key_tfm, char *cipher_name,
+ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
 			size_t *key_size)
 {
 	char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
+	char *full_alg_name;
 	int rc;
 
 	*key_tfm = NULL;
@@ -1601,17 +1642,26 @@ ecryptfs_process_cipher(struct crypto_tfm **key_tfm, char *cipher_name,
 		      "allowable is [%d]\n", *key_size, ECRYPTFS_MAX_KEY_BYTES);
 		goto out;
 	}
-	*key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY);
-	if (!(*key_tfm)) {
-		rc = -EINVAL;
+	rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, cipher_name,
+						    "ecb");
+	if (rc)
+		goto out;
+	*key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
+	kfree(full_alg_name);
+	if (IS_ERR(*key_tfm)) {
+		rc = PTR_ERR(*key_tfm);
 		printk(KERN_ERR "Unable to allocate crypto cipher with name "
-		       "[%s]\n", cipher_name);
+		       "[%s]; rc = [%d]\n", cipher_name, rc);
 		goto out;
 	}
-	if (*key_size == 0)
-		*key_size = crypto_tfm_alg_max_keysize(*key_tfm);
+	crypto_blkcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+	if (*key_size == 0) {
+		struct blkcipher_alg *alg = crypto_blkcipher_alg(*key_tfm);
+
+		*key_size = alg->max_keysize;
+	}
 	get_random_bytes(dummy_key, *key_size);
-	rc = crypto_cipher_setkey(*key_tfm, dummy_key, *key_size);
+	rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size);
 	if (rc) {
 		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
 		       "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 840aa010e0d3..199fcda50e1b 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -205,7 +205,7 @@ struct ecryptfs_crypt_stat {
 	size_t extent_shift;
 	unsigned int extent_mask;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
 	struct crypto_hash *hash_tfm; /* Crypto context for generating
 				       * the initialization vectors */
 	unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
@@ -245,7 +245,7 @@ struct ecryptfs_mount_crypt_stat {
 	struct ecryptfs_auth_tok *global_auth_tok;
 	struct key *global_auth_tok_key;
 	size_t global_default_cipher_key_size;
-	struct crypto_tfm *global_key_tfm;
+	struct crypto_blkcipher *global_key_tfm;
 	struct mutex global_key_tfm_mutex;
 	unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
 						 + 1];
@@ -426,6 +426,9 @@ void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
 void ecryptfs_destruct_mount_crypt_stat(
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
 int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
+int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
+					   char *cipher_name,
+					   char *chaining_modifier);
 int ecryptfs_write_inode_size_to_header(struct file *lower_file,
 					struct inode *lower_inode,
 					struct inode *inode);
@@ -474,7 +477,7 @@ ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 			  unsigned char *src, struct dentry *ecryptfs_dentry);
 int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
 int
-ecryptfs_process_cipher(struct crypto_tfm **key_tfm, char *cipher_name,
+ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
 			size_t *key_size);
 int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
 int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index bc706d33559a..c3746f56d162 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -458,14 +458,16 @@ out:
 static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 			       struct ecryptfs_crypt_stat *crypt_stat)
 {
-	int rc = 0;
 	struct ecryptfs_password *password_s_ptr;
-	struct crypto_tfm *tfm = NULL;
 	struct scatterlist src_sg[2], dst_sg[2];
 	struct mutex *tfm_mutex = NULL;
 	/* TODO: Use virt_to_scatterlist for these */
 	char *encrypted_session_key;
 	char *session_key;
+	struct blkcipher_desc desc = {
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
+	};
+	int rc = 0;
 
 	password_s_ptr = &auth_tok->token.password;
 	if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags,
@@ -482,22 +484,32 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 	if (!strcmp(crypt_stat->cipher,
 		    crypt_stat->mount_crypt_stat->global_default_cipher_name)
 	    && crypt_stat->mount_crypt_stat->global_key_tfm) {
-		tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
+		desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
 		tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
 	} else {
-		tfm = crypto_alloc_tfm(crypt_stat->cipher,
-				       CRYPTO_TFM_REQ_WEAK_KEY);
-		if (!tfm) {
-			printk(KERN_ERR "Error allocating crypto context\n");
-			rc = -ENOMEM;
+		char *full_alg_name;
+
+		rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
+							    crypt_stat->cipher,
+							    "ecb");
+		if (rc)
+			goto out;
+		desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0,
+						  CRYPTO_ALG_ASYNC);
+		kfree(full_alg_name);
+		if (IS_ERR(desc.tfm)) {
+			rc = PTR_ERR(desc.tfm);
+			printk(KERN_ERR "Error allocating crypto context; "
+			       "rc = [%d]\n", rc);
 			goto out;
 		}
+		crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY);
 	}
 	if (tfm_mutex)
 		mutex_lock(tfm_mutex);
-	rc = crypto_cipher_setkey(tfm,
-				  password_s_ptr->session_key_encryption_key,
-				  crypt_stat->key_size);
+	rc = crypto_blkcipher_setkey(desc.tfm,
+				     password_s_ptr->session_key_encryption_key,
+				     crypt_stat->key_size);
 	if (rc < 0) {
 		printk(KERN_ERR "Error setting key for crypto context\n");
 		rc = -EINVAL;
@@ -528,9 +540,12 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 	auth_tok->session_key.decrypted_key_size =
 	    auth_tok->session_key.encrypted_key_size;
 	dst_sg[0].length = auth_tok->session_key.encrypted_key_size;
-	/* TODO: Handle error condition */
-	crypto_cipher_decrypt(tfm, dst_sg, src_sg,
-			      auth_tok->session_key.encrypted_key_size);
+	rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg,
+				      auth_tok->session_key.encrypted_key_size);
+	if (rc) {
+		printk(KERN_ERR "Error decrypting; rc = [%d]\n", rc);
+		goto out_free_memory;
+	}
 	auth_tok->session_key.decrypted_key_size =
 	    auth_tok->session_key.encrypted_key_size;
 	memcpy(auth_tok->session_key.decrypted_key, session_key,
@@ -543,6 +558,7 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 	if (ecryptfs_verbosity > 0)
 		ecryptfs_dump_hex(crypt_stat->key,
 				  crypt_stat->key_size);
+out_free_memory:
 	memset(encrypted_session_key, 0, PAGE_CACHE_SIZE);
 	free_page((unsigned long)encrypted_session_key);
 	memset(session_key, 0, PAGE_CACHE_SIZE);
@@ -551,7 +567,7 @@ out_free_tfm:
 	if (tfm_mutex)
 		mutex_unlock(tfm_mutex);
 	else
-		crypto_free_tfm(tfm);
+		crypto_free_blkcipher(desc.tfm);
 out:
 	return rc;
 }
@@ -800,19 +816,21 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
 		   struct ecryptfs_crypt_stat *crypt_stat,
 		   struct ecryptfs_key_record *key_rec, size_t *packet_size)
 {
-	int rc = 0;
-
 	size_t i;
 	size_t signature_is_valid = 0;
 	size_t encrypted_session_key_valid = 0;
 	char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
 	struct scatterlist dest_sg[2];
 	struct scatterlist src_sg[2];
-	struct crypto_tfm *tfm = NULL;
 	struct mutex *tfm_mutex = NULL;
 	size_t key_rec_size;
 	size_t packet_size_length;
 	size_t cipher_code;
+	struct blkcipher_desc desc = {
+		.tfm = NULL,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
+	};
+	int rc = 0;
 
 	(*packet_size) = 0;
 	/* Check for a valid signature on the auth_tok */
@@ -879,33 +897,48 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
 	if (!strcmp(crypt_stat->cipher,
 		    crypt_stat->mount_crypt_stat->global_default_cipher_name)
 	    && crypt_stat->mount_crypt_stat->global_key_tfm) {
-		tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
+		desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
 		tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
-	} else
-		tfm = crypto_alloc_tfm(crypt_stat->cipher, 0);
-	if (!tfm) {
-		ecryptfs_printk(KERN_ERR, "Could not initialize crypto "
-				"context for cipher [%s]\n",
-				crypt_stat->cipher);
-		rc = -EINVAL;
-		goto out;
+	} else {
+		char *full_alg_name;
+
+		rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
+							    crypt_stat->cipher,
+							    "ecb");
+		if (rc)
+			goto out;
+		desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0,
+						  CRYPTO_ALG_ASYNC);
+		kfree(full_alg_name);
+		if (IS_ERR(desc.tfm)) {
+			rc = PTR_ERR(desc.tfm);
+			ecryptfs_printk(KERN_ERR, "Could not initialize crypto "
+					"context for cipher [%s]; rc = [%d]\n",
+					crypt_stat->cipher, rc);
+			goto out;
+		}
+		crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY);
 	}
 	if (tfm_mutex)
 		mutex_lock(tfm_mutex);
-	rc = crypto_cipher_setkey(tfm, session_key_encryption_key,
-				  crypt_stat->key_size);
+	rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key,
+				     crypt_stat->key_size);
 	if (rc < 0) {
 		if (tfm_mutex)
 			mutex_unlock(tfm_mutex);
 		ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
-				"context\n");
+				"context; rc = [%d]\n", rc);
 		goto out;
 	}
 	rc = 0;
 	ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
 			crypt_stat->key_size);
-	crypto_cipher_encrypt(tfm, dest_sg, src_sg,
-			      (*key_rec).enc_key_size);
+	rc = crypto_blkcipher_encrypt(&desc, dest_sg, src_sg,
+				      (*key_rec).enc_key_size);
+	if (rc) {
+		printk(KERN_ERR "Error encrypting; rc = [%d]\n", rc);
+		goto out;
+	}
 	if (tfm_mutex)
 		mutex_unlock(tfm_mutex);
 	ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
@@ -968,8 +1001,8 @@ encrypted_session_key_set:
 	       (*key_rec).enc_key_size);
 	(*packet_size) += (*key_rec).enc_key_size;
 out:
-	if (tfm && !tfm_mutex)
-		crypto_free_tfm(tfm);
+	if (desc.tfm && !tfm_mutex)
+		crypto_free_blkcipher(desc.tfm);
 	if (rc)
 		(*packet_size) = 0;
 	return rc;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a65f4865182c..a78d87d14baf 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -315,6 +315,8 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 		       "with key size [%Zd] bytes; rc = [%d]\n",
 		       mount_crypt_stat->global_default_cipher_name,
 		       mount_crypt_stat->global_default_cipher_key_size, rc);
+		mount_crypt_stat->global_key_tfm = NULL;
+		mount_crypt_stat->global_auth_tok_key = NULL;
 		rc = -EINVAL;
 		goto out;
 	}
-- 
cgit v1.2.3


From 7ff1d74f5670329ac4b5959a675f8698ba95be20 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:19 -0800
Subject: [PATCH] eCryptfs: Consolidate lower dentry_open's

Opens on lower dentry objects happen in several places in eCryptfs, and they
all involve the same steps (dget, mntget, dentry_open).  This patch
consolidates the lower open events into a single function call.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c          | 24 +++++++++++------------
 fs/ecryptfs/ecryptfs_kernel.h |  4 ++++
 fs/ecryptfs/file.c            | 44 +++++++++++++++++++++++++++++++++++--------
 fs/ecryptfs/inode.c           | 33 +++++++++++---------------------
 4 files changed, 63 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 2a1b6aa1a4a1..f49f105394b7 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1191,28 +1191,28 @@ int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
 int ecryptfs_read_header_region(char *data, struct dentry *dentry,
 				struct vfsmount *mnt)
 {
-	struct file *file;
+	struct file *lower_file;
 	mm_segment_t oldfs;
 	int rc;
 
-	mnt = mntget(mnt);
-	file = dentry_open(dentry, mnt, O_RDONLY);
-	if (IS_ERR(file)) {
-		ecryptfs_printk(KERN_DEBUG, "Error opening file to "
-				"read header region\n");
-		mntput(mnt);
-		rc = PTR_ERR(file);
+	if ((rc = ecryptfs_open_lower_file(&lower_file, dentry, mnt,
+					   O_RDONLY))) {
+		printk(KERN_ERR
+		       "Error opening lower_file to read header region\n");
 		goto out;
 	}
-	file->f_pos = 0;
+	lower_file->f_pos = 0;
 	oldfs = get_fs();
 	set_fs(get_ds());
 	/* For releases 0.1 and 0.2, all of the header information
 	 * fits in the first data extent-sized region. */
-	rc = file->f_op->read(file, (char __user *)data,
-			      ECRYPTFS_DEFAULT_EXTENT_SIZE, &file->f_pos);
+	rc = lower_file->f_op->read(lower_file, (char __user *)data,
+			      ECRYPTFS_DEFAULT_EXTENT_SIZE, &lower_file->f_pos);
 	set_fs(oldfs);
-	fput(file);
+	if ((rc = ecryptfs_close_lower_file(lower_file))) {
+		printk(KERN_ERR "Error closing lower_file\n");
+		goto out;
+	}
 	rc = 0;
 out:
 	return rc;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 199fcda50e1b..f992533d1692 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -482,5 +482,9 @@ ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
 int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
 int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
 void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
+int ecryptfs_open_lower_file(struct file **lower_file,
+			     struct dentry *lower_dentry,
+			     struct vfsmount *lower_mnt, int flags);
+int ecryptfs_close_lower_file(struct file *lower_file);
 
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index c8550c9f9cd2..a92ef05eff8f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -198,6 +198,33 @@ retry:
 
 struct kmem_cache *ecryptfs_file_info_cache;
 
+int ecryptfs_open_lower_file(struct file **lower_file,
+			     struct dentry *lower_dentry,
+			     struct vfsmount *lower_mnt, int flags)
+{
+	int rc = 0;
+
+	dget(lower_dentry);
+	mntget(lower_mnt);
+	*lower_file = dentry_open(lower_dentry, lower_mnt, flags);
+	if (IS_ERR(*lower_file)) {
+		printk(KERN_ERR "Error opening lower file for lower_dentry "
+		       "[0x%p], lower_mnt [0x%p], and flags [0x%x]\n",
+		       lower_dentry, lower_mnt, flags);
+		rc = PTR_ERR(*lower_file);
+		*lower_file = NULL;
+		goto out;
+	}
+out:
+	return rc;
+}
+
+int ecryptfs_close_lower_file(struct file *lower_file)
+{
+	fput(lower_file);
+	return 0;
+}
+
 /**
  * ecryptfs_open
  * @inode: inode speciying file to open
@@ -244,19 +271,15 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 		ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
-	/* This mntget & dget is undone via fput when the file is released */
-	dget(lower_dentry);
 	lower_flags = file->f_flags;
 	if ((lower_flags & O_ACCMODE) == O_WRONLY)
 		lower_flags = (lower_flags & O_ACCMODE) | O_RDWR;
 	if (file->f_flags & O_APPEND)
 		lower_flags &= ~O_APPEND;
 	lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
-	mntget(lower_mnt);
 	/* Corresponding fput() in ecryptfs_release() */
-	lower_file = dentry_open(lower_dentry, lower_mnt, lower_flags);
-	if (IS_ERR(lower_file)) {
-		rc = PTR_ERR(lower_file);
+	if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
+					   lower_flags))) {
 		ecryptfs_printk(KERN_ERR, "Error opening lower file\n");
 		goto out_puts;
 	}
@@ -341,11 +364,16 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
 	struct file *lower_file = ecryptfs_file_to_lower(file);
 	struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file);
 	struct inode *lower_inode = ecryptfs_inode_to_lower(inode);
+	int rc;
 
-	fput(lower_file);
+	if ((rc = ecryptfs_close_lower_file(lower_file))) {
+		printk(KERN_ERR "Error closing lower_file\n");
+		goto out;
+	}
 	inode->i_blocks = lower_inode->i_blocks;
 	kmem_cache_free(ecryptfs_file_info_cache, file_info);
-	return 0;
+out:
+	return rc;
 }
 
 static int
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index efdd2b7b62d7..2f2c6cf972f7 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -231,7 +231,6 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 	int lower_flags;
 	struct ecryptfs_crypt_stat *crypt_stat;
 	struct dentry *lower_dentry;
-	struct dentry *tlower_dentry = NULL;
 	struct file *lower_file;
 	struct inode *inode, *lower_inode;
 	struct vfsmount *lower_mnt;
@@ -241,30 +240,19 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 			lower_dentry->d_name.name);
 	inode = ecryptfs_dentry->d_inode;
 	crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
-	tlower_dentry = dget(lower_dentry);
-	if (!tlower_dentry) {
-		rc = -ENOMEM;
-		ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry\n");
-		goto out;
-	}
 	lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR;
 #if BITS_PER_LONG != 32
 	lower_flags |= O_LARGEFILE;
 #endif
 	lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
-	mntget(lower_mnt);
 	/* Corresponding fput() at end of this function */
-	lower_file = dentry_open(tlower_dentry, lower_mnt, lower_flags);
-	if (IS_ERR(lower_file)) {
-		rc = PTR_ERR(lower_file);
+	if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
+					   lower_flags))) {
 		ecryptfs_printk(KERN_ERR,
 				"Error opening dentry; rc = [%i]\n", rc);
 		goto out;
 	}
-	/* fput(lower_file) should handle the puts if we do this */
-	lower_file->f_dentry = tlower_dentry;
-	lower_file->f_vfsmnt = lower_mnt;
-	lower_inode = tlower_dentry->d_inode;
+	lower_inode = lower_dentry->d_inode;
 	if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
 		ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
 		ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
@@ -285,7 +273,8 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 	}
 	rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode);
 out_fput:
-	fput(lower_file);
+	if ((rc = ecryptfs_close_lower_file(lower_file)))
+		printk(KERN_ERR "Error closing lower_file\n");
 out:
 	return rc;
 }
@@ -832,12 +821,11 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 	}
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	/* This dget & mntget is released through fput at out_fput: */
-	dget(lower_dentry);
 	lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
-	mntget(lower_mnt);
-	lower_file = dentry_open(lower_dentry, lower_mnt, O_RDWR);
-	if (unlikely(IS_ERR(lower_file))) {
-		rc = PTR_ERR(lower_file);
+	if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
+					   O_RDWR))) {
+		ecryptfs_printk(KERN_ERR,
+				"Error opening dentry; rc = [%i]\n", rc);
 		goto out_free;
 	}
 	ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file);
@@ -879,7 +867,8 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 		= CURRENT_TIME;
 	mark_inode_dirty_sync(inode);
 out_fput:
-	fput(lower_file);
+	if ((rc = ecryptfs_close_lower_file(lower_file)))
+		printk(KERN_ERR "Error closing lower_file\n");
 out_free:
 	if (ecryptfs_file_to_private(&fake_ecryptfs_file))
 		kmem_cache_free(ecryptfs_file_info_cache,
-- 
cgit v1.2.3


From 316bb95e8ed0ddcd767e8aa54264b6c6190f150c Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:20 -0800
Subject: [PATCH] eCryptfs: Remove ecryptfs_umount_begin

There is no point to calling the lower umount_begin when the eCryptfs
umount_begin is called.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/super.c | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index c337c0410fb1..825757ae4867 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -137,23 +137,6 @@ static void ecryptfs_clear_inode(struct inode *inode)
 	iput(ecryptfs_inode_to_lower(inode));
 }
 
-/**
- * ecryptfs_umount_begin
- *
- * Called in do_umount().
- */
-static void ecryptfs_umount_begin(struct vfsmount *vfsmnt, int flags)
-{
-	struct vfsmount *lower_mnt =
-		ecryptfs_dentry_to_lower_mnt(vfsmnt->mnt_sb->s_root);
-	struct super_block *lower_sb;
-
-	mntput(lower_mnt);
-	lower_sb = lower_mnt->mnt_sb;
-	if (lower_sb->s_op->umount_begin)
-		lower_sb->s_op->umount_begin(lower_mnt, flags);
-}
-
 /**
  * ecryptfs_show_options
  *
@@ -193,6 +176,5 @@ struct super_operations ecryptfs_sops = {
 	.statfs = ecryptfs_statfs,
 	.remount_fs = NULL,
 	.clear_inode = ecryptfs_clear_inode,
-	.umount_begin = ecryptfs_umount_begin,
 	.show_options = ecryptfs_show_options
 };
-- 
cgit v1.2.3


From 45ec4ababe999cb95f9c0cad03b2689cb0b77a2b Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Mon, 30 Oct 2006 22:07:20 -0800
Subject: [PATCH] eCryptfs: Fix handling of lower d_count

Fix the use of dget/dput calls to balance out on the lower filesystem.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/dentry.c |  8 ++++++-
 fs/ecryptfs/inode.c  | 62 +++++++++++++++++-----------------------------------
 2 files changed, 27 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index f0d2a433242b..0b9992ab990f 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -24,6 +24,7 @@
 
 #include <linux/dcache.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
 #include "ecryptfs_kernel.h"
 
 /**
@@ -76,8 +77,13 @@ static void ecryptfs_d_release(struct dentry *dentry)
 	if (ecryptfs_dentry_to_private(dentry))
 		kmem_cache_free(ecryptfs_dentry_info_cache,
 				ecryptfs_dentry_to_private(dentry));
-	if (lower_dentry)
+	if (lower_dentry) {
+		struct vfsmount *lower_mnt =
+			ecryptfs_dentry_to_lower_mnt(dentry);
+
+		mntput(lower_mnt);
 		dput(lower_dentry);
+	}
 	return;
 }
 
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 2f2c6cf972f7..ff4865d24f0f 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -325,7 +325,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct dentry *lower_dir_dentry;
 	struct dentry *lower_dentry;
 	struct vfsmount *lower_mnt;
-	struct dentry *tlower_dentry = NULL;
 	char *encoded_name;
 	unsigned int encoded_namelen;
 	struct ecryptfs_crypt_stat *crypt_stat = NULL;
@@ -336,27 +335,32 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 	lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
 	dentry->d_op = &ecryptfs_dops;
 	if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, "."))
-	    || (dentry->d_name.len == 2 && !strcmp(dentry->d_name.name, "..")))
-		goto out_drop;
+	    || (dentry->d_name.len == 2
+		&& !strcmp(dentry->d_name.name, ".."))) {
+		d_drop(dentry);
+		goto out;
+	}
 	encoded_namelen = ecryptfs_encode_filename(crypt_stat,
 						   dentry->d_name.name,
 						   dentry->d_name.len,
 						   &encoded_name);
 	if (encoded_namelen < 0) {
 		rc = encoded_namelen;
-		goto out_drop;
+		d_drop(dentry);
+		goto out;
 	}
 	ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
 			"= [%d]\n", encoded_name, encoded_namelen);
 	lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
 				      encoded_namelen - 1);
 	kfree(encoded_name);
-	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
 	if (IS_ERR(lower_dentry)) {
 		ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
 		rc = PTR_ERR(lower_dentry);
-		goto out_drop;
+		d_drop(dentry);
+		goto out;
 	}
+	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
 	ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->"
        		"d_name.name = [%s]\n", lower_dentry,
 		lower_dentry->d_name.name);
@@ -397,12 +401,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 				"as we *think* we are about to unlink\n");
 		goto out;
 	}
-	tlower_dentry = dget(lower_dentry);
-	if (!tlower_dentry || IS_ERR(tlower_dentry)) {
-		rc = -ENOMEM;
-		ecryptfs_printk(KERN_ERR, "Cannot dget lower_dentry\n");
-		goto out_dput;
-	}
 	/* Released in this function */
 	page_virt =
 	    (char *)kmem_cache_alloc(ecryptfs_header_cache_2,
@@ -414,7 +412,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 		goto out_dput;
 	}
 	memset(page_virt, 0, PAGE_CACHE_SIZE);
-	rc = ecryptfs_read_header_region(page_virt, tlower_dentry, nd->mnt);
+	rc = ecryptfs_read_header_region(page_virt, lower_dentry, nd->mnt);
 	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
 	if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED))
 		ecryptfs_set_default_sizes(crypt_stat);
@@ -437,9 +435,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 
 out_dput:
 	dput(lower_dentry);
-	if (tlower_dentry)
-		dput(tlower_dentry);
-out_drop:
 	d_drop(dentry);
 out:
 	return ERR_PTR(rc);
@@ -475,8 +470,8 @@ out_lock:
 	unlock_dir(lower_dir_dentry);
 	dput(lower_new_dentry);
 	dput(lower_old_dentry);
-	if (!new_dentry->d_inode)
-		d_drop(new_dentry);
+	d_drop(new_dentry);
+	d_drop(old_dentry);
 	return rc;
 }
 
@@ -565,41 +560,24 @@ out:
 
 static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	int rc = 0;
-	struct dentry *tdentry = NULL;
 	struct dentry *lower_dentry;
-	struct dentry *tlower_dentry = NULL;
 	struct dentry *lower_dir_dentry;
+	int rc;
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	if (!(tdentry = dget(dentry))) {
-		rc = -EINVAL;
-		ecryptfs_printk(KERN_ERR, "Error dget'ing dentry [%p]\n",
-				dentry);
-		goto out;
-	}
+	dget(dentry);
 	lower_dir_dentry = lock_parent(lower_dentry);
-	if (!(tlower_dentry = dget(lower_dentry))) {
-		rc = -EINVAL;
-		ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry "
-				"[%p]\n", lower_dentry);
-		goto out;
-	}
+	dget(lower_dentry);
 	rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
-	if (!rc) {
-		d_delete(tlower_dentry);
-		tlower_dentry = NULL;
-	}
+	dput(lower_dentry);
+	if (!rc)
+		d_delete(lower_dentry);
 	ecryptfs_copy_attr_times(dir, lower_dir_dentry->d_inode);
 	dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
 	unlock_dir(lower_dir_dentry);
 	if (!rc)
 		d_drop(dentry);
-out:
-	if (tdentry)
-		dput(tdentry);
-	if (tlower_dentry)
-		dput(tlower_dentry);
+	dput(dentry);
 	return rc;
 }
 
-- 
cgit v1.2.3


From 8e87d4dc159148f04f515bc072df22a2c089e7f2 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 2 Nov 2006 03:45:24 +0000
Subject: [CIFS] report rename failure when target file is locked by Windows

Fixes Samba bugzilla bug # 4182

Rename by handle failures (retry after rename by path) were not
being returned back.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES |  4 +++-
 fs/cifs/inode.c | 14 +++++++++-----
 2 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 50afab81a59b..0b3c37ef52e0 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -3,7 +3,9 @@ Version 1.46
 Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
 Allow null user to be specified on mount ("username="). Do not return
 EINVAL on readdir when filldir fails due to overwritten blocksize
-(fixes FC problem)
+(fixes FC problem).  Return error in rename 2nd attempt retry (ie report
+if rename by handle also fails, after rename by path fails, we were
+not reporting whether the retry worked or not).
 
 Version 1.45
 ------------
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 35d54bb0869a..dffe295825f4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -885,10 +885,14 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
 			kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
 		if (info_buf_source != NULL) {
 			info_buf_target = info_buf_source + 1;
-			rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName,
-				info_buf_source, cifs_sb_source->local_nls, 
-				cifs_sb_source->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
+			if (pTcon->ses->capabilities & CAP_UNIX)
+				rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName,
+					info_buf_source, 
+					cifs_sb_source->local_nls,
+					cifs_sb_source->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
+			/* else rc is still EEXIST so will fall through to
+			   unlink the target and retry rename */
 			if (rc == 0) {
 				rc = CIFSSMBUnixQPathInfo(xid, pTcon, toName,
 						info_buf_target,
@@ -937,7 +941,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
 				 cifs_sb_source->mnt_cifs_flags & 
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 		if (rc==0) {
-			CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName,
+			rc = CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName,
 					      cifs_sb_source->local_nls, 
 					      cifs_sb_source->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
-- 
cgit v1.2.3


From d572b87946f8c598b3cad86a7913862dd48daadb Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Thu, 2 Nov 2006 10:50:40 -0600
Subject: JFS: Remove redundant xattr permission checking

The vfs handles most permissions for setting and retrieving xattrs.
This patch removes a redundant and wrong check so that it won't override
the correct behavior which is being fixed in the vfs.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/xattr.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 4c7985ebca92..b753ba216450 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -756,6 +756,11 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 	return -EOPNOTSUPP;
 }
 
+/*
+ * Most of the permission checking is done by xattr_permission in the vfs.
+ * The local file system is responsible for handling the system.* namespace.
+ * We also need to verify that this is a namespace that we recognize.
+ */
 static int can_set_xattr(struct inode *inode, const char *name,
 			 const void *value, size_t value_len)
 {
@@ -771,10 +776,6 @@ static int can_set_xattr(struct inode *inode, const char *name,
 	    strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
 		return -EOPNOTSUPP;
 
-	if (!S_ISREG(inode->i_mode) &&
-	    (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX))
-		return -EPERM;
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7bd473fcc217adec000f213e8864bf9a161d57e1 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Thu, 2 Nov 2006 22:06:56 -0800
Subject: [PATCH] eCryptfs: Fix pointer deref

I missed a pointer dereference in this kmalloc result check.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f49f105394b7..136175a69332 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -134,7 +134,7 @@ int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
 
 	algified_name_len = (chaining_modifier_len + cipher_name_len + 3);
 	(*algified_name) = kmalloc(algified_name_len, GFP_KERNEL);
-	if (!(algified_name)) {
+	if (!(*algified_name)) {
 		rc = -ENOMEM;
 		goto out;
 	}
-- 
cgit v1.2.3


From 87c2b7c045a44f6c1c7af23e64f2b286e6f7130a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 2 Nov 2006 22:06:58 -0800
Subject: [PATCH] sys_pselect7 vs compat_sys_pselect7 uaccess error handling

758333458aa719bfc26ec16eafd4ad3a9e96014d fixes the unchecked copy_to_user
return value of compat_sys_pselect7.  I ran into this too because of an old
source tree, but my fix would look quite a bit different to Andi's fix.

The reason is that the compat function IMHO should behave the very same as
the non-compat function if possible.  Since sys_pselect7 does not return
-EFAULT in this specific case, change the compat code so it behaves like
sys_pselect7.

Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 50624d4a70c6..8d0a0018a7d2 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1835,9 +1835,12 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 
 	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
 
-	if (ret == 0 && tsp && !(current->personality & STICKY_TIMEOUTS)) {
+	if (tsp) {
 		struct compat_timespec rts;
 
+		if (current->personality & STICKY_TIMEOUTS)
+			goto sticky;
+
 		rts.tv_sec = timeout / HZ;
 		rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
 		if (rts.tv_nsec >= NSEC_PER_SEC) {
@@ -1846,8 +1849,19 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 		}
 		if (compat_timespec_compare(&rts, &ts) >= 0)
 			rts = ts;
-		if (copy_to_user(tsp, &rts, sizeof(rts)))
-			ret = -EFAULT;
+		if (copy_to_user(tsp, &rts, sizeof(rts))) {
+sticky:
+			/*
+			 * If an application puts its timeval in read-only
+			 * memory, we don't want the Linux-specific update to
+			 * the timeval to cause a fault after the select has
+			 * completed successfully. However, because we're not
+			 * updating the timeval, we can't restart the system
+			 * call.
+			 */
+			if (ret == -ERESTARTNOHAND)
+				ret = -EINTR;
+		}
 	}
 
 	if (ret == -ERESTARTNOHAND) {
-- 
cgit v1.2.3


From 05ac9d4b3d7eac9e8542c83341a0e22d09aecf8f Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 2 Nov 2006 22:07:08 -0800
Subject: [PATCH] cifs: ->readpages() fixes

This just ignores the remaining pages, and fixes a forgotten put_pages_list().

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Steven French <sfrench@us.ibm.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/file.c | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 976a691c5a68..7e056b9b49e8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1806,13 +1806,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		}
 		if ((rc < 0) || (smb_read_data == NULL)) {
 			cFYI(1, ("Read error in readpages: %d", rc));
-			/* clean up remaing pages off list */
-			while (!list_empty(page_list) && (i < num_pages)) {
-				page = list_entry(page_list->prev, struct page,
-						  lru);
-				list_del(&page->lru);
-				page_cache_release(page);
-			}
 			break;
 		} else if (bytes_read > 0) {
 			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
@@ -1831,13 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 				   this case is ok - if we are at server EOF 
 				   we will hit it on next read */
 
-			/* while (!list_empty(page_list) && (i < num_pages)) {
-					page = list_entry(page_list->prev, 
-							  struct page, list);
-					list_del(&page->list);
-					page_cache_release(page);
-				}
-				break; */
+				/* break; */
 			}
 		} else {
 			cFYI(1, ("No bytes read (%d) at offset %lld . "
@@ -1845,14 +1832,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 				 bytes_read, offset));
 			/* BB turn off caching and do new lookup on 
 			   file size at server? */
-			while (!list_empty(page_list) && (i < num_pages)) {
-				page = list_entry(page_list->prev, struct page,
-						  lru);
-				list_del(&page->lru);
-
-				/* BB removeme - replace with zero of page? */
-				page_cache_release(page);
-			}
 			break;
 		}
 		if (smb_read_data) {
-- 
cgit v1.2.3


From 2e990021bfc65b1a3778479a9e6b4811f9c1ff0e Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 2 Nov 2006 22:07:09 -0800
Subject: [PATCH] fuse: ->readpages() cleanup

This just ignores the remaining pages.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Steven French <sfrench@us.ibm.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/file.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bb5ace3882d..763a50daf1c0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -397,14 +397,14 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 
 	err = -EIO;
 	if (is_bad_inode(inode))
-		goto clean_pages_up;
+		goto out;
 
 	data.file = file;
 	data.inode = inode;
 	data.req = fuse_get_req(fc);
 	err = PTR_ERR(data.req);
 	if (IS_ERR(data.req))
-		goto clean_pages_up;
+		goto out;
 
 	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
 	if (!err) {
@@ -413,10 +413,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 		else
 			fuse_put_request(fc, data.req);
 	}
-	return err;
-
-clean_pages_up:
-	put_pages_list(pages);
+out:
 	return err;
 }
 
-- 
cgit v1.2.3


From 7011774db8afca43be466f0f0428434a9edf053e Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 2 Nov 2006 22:07:10 -0800
Subject: [PATCH] gfs2: ->readpages() fixes

This just ignores the remaining pages, and removes the unneeded unlock_page() calls.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Steven French <sfrench@us.ibm.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/gfs2/ops_address.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 8d5963c7e123..015640b3f123 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -337,13 +337,6 @@ out:
 out_noerror:
 	ret = 0;
 out_unlock:
-	/* unlock all pages, we can't do any I/O right now */
-	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
-		struct page *page = list_entry(pages->prev, struct page, lru);
-		list_del(&page->lru);
-		unlock_page(page);
-		page_cache_release(page);
-	}
 	if (do_unlock)
 		gfs2_holder_uninit(&gh);
 	goto out;
-- 
cgit v1.2.3


From 7ef55b8a05c02db7c07d81827c69fe8f124e8654 Mon Sep 17 00:00:00 2001
From: Srinivasa Ds <srinivasa@in.ibm.com>
Date: Thu, 2 Nov 2006 22:07:12 -0800
Subject: [PATCH] NFS4: fix for recursive locking problem

When I was performing some operations on NFS, I got the error below on the
server side.

  =============================================
  [ INFO: possible recursive locking detected ]
  2.6.19-prep #1
  ---------------------------------------------
  nfsd4/3525 is trying to acquire lock:
   (&inode->i_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24

  but task is already holding lock:
   (&inode->i_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24

  other info that might help us debug this:
  2 locks held by nfsd4/3525:
   #0:  (client_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24
   #1:  (&inode->i_mutex){--..}, at: [<c0611e5a>] mutex_lock+0x21/0x24

  stack backtrace:
   [<c04051ed>] show_trace_log_lvl+0x58/0x16a
   [<c04057fa>] show_trace+0xd/0x10
   [<c0405913>] dump_stack+0x19/0x1b
   [<c043b6f1>] __lock_acquire+0x778/0x99c
   [<c043be86>] lock_acquire+0x4b/0x6d
   [<c0611ceb>] __mutex_lock_slowpath+0xbc/0x20a
   [<c0611e5a>] mutex_lock+0x21/0x24
   [<c047fd7e>] vfs_rmdir+0x76/0xf8
   [<f94b7ce9>] nfsd4_clear_clid_dir+0x2c/0x41 [nfsd]
   [<f94b7de9>] nfsd4_remove_clid_dir+0xb1/0xe8 [nfsd]
   [<f94b307b>] laundromat_main+0x9b/0x1c3 [nfsd]
   [<c04333d6>] run_workqueue+0x7a/0xbb
   [<c0433d0b>] worker_thread+0xd2/0x107
   [<c0436285>] kthread+0xc3/0xf2
   [<c0402005>] kernel_thread_helper+0x5/0xb
  ===================================================================

The cause of this problem was two successive mutex_lock calls on two different
inodes, as shown below:

	static int
	nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
	{
	        int status;

	        /* For now this directory should already be empty, but we empty it of
        	 * any regular files anyway, just in case the directory was created by
	         * a kernel from the future.... */
        	nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
	        mutex_lock(&dir->d_inode->i_mutex);
	        status = vfs_rmdir(dir->d_inode, dentry);
	...

	int vfs_rmdir(struct inode *dir, struct dentry *dentry)
	{
	        int error = may_delete(dir, dentry, 1);

	        if (error)
	                return error;

	        if (!dir->i_op || !dir->i_op->rmdir)
        	        return -EPERM;

	        DQUOT_INIT(dir);

	        mutex_lock(&dentry->d_inode->i_mutex);
	...

So I have developed the patch to overcome this problem.

Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4recover.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e9d07704680e..81b8565d3837 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -274,7 +274,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
 	 * any regular files anyway, just in case the directory was created by
 	 * a kernel from the future.... */
 	nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
-	mutex_lock(&dir->d_inode->i_mutex);
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
 	status = vfs_rmdir(dir->d_inode, dentry);
 	mutex_unlock(&dir->d_inode->i_mutex);
 	return status;
-- 
cgit v1.2.3


From d2c89a4284ea4ecfba77c6f2d7d6f96d52e801e5 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Thu, 2 Nov 2006 22:07:20 -0800
Subject: [PATCH] reiserfs: reset errval after initializing bitmap cache

Callers after reiserfs_init_bitmap_cache() expect errval to contain -EINVAL
until much later.  If a condition fails before errval is reset later,
reiserfs_fill_super() will mistakenly return 0, causing an Oops in
do_add_mount().  This patch resets errval to -EINVAL after the call.

I view this as a temporary fix and real error codes should be used
throughout reiserfs_fill_super().

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9041802df832..17249994110f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1619,6 +1619,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 		      "jmacd-8: reiserfs_fill_super: unable to read bitmap");
 		goto error;
 	}
+	errval = -EINVAL;
 #ifdef CONFIG_REISERFS_CHECK
 	SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON");
 	SWARN(silent, s, "- it is slow mode for debugging.");
-- 
cgit v1.2.3


From f1f2d8713d16a1e198880bbc716eb24fae09c858 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 2 Nov 2006 22:07:29 -0800
Subject: [PATCH] Fix user.* xattr permission check for sticky dirs

The user.* extended attributes are only allowed on regular files and
directories.  Sticky directories further restrict write access to the owner
and privileged users.  (See the attr(5) man page for an explanation.)

The original check in ext2/ext3 when user.* xattrs were merged was more
restrictive than intended, and when the xattr permission checks were moved
into the VFS, read access to user.* attributes on sticky directories ended
up being denied in addition.

Originally-from: Gerard Neil <xyzzy@devferret.org>
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Jan Engelhardt <jengelh@linux01.gwdg.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/xattr.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xattr.c b/fs/xattr.c
index 395635100f77..0901bdc2ce24 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -48,14 +48,21 @@ xattr_permission(struct inode *inode, const char *name, int mask)
 		return 0;
 
 	/*
-	 * The trusted.* namespace can only accessed by a privilegued user.
+	 * The trusted.* namespace can only be accessed by a privileged user.
 	 */
 	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
 		return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
 
+	/* In user.* namespace, only regular files and directories can have
+	 * extended attributes. For sticky directories, only the owner and
+	 * privileged user can write attributes.
+	 */
 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
-		if (!S_ISREG(inode->i_mode) &&
-		    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+			return -EPERM;
+		if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
+		    (mask & MAY_WRITE) && (current->fsuid != inode->i_uid) &&
+		    !capable(CAP_FOWNER))
 			return -EPERM;
 	}
 
-- 
cgit v1.2.3


From ddac0d39cf437d02fde9795ae57d9c4b4c146de9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Sat, 4 Nov 2006 12:49:32 +0100
Subject: [PATCH] splice: fix problem introduced with inode diet

After the inode slimming patch that unionised i_pipe/i_bdev/i_cdev, it's
no longer enough to check for existence of ->i_pipe to verify that this
is a pipe.

Original patch from Eric Dumazet <dada1@cosmosbay.com>
Final solution suggested by Linus.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/splice.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 8d705954d294..da74583a00ee 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1108,6 +1108,19 @@ out_release:
 
 EXPORT_SYMBOL(do_splice_direct);
 
+/*
+ * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
+ * location, so checking ->i_pipe is not enough to verify that this is a
+ * pipe.
+ */
+static inline struct pipe_inode_info *pipe_info(struct inode *inode)
+{
+	if (S_ISFIFO(inode->i_mode))
+		return inode->i_pipe;
+
+	return NULL;
+}
+
 /*
  * Determine where to splice to/from.
  */
@@ -1119,7 +1132,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 	loff_t offset, *off;
 	long ret;
 
-	pipe = in->f_dentry->d_inode->i_pipe;
+	pipe = pipe_info(in->f_dentry->d_inode);
 	if (pipe) {
 		if (off_in)
 			return -ESPIPE;
@@ -1140,7 +1153,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 		return ret;
 	}
 
-	pipe = out->f_dentry->d_inode->i_pipe;
+	pipe = pipe_info(out->f_dentry->d_inode);
 	if (pipe) {
 		if (off_out)
 			return -ESPIPE;
@@ -1298,7 +1311,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 			unsigned long nr_segs, unsigned int flags)
 {
-	struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
+	struct pipe_inode_info *pipe;
 	struct page *pages[PIPE_BUFFERS];
 	struct partial_page partial[PIPE_BUFFERS];
 	struct splice_pipe_desc spd = {
@@ -1308,7 +1321,8 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 		.ops = &user_page_pipe_buf_ops,
 	};
 
-	if (unlikely(!pipe))
+	pipe = pipe_info(file->f_dentry->d_inode);
+	if (!pipe)
 		return -EBADF;
 	if (unlikely(nr_segs > UIO_MAXIOV))
 		return -EINVAL;
@@ -1535,8 +1549,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 static long do_tee(struct file *in, struct file *out, size_t len,
 		   unsigned int flags)
 {
-	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
-	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
+	struct pipe_inode_info *ipipe = pipe_info(in->f_dentry->d_inode);
+	struct pipe_inode_info *opipe = pipe_info(out->f_dentry->d_inode);
 	int ret = -EINVAL;
 
 	/*
-- 
cgit v1.2.3