diff options
Diffstat (limited to 'fs')
77 files changed, 1593 insertions, 873 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 37db79a2ff95..97e3bdedb1e6 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -902,65 +902,7 @@ endif # BLOCK menu "Pseudo filesystems" -config PROC_FS - bool "/proc file system support" if EMBEDDED - default y - help - This is a virtual file system providing information about the status - of the system. "Virtual" means that it doesn't take up any space on - your hard disk: the files are created on the fly by the kernel when - you try to access them. Also, you cannot read the files with older - version of the program less: you need to use more or cat. - - It's totally cool; for example, "cat /proc/interrupts" gives - information about what the different IRQs are used for at the moment - (there is a small number of Interrupt ReQuest lines in your computer - that are used by the attached devices to gain the CPU's attention -- - often a source of trouble if two devices are mistakenly configured - to use the same IRQ). The program procinfo to display some - information about your system gathered from the /proc file system. - - Before you can use the /proc file system, it has to be mounted, - meaning it has to be given a location in the directory hierarchy. - That location should be /proc. A command such as "mount -t proc proc - /proc" or the equivalent line in /etc/fstab does the job. - - The /proc file system is explained in the file - <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage - ("man 5 proc"). - - This option will enlarge your kernel by about 67 KB. Several - programs depend on this, so everyone should say Y here. - -config PROC_KCORE - bool "/proc/kcore support" if !ARM - depends on PROC_FS && MMU - -config PROC_VMCORE - bool "/proc/vmcore support (EXPERIMENTAL)" - depends on PROC_FS && CRASH_DUMP - default y - help - Exports the dump image of crashed kernel in ELF format. - -config PROC_SYSCTL - bool "Sysctl support (/proc/sys)" if EMBEDDED - depends on PROC_FS - select SYSCTL - default y - ---help--- - The sysctl interface provides a means of dynamically changing - certain kernel parameters and variables on the fly without requiring - a recompile of the kernel or reboot of the system. The primary - interface is through /proc/sys. If you say Y here a tree of - modifiable sysctl entries will be generated beneath the - /proc/sys directory. They are explained in the files - in <file:Documentation/sysctl/>. Note that enabling this - option will enlarge the kernel by at least 8 KB. - - As it is generally a good thing, you should say Y here unless - building a kernel for install/rescue disks or your system is very - limited in memory. +source "fs/proc/Kconfig" config SYSFS bool "sysfs file system support" if EMBEDDED @@ -2093,20 +2035,6 @@ config CODA_FS To compile the coda client support as a module, choose M here: the module will be called coda. -config CODA_FS_OLD_API - bool "Use 96-bit Coda file identifiers" - depends on CODA_FS - help - A new kernel-userspace API had to be introduced for Coda v6.0 - to support larger 128-bit file identifiers as needed by the - new realms implementation. - - However this new API is not backward compatible with older - clients. If you really need to run the old Coda userspace - cache manager then say Y. - - For most cases you probably want to say N. - config AFS_FS tristate "Andrew File System support (AFS) (EXPERIMENTAL)" depends on INET && EXPERIMENTAL @@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm) struct task_struct *tsk = current; task_lock(tsk); - tsk->flags |= PF_BORROWED_MM; active_mm = tsk->active_mm; atomic_inc(&mm->mm_count); tsk->mm = mm; @@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm) struct task_struct *tsk = current; task_lock(tsk); - tsk->flags &= ~PF_BORROWED_MM; tsk->mm = NULL; /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 639d2d8b5710..3b6ff854d983 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss) #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) #endif +#ifndef ELF_BASE_PLATFORM +/* + * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture. + * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value + * will be copied to the user stack in the same manner as AT_PLATFORM. + */ +#define ELF_BASE_PLATFORM NULL +#endif + static int create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, unsigned long load_addr, unsigned long interp_load_addr) @@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, elf_addr_t __user *envp; elf_addr_t __user *sp; elf_addr_t __user *u_platform; + elf_addr_t __user *u_base_platform; const char *k_platform = ELF_PLATFORM; + const char *k_base_platform = ELF_BASE_PLATFORM; int items; elf_addr_t *elf_info; int ei_index = 0; @@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return -EFAULT; } + /* + * If this architecture has a "base" platform capability + * string, copy it to userspace. + */ + u_base_platform = NULL; + if (k_base_platform) { + size_t len = strlen(k_base_platform) + 1; + + u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len); + if (__copy_to_user(u_base_platform, k_base_platform, len)) + return -EFAULT; + } + /* Create the ELF interpreter info */ elf_info = (elf_addr_t *)current->mm->saved_auxv; /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ @@ -209,6 +233,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform); } + if (k_base_platform) { + NEW_AUX_ENT(AT_BASE_PLATFORM, + (elf_addr_t)(unsigned long)u_base_platform); + } if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); } @@ -1478,7 +1506,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, const struct user_regset_view *view = task_user_regset_view(dump_task); struct elf_thread_core_info *t; struct elf_prpsinfo *psinfo; - struct task_struct *g, *p; + struct core_thread *ct; unsigned int i; info->size = 0; @@ -1517,31 +1545,26 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, /* * Allocate a structure for each thread. */ - rcu_read_lock(); - do_each_thread(g, p) - if (p->mm == dump_task->mm) { - t = kzalloc(offsetof(struct elf_thread_core_info, - notes[info->thread_notes]), - GFP_ATOMIC); - if (unlikely(!t)) { - rcu_read_unlock(); - return 0; - } - t->task = p; - if (p == dump_task || !info->thread) { - t->next = info->thread; - info->thread = t; - } else { - /* - * Make sure to keep the original task at - * the head of the list. - */ - t->next = info->thread->next; - info->thread->next = t; - } + for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) { + t = kzalloc(offsetof(struct elf_thread_core_info, + notes[info->thread_notes]), + GFP_KERNEL); + if (unlikely(!t)) + return 0; + + t->task = ct->task; + if (ct->task == dump_task || !info->thread) { + t->next = info->thread; + info->thread = t; + } else { + /* + * Make sure to keep the original task at + * the head of the list. + */ + t->next = info->thread->next; + info->thread->next = t; } - while_each_thread(g, p); - rcu_read_unlock(); + } /* * Now fill in each thread's information. @@ -1688,7 +1711,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, { #define NUM_NOTES 6 struct list_head *t; - struct task_struct *g, *p; info->notes = NULL; info->prstatus = NULL; @@ -1720,20 +1742,19 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, info->thread_status_size = 0; if (signr) { + struct core_thread *ct; struct elf_thread_status *ets; - rcu_read_lock(); - do_each_thread(g, p) - if (current->mm == p->mm && current != p) { - ets = kzalloc(sizeof(*ets), GFP_ATOMIC); - if (!ets) { - rcu_read_unlock(); - return 0; - } - ets->thread = p; - list_add(&ets->list, &info->thread_list); - } - while_each_thread(g, p); - rcu_read_unlock(); + + for (ct = current->mm->core_state->dumper.next; + ct; ct = ct->next) { + ets = kzalloc(sizeof(*ets), GFP_KERNEL); + if (!ets) + return 0; + + ets->thread = ct->task; + list_add(&ets->list, &info->thread_list); + } + list_for_each(t, &info->thread_list) { int sz; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index d051a32e6270..1b59b1edf26d 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1573,7 +1573,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, struct memelfnote *notes = NULL; struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */ - struct task_struct *g, *p; LIST_HEAD(thread_list); struct list_head *t; elf_fpregset_t *fpu = NULL; @@ -1622,20 +1621,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, #endif if (signr) { + struct core_thread *ct; struct elf_thread_status *tmp; - rcu_read_lock(); - do_each_thread(g,p) - if (current->mm == p->mm && current != p) { - tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); - if (!tmp) { - rcu_read_unlock(); - goto cleanup; - } - tmp->thread = p; - list_add(&tmp->list, &thread_list); - } - while_each_thread(g,p); - rcu_read_unlock(); + + for (ct = current->mm->core_state->dumper.next; + ct; ct = ct->next) { + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + goto cleanup; + + tmp->thread = ct->task; + list_add(&tmp->list, &thread_list); + } + list_for_each(t, &thread_list) { struct elf_thread_status *tmp; int sz; diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index e1c854890f94..bf4a3fd3c8e3 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -28,11 +28,9 @@ int coda_fake_statfs; char * coda_f2s(struct CodaFid *f) { static char s[60]; -#ifdef CONFIG_CODA_FS_OLD_API - sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]); -#else + sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]); -#endif + return s; } diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 40c36f7352a6..0d9b80ec689c 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -378,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam"); MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); MODULE_LICENSE("GPL"); -#ifdef CONFIG_CODA_FS_OLD_API -MODULE_VERSION("5.3.21"); -#else MODULE_VERSION("6.6"); -#endif static int __init init_coda(void) { diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 359e531094dd..ce432bca95d1 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size) inp->ih.opcode = opcode; inp->ih.pid = current->pid; inp->ih.pgid = task_pgrp_nr(current); -#ifdef CONFIG_CODA_FS_OLD_API - memset(&inp->ih.cred, 0, sizeof(struct coda_cred)); - inp->ih.cred.cr_fsuid = current->fsuid; -#else inp->ih.uid = current->fsuid; -#endif + return (void*)inp; } @@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, union inputArgs *inp; union outputArgs *outp; int insize, outsize, error; -#ifdef CONFIG_CODA_FS_OLD_API - struct coda_cred cred = { 0, }; - cred.cr_fsuid = uid; -#endif insize = SIZE(release); UPARG(CODA_CLOSE); -#ifdef CONFIG_CODA_FS_OLD_API - memcpy(&(inp->ih.cred), &cred, sizeof(cred)); -#else inp->ih.uid = uid; -#endif - inp->coda_close.VFid = *fid; inp->coda_close.flags = flags; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 18e2c548161d..5235c67e7594 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -25,7 +25,6 @@ #include <linux/slab.h> #include <linux/raid/md.h> #include <linux/kd.h> -#include <linux/dirent.h> #include <linux/route.h> #include <linux/in6.h> #include <linux/ipv6_route.h> diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 78878c5781ca..eba87ff3177b 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, if (xop->callback == NULL) wait_event(recv_wq, (op->done != 0)); else { - rv = -EINPROGRESS; + rv = FILE_LOCK_DEFERRED; goto out; } diff --git a/fs/dquot.c b/fs/dquot.c index 5ac77da19959..1346eebe74ce 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -562,6 +562,8 @@ static struct shrinker dqcache_shrinker = { */ static void dqput(struct dquot *dquot) { + int ret; + if (!dquot) return; #ifdef __DQUOT_PARANOIA @@ -594,7 +596,19 @@ we_slept: if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ - dquot->dq_sb->dq_op->write_dquot(dquot); + ret = dquot->dq_sb->dq_op->write_dquot(dquot); + if (ret < 0) { + printk(KERN_ERR "VFS: cannot write quota structure on " + "device %s (error %d). Quota may get out of " + "sync!\n", dquot->dq_sb->s_id, ret); + /* + * We clear dirty bit anyway, so that we avoid + * infinite loop here + */ + spin_lock(&dq_list_lock); + clear_dquot_dirty(dquot); + spin_unlock(&dq_list_lock); + } goto we_slept; } /* Clear flag in case dquot was inactive (something bad happened) */ @@ -875,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype) char *msg = NULL; struct tty_struct *tty; - if (!need_print_warning(dquot)) + if (warntype == QUOTA_NL_IHARDBELOW || + warntype == QUOTA_NL_ISOFTBELOW || + warntype == QUOTA_NL_BHARDBELOW || + warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) return; mutex_lock(&tty_mutex); @@ -1083,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war return QUOTA_OK; } +static int info_idq_free(struct dquot *dquot, ulong inodes) +{ + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || + dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit) + return QUOTA_NL_NOWARN; + + if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit) + return QUOTA_NL_ISOFTBELOW; + if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit && + dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit) + return QUOTA_NL_IHARDBELOW; + return QUOTA_NL_NOWARN; +} + +static int info_bdq_free(struct dquot *dquot, qsize_t space) +{ + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || + toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit) + return QUOTA_NL_NOWARN; + + if (toqb(dquot->dq_dqb.dqb_curspace - space) <= + dquot->dq_dqb.dqb_bsoftlimit) + return QUOTA_NL_BSOFTBELOW; + if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit && + toqb(dquot->dq_dqb.dqb_curspace - space) < + dquot->dq_dqb.dqb_bhardlimit) + return QUOTA_NL_BHARDBELOW; + return QUOTA_NL_NOWARN; +} /* * Initialize quota pointers in inode * Transaction must be started at entry @@ -1139,6 +1185,28 @@ int dquot_drop(struct inode *inode) return 0; } +/* Wrapper to remove references to quota structures from inode */ +void vfs_dq_drop(struct inode *inode) +{ + /* Here we can get arbitrary inode from clear_inode() so we have + * to be careful. OTOH we don't need locking as quota operations + * are allowed to change only at mount time */ + if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op + && inode->i_sb->dq_op->drop) { + int cnt; + /* Test before calling to rule out calls from proc and such + * where we are not allowed to block. Note that this is + * actually reliable test even without the lock - the caller + * must assure that nobody can come after the DQUOT_DROP and + * add quota pointers back anyway */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt] != NODQUOT) + break; + if (cnt < MAXQUOTAS) + inode->i_sb->dq_op->drop(inode); + } +} + /* * Following four functions update i_blocks+i_bytes fields and * quota information (together with appropriate checks) @@ -1248,6 +1316,7 @@ warn_put_all: int dquot_free_space(struct inode *inode, qsize_t number) { unsigned int cnt; + char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1256,6 +1325,7 @@ out_sub: inode_sub_bytes(inode, number); return QUOTA_OK; } + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { @@ -1266,6 +1336,7 @@ out_sub: for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; + warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); dquot_decr_space(inode->i_dquot[cnt], number); } inode_sub_bytes(inode, number); @@ -1274,6 +1345,7 @@ out_sub: for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); + flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; } @@ -1284,11 +1356,13 @@ out_sub: int dquot_free_inode(const struct inode *inode, unsigned long number) { unsigned int cnt; + char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return QUOTA_OK; + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { @@ -1299,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; + warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number); dquot_decr_inodes(inode->i_dquot[cnt], number); } spin_unlock(&dq_data_lock); @@ -1306,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number) for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); + flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; } @@ -1323,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) struct dquot *transfer_to[MAXQUOTAS]; int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; - char warntype[MAXQUOTAS]; + char warntype_to[MAXQUOTAS]; + char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1332,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) /* Clear the arrays */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { transfer_to[cnt] = transfer_from[cnt] = NODQUOT; - warntype[cnt] = QUOTA_NL_NOWARN; + warntype_to[cnt] = QUOTA_NL_NOWARN; } down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ @@ -1364,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) if (transfer_to[cnt] == NODQUOT) continue; transfer_from[cnt] = inode->i_dquot[cnt]; - if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA || - check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA) + if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) == + NO_QUOTA || check_bdq(transfer_to[cnt], space, 0, + warntype_to + cnt) == NO_QUOTA) goto warn_put_all; } @@ -1381,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) /* Due to IO error we might not have transfer_from[] structure */ if (transfer_from[cnt]) { + warntype_from_inodes[cnt] = + info_idq_free(transfer_from[cnt], 1); + warntype_from_space[cnt] = + info_bdq_free(transfer_from[cnt], space); dquot_decr_inodes(transfer_from[cnt], 1); dquot_decr_space(transfer_from[cnt], space); } @@ -1400,7 +1482,9 @@ warn_put_all: if (transfer_to[cnt]) mark_dquot_dirty(transfer_to[cnt]); } - flush_warnings(transfer_to, warntype); + flush_warnings(transfer_to, warntype_to); + flush_warnings(transfer_from, warntype_from_inodes); + flush_warnings(transfer_from, warntype_from_space); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT) @@ -1412,6 +1496,18 @@ warn_put_all: return ret; } +/* Wrapper for transferring ownership of an inode */ +int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) +{ + if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) { + vfs_dq_init(inode); + if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) + return 1; + } + return 0; +} + + /* * Write info of quota file to disk */ @@ -1752,6 +1848,22 @@ out: return error; } +/* Wrapper to turn on quotas when remounting rw */ +int vfs_dq_quota_on_remount(struct super_block *sb) +{ + int cnt; + int ret = 0, err; + + if (!sb->s_qcop || !sb->s_qcop->quota_on) + return -ENOSYS; + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1); + if (err < 0 && !ret) + ret = err; + } + return ret; +} + /* Generic routine for getting common part of quota structure */ static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) { @@ -2087,8 +2199,11 @@ EXPORT_SYMBOL(dquot_release); EXPORT_SYMBOL(dquot_mark_dquot_dirty); EXPORT_SYMBOL(dquot_initialize); EXPORT_SYMBOL(dquot_drop); +EXPORT_SYMBOL(vfs_dq_drop); EXPORT_SYMBOL(dquot_alloc_space); EXPORT_SYMBOL(dquot_alloc_inode); EXPORT_SYMBOL(dquot_free_space); EXPORT_SYMBOL(dquot_free_inode); EXPORT_SYMBOL(dquot_transfer); +EXPORT_SYMBOL(vfs_dq_transfer); +EXPORT_SYMBOL(vfs_dq_quota_on_remount); diff --git a/fs/exec.c b/fs/exec.c index 190ed1f92774..5e559013e303 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -25,19 +25,18 @@ #include <linux/slab.h> #include <linux/file.h> #include <linux/fdtable.h> -#include <linux/mman.h> +#include <linux/mm.h> #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/smp_lock.h> +#include <linux/swap.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/spinlock.h> #include <linux/key.h> #include <linux/personality.h> #include <linux/binfmts.h> -#include <linux/swap.h> #include <linux/utsname.h> #include <linux/pid_namespace.h> #include <linux/module.h> @@ -47,7 +46,6 @@ #include <linux/mount.h> #include <linux/security.h> #include <linux/syscalls.h> -#include <linux/rmap.h> #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/audit.h> @@ -724,12 +722,10 @@ static int exec_mmap(struct mm_struct *mm) * Make sure that if there is a core dump in progress * for the old mm, we get out and die instead of going * through with the exec. We must hold mmap_sem around - * checking core_waiters and changing tsk->mm. The - * core-inducing thread will increment core_waiters for - * each thread whose ->mm == old_mm. + * checking core_state and changing tsk->mm. */ down_read(&old_mm->mmap_sem); - if (unlikely(old_mm->core_waiters)) { + if (unlikely(old_mm->core_state)) { up_read(&old_mm->mmap_sem); return -EINTR; } @@ -1328,6 +1324,7 @@ int do_execve(char * filename, if (retval < 0) goto out; + current->flags &= ~PF_KTHREAD; retval = search_binary_handler(bprm,regs); if (retval >= 0) { /* execve success */ @@ -1382,17 +1379,14 @@ EXPORT_SYMBOL(set_binfmt); * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static int format_corename(char *corename, const char *pattern, long signr) +static int format_corename(char *corename, int nr_threads, long signr) { - const char *pat_ptr = pattern; + const char *pat_ptr = core_pattern; + int ispipe = (*pat_ptr == '|'); char *out_ptr = corename; char *const out_end = corename + CORENAME_MAX_SIZE; int rc; int pid_in_pattern = 0; - int ispipe = 0; - - if (*pattern == '|') - ispipe = 1; /* Repeat as long as we have more pattern to process and more output space */ @@ -1493,7 +1487,7 @@ static int format_corename(char *corename, const char *pattern, long signr) * and core_uses_pid is set, then .%pid will be appended to * the filename. Do not do this for piped commands. */ if (!ispipe && !pid_in_pattern - && (core_uses_pid || atomic_read(¤t->mm->mm_users) != 1)) { + && (core_uses_pid || nr_threads)) { rc = snprintf(out_ptr, out_end - out_ptr, ".%d", task_tgid_vnr(current)); if (rc > out_end - out_ptr) @@ -1505,9 +1499,10 @@ out: return ispipe; } -static void zap_process(struct task_struct *start) +static int zap_process(struct task_struct *start) { struct task_struct *t; + int nr = 0; start->signal->flags = SIGNAL_GROUP_EXIT; start->signal->group_stop_count = 0; @@ -1515,72 +1510,99 @@ static void zap_process(struct task_struct *start) t = start; do { if (t != current && t->mm) { - t->mm->core_waiters++; sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); + nr++; } - } while ((t = next_thread(t)) != start); + } while_each_thread(start, t); + + return nr; } static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, - int exit_code) + struct core_state *core_state, int exit_code) { struct task_struct *g, *p; unsigned long flags; - int err = -EAGAIN; + int nr = -EAGAIN; spin_lock_irq(&tsk->sighand->siglock); if (!signal_group_exit(tsk->signal)) { + mm->core_state = core_state; tsk->signal->group_exit_code = exit_code; - zap_process(tsk); - err = 0; + nr = zap_process(tsk); } spin_unlock_irq(&tsk->sighand->siglock); - if (err) - return err; + if (unlikely(nr < 0)) + return nr; - if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) + if (atomic_read(&mm->mm_users) == nr + 1) goto done; - + /* + * We should find and kill all tasks which use this mm, and we should + * count them correctly into ->nr_threads. We don't take tasklist + * lock, but this is safe wrt: + * + * fork: + * None of sub-threads can fork after zap_process(leader). All + * processes which were created before this point should be + * visible to zap_threads() because copy_process() adds the new + * process to the tail of init_task.tasks list, and lock/unlock + * of ->siglock provides a memory barrier. + * + * do_exit: + * The caller holds mm->mmap_sem. This means that the task which + * uses this mm can't pass exit_mm(), so it can't exit or clear + * its ->mm. + * + * de_thread: + * It does list_replace_rcu(&leader->tasks, ¤t->tasks), + * we must see either old or new leader, this does not matter. + * However, it can change p->sighand, so lock_task_sighand(p) + * must be used. Since p->mm != NULL and we hold ->mmap_sem + * it can't fail. + * + * Note also that "g" can be the old leader with ->mm == NULL + * and already unhashed and thus removed from ->thread_group. + * This is OK, __unhash_process()->list_del_rcu() does not + * clear the ->next pointer, we will find the new leader via + * next_thread(). + */ rcu_read_lock(); for_each_process(g) { if (g == tsk->group_leader) continue; - + if (g->flags & PF_KTHREAD) + continue; p = g; do { if (p->mm) { - if (p->mm == mm) { - /* - * p->sighand can't disappear, but - * may be changed by de_thread() - */ + if (unlikely(p->mm == mm)) { lock_task_sighand(p, &flags); - zap_process(p); + nr += zap_process(p); unlock_task_sighand(p, &flags); } break; } - } while ((p = next_thread(p)) != g); + } while_each_thread(g, p); } rcu_read_unlock(); done: - return mm->core_waiters; + atomic_set(&core_state->nr_threads, nr); + return nr; } -static int coredump_wait(int exit_code) +static int coredump_wait(int exit_code, struct core_state *core_state) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - struct completion startup_done; struct completion *vfork_done; int core_waiters; - init_completion(&mm->core_done); - init_completion(&startup_done); - mm->core_startup_done = &startup_done; - - core_waiters = zap_threads(tsk, mm, exit_code); + init_completion(&core_state->startup); + core_state->dumper.task = tsk; + core_state->dumper.next = NULL; + core_waiters = zap_threads(tsk, mm, core_state, exit_code); up_write(&mm->mmap_sem); if (unlikely(core_waiters < 0)) @@ -1597,12 +1619,32 @@ static int coredump_wait(int exit_code) } if (core_waiters) - wait_for_completion(&startup_done); + wait_for_completion(&core_state->startup); fail: - BUG_ON(mm->core_waiters); return core_waiters; } +static void coredump_finish(struct mm_struct *mm) +{ + struct core_thread *curr, *next; + struct task_struct *task; + + next = mm->core_state->dumper.next; + while ((curr = next) != NULL) { + next = curr->next; + task = curr->task; + /* + * see exit_mm(), curr->task must not see + * ->task == NULL before we read ->next. + */ + smp_mb(); + curr->task = NULL; + wake_up_process(task); + } + + mm->core_state = NULL; +} + /* * set_dumpable converts traditional three-value dumpable to two flags and * stores them into mm->flags. It modifies lower two bits of mm->flags, but @@ -1654,6 +1696,7 @@ int get_dumpable(struct mm_struct *mm) int do_coredump(long signr, int exit_code, struct pt_regs * regs) { + struct core_state core_state; char corename[CORENAME_MAX_SIZE + 1]; struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; @@ -1677,7 +1720,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) /* * If another thread got here first, or we are not dumpable, bail out. */ - if (mm->core_waiters || !get_dumpable(mm)) { + if (mm->core_state || !get_dumpable(mm)) { up_write(&mm->mmap_sem); goto fail; } @@ -1692,7 +1735,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) current->fsuid = 0; /* Dump root private */ } - retval = coredump_wait(exit_code); + retval = coredump_wait(exit_code, &core_state); if (retval < 0) goto fail; @@ -1707,7 +1750,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) * uses lock_kernel() */ lock_kernel(); - ispipe = format_corename(corename, core_pattern, signr); + ispipe = format_corename(corename, retval, signr); unlock_kernel(); /* * Don't bother to check the RLIMIT_CORE value if core_pattern points @@ -1786,7 +1829,7 @@ fail_unlock: argv_free(helper_argv); current->fsuid = fsuid; - complete_all(&mm->core_done); + coredump_finish(mm); fail: return retval; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index ef50cbc792db..31308a3b0b8b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -31,6 +31,7 @@ #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/log2.h> +#include <linux/quotaops.h> #include <asm/uaccess.h> #include "ext2.h" #include "xattr.h" diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index eaa23d2d5213..70c0dbdcdcb7 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -14,7 +14,7 @@ static size_t ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; + const int prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (list && total_len <= list_size) { diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 83ee149f353d..e8219f8eae9f 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -12,13 +12,11 @@ #include <linux/ext2_fs.h> #include "xattr.h" -#define XATTR_TRUSTED_PREFIX "trusted." - static size_t ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; + const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (!capable(CAP_SYS_ADMIN)) diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index f383e7c3a7b5..92495d28c62f 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -11,13 +11,11 @@ #include "ext2.h" #include "xattr.h" -#define XATTR_USER_PREFIX "user." - static size_t ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; + const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (!test_opt(inode->i_sb, XATTR_USER)) diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 8ca3bfd72427..2eea96ec78ed 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -272,7 +272,7 @@ static void free_rb_tree_fname(struct rb_root *root) while (n) { /* Do the node's children first */ - if ((n)->rb_left) { + if (n->rb_left) { n = n->rb_left; continue; } @@ -301,24 +301,18 @@ static void free_rb_tree_fname(struct rb_root *root) parent->rb_right = NULL; n = parent; } - root->rb_node = NULL; } -static struct dir_private_info *create_dir_info(loff_t pos) +static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) { struct dir_private_info *p; - p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); + p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); if (!p) return NULL; - p->root.rb_node = NULL; - p->curr_node = NULL; - p->extra_fname = NULL; - p->last_pos = 0; p->curr_hash = pos2maj_hash(pos); p->curr_minor_hash = pos2min_hash(pos); - p->next_hash = 0; return p; } @@ -433,7 +427,7 @@ static int ext3_dx_readdir(struct file * filp, int ret; if (!info) { - info = create_dir_info(filp->f_pos); + info = ext3_htree_create_dir_info(filp->f_pos); if (!info) return -ENOMEM; filp->private_data = info; diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 77126821b2e9..47b678d73e7a 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) if (IS_ERR(inode)) goto iget_failed; + /* + * If the orphans has i_nlinks > 0 then it should be able to be + * truncated, otherwise it won't be removed from the orphan list + * during processing and an infinite loop will result. + */ + if (inode->i_nlink && !ext3_can_truncate(inode)) + goto bad_orphan; + if (NEXT_ORPHAN(inode) > max_ino) goto bad_orphan; brelse(bitmap_bh); @@ -690,6 +698,7 @@ bad_orphan: printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", NEXT_ORPHAN(inode)); printk(KERN_NOTICE "max_ino=%lu\n", max_ino); + printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); /* Avoid freeing blocks if we got a bad deleted inode */ if (inode->i_nlink == 0) inode->i_blocks = 0; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 6ae4ecf3ce40..3bf07d70b914 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2127,7 +2127,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode, if (this_bh) { BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, this_bh); + + /* + * The buffer head should have an attached journal head at this + * point. However, if the data is corrupted and an indirect + * block pointed to itself, it would have been detached when + * the block was cleared. Check for this instead of OOPSing. + */ + if (bh2jh(this_bh)) + ext3_journal_dirty_metadata(handle, this_bh); + else + ext3_error(inode->i_sb, "ext3_free_data", + "circular indirect block detected, " + "inode=%lu, block=%llu", + inode->i_ino, + (unsigned long long)this_bh->b_blocknr); } } @@ -2253,6 +2267,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, } } +int ext3_can_truncate(struct inode *inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return 0; + if (S_ISREG(inode->i_mode)) + return 1; + if (S_ISDIR(inode->i_mode)) + return 1; + if (S_ISLNK(inode->i_mode)) + return !ext3_inode_is_fast_symlink(inode); + return 0; +} + /* * ext3_truncate() * @@ -2297,12 +2324,7 @@ void ext3_truncate(struct inode *inode) unsigned blocksize = inode->i_sb->s_blocksize; struct page *page; - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - if (ext3_inode_is_fast_symlink(inode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (!ext3_can_truncate(inode)) return; /* @@ -2513,6 +2535,16 @@ static int __ext3_get_inode_loc(struct inode *inode, } if (!buffer_uptodate(bh)) { lock_buffer(bh); + + /* + * If the buffer has the write error flag, we have failed + * to write out another inode in the same block. In this + * case, we don't have to read the block because we may + * read the old inode data successfully. + */ + if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) + set_buffer_uptodate(bh); + if (buffer_uptodate(bh)) { /* someone brought it uptodate while we waited */ unlock_buffer(bh); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 0b8cf80154f1..de13e919cd81 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -240,13 +240,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) { unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - EXT3_DIR_REC_LEN(2) - infosize; - return 0? 20: entry_space / sizeof(struct dx_entry); + return entry_space / sizeof(struct dx_entry); } static inline unsigned dx_node_limit (struct inode *dir) { unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); - return 0? 22: entry_space / sizeof(struct dx_entry); + return entry_space / sizeof(struct dx_entry); } /* @@ -991,19 +991,21 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, de = (struct ext3_dir_entry_2 *) bh->b_data; top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - EXT3_DIR_REC_LEN(0)); - for (; de < top; de = ext3_next_entry(de)) - if (ext3_match (namelen, name, de)) { - if (!ext3_check_dir_entry("ext3_find_entry", - dir, de, bh, - (block<<EXT3_BLOCK_SIZE_BITS(sb)) - +((char *)de - bh->b_data))) { - brelse (bh); + for (; de < top; de = ext3_next_entry(de)) { + int off = (block << EXT3_BLOCK_SIZE_BITS(sb)) + + ((char *) de - bh->b_data); + + if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) { + brelse(bh); *err = ERR_BAD_DX_DIR; goto errout; } - *res_dir = de; - dx_release (frames); - return bh; + + if (ext3_match(namelen, name, de)) { + *res_dir = de; + dx_release(frames); + return bh; + } } brelse (bh); /* Check to see if we should continue to search */ diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 2845425077e8..615788c6843a 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -842,7 +842,7 @@ static int parse_options (char *options, struct super_block *sb, int data_opt = 0; int option; #ifdef CONFIG_QUOTA - int qtype; + int qtype, qfmt; char *qname; #endif @@ -1018,9 +1018,11 @@ static int parse_options (char *options, struct super_block *sb, case Opt_grpjquota: qtype = GRPQUOTA; set_qf_name: - if (sb_any_quota_enabled(sb)) { + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + !sbi->s_qf_names[qtype]) { printk(KERN_ERR - "EXT3-fs: Cannot change journalled " + "EXT3-fs: Cannot change journaled " "quota options when quota turned on.\n"); return 0; } @@ -1056,9 +1058,11 @@ set_qf_name: case Opt_offgrpjquota: qtype = GRPQUOTA; clear_qf_name: - if (sb_any_quota_enabled(sb)) { + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT3-fs: Cannot change " - "journalled quota options when " + "journaled quota options when " "quota turned on.\n"); return 0; } @@ -1069,10 +1073,20 @@ clear_qf_name: sbi->s_qf_names[qtype] = NULL; break; case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; - break; + qfmt = QFMT_VFS_OLD; + goto set_qf_format; case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; + qfmt = QFMT_VFS_V0; +set_qf_format: + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + sbi->s_jquota_fmt != qfmt) { + printk(KERN_ERR "EXT3-fs: Cannot change " + "journaled quota options when " + "quota turned on.\n"); + return 0; + } + sbi->s_jquota_fmt = qfmt; break; case Opt_quota: case Opt_usrquota: @@ -1084,7 +1098,8 @@ clear_qf_name: set_opt(sbi->s_mount_opt, GRPQUOTA); break; case Opt_noquota: - if (sb_any_quota_enabled(sb)) { + if (sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) { printk(KERN_ERR "EXT3-fs: Cannot change quota " "options when quota turned on.\n"); return 0; @@ -1169,14 +1184,14 @@ clear_qf_name: } if (!sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT3-fs: journalled quota format " + printk(KERN_ERR "EXT3-fs: journaled quota format " "not specified.\n"); return 0; } } else { if (sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT3-fs: journalled quota format " - "specified with no journalling " + printk(KERN_ERR "EXT3-fs: journaled quota format " + "specified with no journaling " "enabled.\n"); return 0; } @@ -1370,7 +1385,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, int ret = ext3_quota_on_mount(sb, i); if (ret < 0) printk(KERN_ERR - "EXT3-fs: Cannot turn on journalled " + "EXT3-fs: Cannot turn on journaled " "quota: error %d\n", ret); } } @@ -2712,7 +2727,7 @@ static int ext3_release_dquot(struct dquot *dquot) static int ext3_mark_dquot_dirty(struct dquot *dquot) { - /* Are we journalling quotas? */ + /* Are we journaling quotas? */ if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); @@ -2759,23 +2774,42 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, if (!test_opt(sb, QUOTA)) return -EINVAL; - /* Not journalling quota or remount? */ - if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] && - !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount) + /* When remounting, no checks are needed and in fact, path is NULL */ + if (remount) return vfs_quota_on(sb, type, format_id, path, remount); + err = path_lookup(path, LOOKUP_FOLLOW, &nd); if (err) return err; + /* Quotafile not on the same filesystem? */ if (nd.path.mnt->mnt_sb != sb) { path_put(&nd.path); return -EXDEV; } - /* Quotafile not in fs root? */ - if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) - printk(KERN_WARNING - "EXT3-fs: Quota file not on filesystem root. " - "Journalled quota will not work.\n"); + /* Journaling quota? */ + if (EXT3_SB(sb)->s_qf_names[type]) { + /* Quotafile not of fs root? */ + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) + printk(KERN_WARNING + "EXT3-fs: Quota file not on filesystem root. " + "Journaled quota will not work.\n"); + } + + /* + * When we journal data on quota file, we have to flush journal to see + * all updates to the file when we bypass pagecache... + */ + if (ext3_should_journal_data(nd.path.dentry->d_inode)) { + /* + * We don't need to lock updates but journal_flush() could + * otherwise be livelocked... + */ + journal_lock_updates(EXT3_SB(sb)->s_journal); + journal_flush(EXT3_SB(sb)->s_journal); + journal_unlock_updates(EXT3_SB(sb)->s_journal); + } + path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path, remount); } diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index 821efaf2b94e..37b81097bdf2 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c @@ -15,7 +15,7 @@ static size_t ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; + const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c index 0327497a55ce..c7c41a410c4b 100644 --- a/fs/ext3/xattr_trusted.c +++ b/fs/ext3/xattr_trusted.c @@ -13,13 +13,11 @@ #include <linux/ext3_fs.h> #include "xattr.h" -#define XATTR_TRUSTED_PREFIX "trusted." - static size_t ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; + const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (!capable(CAP_SYS_ADMIN)) diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index 1abd8f92c440..430fe63b31b3 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c @@ -12,13 +12,11 @@ #include <linux/ext3_fs.h> #include "xattr.h" -#define XATTR_USER_PREFIX "user." - static size_t ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; + const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (!test_opt(inode->i_sb, XATTR_USER)) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 34541d06e626..cd4a0162e10d 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -17,7 +17,6 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/msdos_fs.h> -#include <linux/dirent.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/compat.h> @@ -124,10 +123,11 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos, * but ignore that right now. * Ahem... Stack smashing in ring 0 isn't fun. Fixed. */ -static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len, +static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len, int uni_xlate, struct nls_table *nls) { - wchar_t *ip, ec; + const wchar_t *ip; + wchar_t ec; unsigned char *op, nc; int charlen; int k; @@ -167,6 +167,16 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len, return (op - ascii); } +static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni, + unsigned char *buf, int size) +{ + if (sbi->options.utf8) + return utf8_wcstombs(buf, uni, size); + else + return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, + sbi->nls_io); +} + static inline int fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) { @@ -227,6 +237,19 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size, return len; } +static inline int fat_name_match(struct msdos_sb_info *sbi, + const unsigned char *a, int a_len, + const unsigned char *b, int b_len) +{ + if (a_len != b_len) + return 0; + + if (sbi->options.name_check != 's') + return !nls_strnicmp(sbi->nls_io, a, b, a_len); + else + return !memcmp(a, b, a_len); +} + enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, }; /** @@ -302,6 +325,19 @@ parse_long: } /* + * Maximum buffer size of short name. + * [(MSDOS_NAME + '.') * max one char + nul] + * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul] + */ +#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1) +/* + * Maximum buffer size of unicode chars from slots. + * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)] + */ +#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1) +#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t)) + +/* * Return values: negative -> error, 0 -> not found, positive -> found, * value is the total amount of slots, including the shortname entry. */ @@ -312,29 +348,20 @@ int fat_search_long(struct inode *inode, const unsigned char *name, struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh = NULL; struct msdos_dir_entry *de; - struct nls_table *nls_io = sbi->nls_io; struct nls_table *nls_disk = sbi->nls_disk; - wchar_t bufuname[14]; unsigned char nr_slots; - int xlate_len; + wchar_t bufuname[14]; wchar_t *unicode = NULL; unsigned char work[MSDOS_NAME]; - unsigned char *bufname = NULL; - int uni_xlate = sbi->options.unicode_xlate; - int utf8 = sbi->options.utf8; - int anycase = (sbi->options.name_check != 's'); + unsigned char bufname[FAT_MAX_SHORT_SIZE]; unsigned short opt_shortname = sbi->options.shortname; loff_t cpos = 0; - int chl, i, j, last_u, err; - - bufname = __getname(); - if (!bufname) - return -ENOMEM; + int chl, i, j, last_u, err, len; err = -ENOENT; - while(1) { + while (1) { if (fat_get_entry(inode, &cpos, &bh, &de) == -1) - goto EODir; + goto end_of_dir; parse_record: nr_slots = 0; if (de->name[0] == DELETED_FLAG) @@ -353,7 +380,7 @@ parse_record: else if (status == PARSE_NOT_LONGNAME) goto parse_record; else if (status == PARSE_EOF) - goto EODir; + goto end_of_dir; } memcpy(work, de->name, sizeof(de->name)); @@ -394,30 +421,24 @@ parse_record: if (!last_u) continue; + /* Compare shortname */ bufuname[last_u] = 0x0000; - xlate_len = utf8 - ?utf8_wcstombs(bufname, bufuname, PATH_MAX) - :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io); - if (xlate_len == name_len) - if ((!anycase && !memcmp(name, bufname, xlate_len)) || - (anycase && !nls_strnicmp(nls_io, name, bufname, - xlate_len))) - goto Found; + len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); + if (fat_name_match(sbi, name, name_len, bufname, len)) + goto found; if (nr_slots) { - xlate_len = utf8 - ?utf8_wcstombs(bufname, unicode, PATH_MAX) - :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io); - if (xlate_len != name_len) - continue; - if ((!anycase && !memcmp(name, bufname, xlate_len)) || - (anycase && !nls_strnicmp(nls_io, name, bufname, - xlate_len))) - goto Found; + void *longname = unicode + FAT_MAX_UNI_CHARS; + int size = PATH_MAX - FAT_MAX_UNI_SIZE; + + /* Compare longname */ + len = fat_uni_to_x8(sbi, unicode, longname, size); + if (fat_name_match(sbi, name, name_len, longname, len)) + goto found; } } -Found: +found: nr_slots++; /* include the de */ sinfo->slot_off = cpos - nr_slots * sizeof(*de); sinfo->nr_slots = nr_slots; @@ -425,9 +446,7 @@ Found: sinfo->bh = bh; sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de); err = 0; -EODir: - if (bufname) - __putname(bufname); +end_of_dir: if (unicode) __putname(unicode); @@ -453,23 +472,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh; struct msdos_dir_entry *de; - struct nls_table *nls_io = sbi->nls_io; struct nls_table *nls_disk = sbi->nls_disk; - unsigned char long_slots; - const char *fill_name; - int fill_len; + unsigned char nr_slots; wchar_t bufuname[14]; wchar_t *unicode = NULL; - unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname; - unsigned long lpos, dummy, *furrfu = &lpos; - int uni_xlate = sbi->options.unicode_xlate; + unsigned char c, work[MSDOS_NAME]; + unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname; + unsigned short opt_shortname = sbi->options.shortname; int isvfat = sbi->options.isvfat; - int utf8 = sbi->options.utf8; int nocase = sbi->options.nocase; - unsigned short opt_shortname = sbi->options.shortname; + const char *fill_name = NULL; unsigned long inum; - int chi, chl, i, i2, j, last, last_u, dotoffset = 0; + unsigned long lpos, dummy, *furrfu = &lpos; loff_t cpos; + int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0; int ret = 0; lock_super(sb); @@ -489,43 +505,58 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, cpos = 0; } } - if (cpos & (sizeof(struct msdos_dir_entry)-1)) { + if (cpos & (sizeof(struct msdos_dir_entry) - 1)) { ret = -ENOENT; goto out; } bh = NULL; -GetNew: +get_new: if (fat_get_entry(inode, &cpos, &bh, &de) == -1) - goto EODir; + goto end_of_dir; parse_record: - long_slots = 0; - /* Check for long filename entry */ - if (isvfat) { + nr_slots = 0; + /* + * Check for long filename entry, but if short_only, we don't + * need to parse long filename. + */ + if (isvfat && !short_only) { if (de->name[0] == DELETED_FLAG) - goto RecEnd; + goto record_end; if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME)) - goto RecEnd; + goto record_end; if (de->attr != ATTR_EXT && IS_FREE(de->name)) - goto RecEnd; + goto record_end; } else { if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name)) - goto RecEnd; + goto record_end; } if (isvfat && de->attr == ATTR_EXT) { int status = fat_parse_long(inode, &cpos, &bh, &de, - &unicode, &long_slots); + &unicode, &nr_slots); if (status < 0) { filp->f_pos = cpos; ret = status; goto out; } else if (status == PARSE_INVALID) - goto RecEnd; + goto record_end; else if (status == PARSE_NOT_LONGNAME) goto parse_record; else if (status == PARSE_EOF) - goto EODir; + goto end_of_dir; + + if (nr_slots) { + void *longname = unicode + FAT_MAX_UNI_CHARS; + int size = PATH_MAX - FAT_MAX_UNI_SIZE; + int len = fat_uni_to_x8(sbi, unicode, longname, size); + + fill_name = longname; + fill_len = len; + /* !both && !short_only, so we don't need shortname. */ + if (!both) + goto start_filldir; + } } if (sbi->options.dotsOK) { @@ -587,12 +618,32 @@ parse_record: } } if (!last) - goto RecEnd; + goto record_end; i = last + dotoffset; j = last_u; - lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry); + if (isvfat) { + bufuname[j] = 0x0000; + i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); + } + if (nr_slots) { + /* hack for fat_ioctl_filldir() */ + struct fat_ioctl_filldir_callback *p = dirent; + + p->longname = fill_name; + p->long_len = fill_len; + p->shortname = bufname; + p->short_len = i; + fill_name = NULL; + fill_len = 0; + } else { + fill_name = bufname; + fill_len = i; + } + +start_filldir: + lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) inum = inode->i_ino; else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) { @@ -607,49 +658,17 @@ parse_record: inum = iunique(sb, MSDOS_ROOT_INO); } - if (isvfat) { - bufuname[j] = 0x0000; - i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname)) - : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io); - } - - fill_name = bufname; - fill_len = i; - if (!short_only && long_slots) { - /* convert the unicode long name. 261 is maximum size - * of unicode buffer. (13 * slots + nul) */ - void *longname = unicode + 261; - int buf_size = PATH_MAX - (261 * sizeof(unicode[0])); - int long_len = utf8 - ? utf8_wcstombs(longname, unicode, buf_size) - : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io); - - if (!both) { - fill_name = longname; - fill_len = long_len; - } else { - /* hack for fat_ioctl_filldir() */ - struct fat_ioctl_filldir_callback *p = dirent; - - p->longname = longname; - p->long_len = long_len; - p->shortname = bufname; - p->short_len = i; - fill_name = NULL; - fill_len = 0; - } - } if (filldir(dirent, fill_name, fill_len, *furrfu, inum, (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0) - goto FillFailed; + goto fill_failed; -RecEnd: +record_end: furrfu = &lpos; filp->f_pos = cpos; - goto GetNew; -EODir: + goto get_new; +end_of_dir: filp->f_pos = cpos; -FillFailed: +fill_failed: brelse(bh); if (unicode) __putname(unicode); @@ -715,7 +734,7 @@ efault: \ return -EFAULT; \ } -FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent) +FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent) static int fat_ioctl_readdir(struct inode *inode, struct file *filp, void __user *dirent, filldir_t filldir, @@ -741,7 +760,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp, static int fat_dir_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { - struct dirent __user *d1 = (struct dirent __user *)arg; + struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg; int short_only, both; switch (cmd) { @@ -757,7 +776,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp, return fat_generic_ioctl(inode, filp, cmd, arg); } - if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2]))) + if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2]))) return -EFAULT; /* * Yes, we don't need this put_user() absolutely. However old @@ -1082,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) goto error_free; } - fat_date_unix2dos(ts->tv_sec, &time, &date); + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); de = (struct msdos_dir_entry *)bhs[0]->b_data; /* filling the new directory slots ("." and ".." entries) */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 46a4508ffd2e..23676f9d79ce 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; inode->i_mtime.tv_sec = - date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date)); + date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date), + sbi->options.tz_utc); inode->i_mtime.tv_nsec = 0; if (sbi->options.isvfat) { int secs = de->ctime_cs / 100; int csecs = de->ctime_cs % 100; inode->i_ctime.tv_sec = date_dos2unix(le16_to_cpu(de->ctime), - le16_to_cpu(de->cdate)) + secs; + le16_to_cpu(de->cdate), + sbi->options.tz_utc) + secs; inode->i_ctime.tv_nsec = csecs * 10000000; inode->i_atime.tv_sec = - date_dos2unix(0, le16_to_cpu(de->adate)); + date_dos2unix(0, le16_to_cpu(de->adate), + sbi->options.tz_utc); inode->i_atime.tv_nsec = 0; } else inode->i_ctime = inode->i_atime = inode->i_mtime; @@ -591,11 +594,14 @@ retry: raw_entry->attr = fat_attr(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); - fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date); + fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, + &raw_entry->date, sbi->options.tz_utc); if (sbi->options.isvfat) { __le16 atime; - fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate); - fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate); + fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime, + &raw_entry->cdate, sbi->options.tz_utc); + fat_date_unix2dos(inode->i_atime.tv_sec, &atime, + &raw_entry->adate, sbi->options.tz_utc); raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + inode->i_ctime.tv_nsec / 10000000; } @@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) } if (sbi->options.flush) seq_puts(m, ",flush"); + if (opts->tz_utc) + seq_puts(m, ",tz=UTC"); return 0; } @@ -848,7 +856,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_flush, Opt_err, + Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, }; static match_table_t fat_tokens = { @@ -883,6 +891,7 @@ static match_table_t fat_tokens = { {Opt_obsolate, "cvf_options=%100s"}, {Opt_obsolate, "posix"}, {Opt_flush, "flush"}, + {Opt_tz_utc, "tz=UTC"}, {Opt_err, NULL}, }; static match_table_t msdos_tokens = { @@ -947,10 +956,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->utf8 = opts->unicode_xlate = 0; opts->numtail = 1; opts->usefree = opts->nocase = 0; + opts->tz_utc = 0; *debug = 0; if (!options) - return 0; + goto out; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, case Opt_flush: opts->flush = 1; break; + case Opt_tz_utc: + opts->tz_utc = 1; + break; /* msdos specific */ case Opt_dots: @@ -1104,10 +1117,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, return -EINVAL; } } + +out: /* UTF-8 doesn't provide FAT semantics */ if (!strcmp(opts->iocharset, "utf8")) { printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" - " for FAT filesystems, filesystem will be case sensitive!\n"); + " for FAT filesystems, filesystem will be " + "case sensitive!\n"); } /* If user doesn't specify allow_utime, it's initialized from dmask. */ diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 61f23511eacf..79fb98ad36d4 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -142,7 +142,7 @@ static int day_n[] = { }; /* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ -int date_dos2unix(unsigned short time, unsigned short date) +int date_dos2unix(unsigned short time, unsigned short date, int tz_utc) { int month, year, secs; @@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date) ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && month < 2 ? 1 : 0)+3653); /* days since 1.1.70 plus 80's leap day */ - secs += sys_tz.tz_minuteswest*60; + if (!tz_utc) + secs += sys_tz.tz_minuteswest*60; return secs; } /* Convert linear UNIX date to a MS-DOS time/date pair. */ -void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date) +void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc) { int day, year, nl_day, month; - unix_date -= sys_tz.tz_minuteswest*60; + if (!tz_utc) + unix_date -= sys_tz.tz_minuteswest*60; /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ if (unix_date < 315532800) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2060bf06b906..51d0035ff07e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -97,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode) * timeout is unknown (unlink, rmdir, rename and in some cases * lookup) */ -static void fuse_invalidate_entry_cache(struct dentry *entry) +void fuse_invalidate_entry_cache(struct dentry *entry) { fuse_dentry_settime(entry, 0); } @@ -112,18 +112,16 @@ static void fuse_invalidate_entry(struct dentry *entry) fuse_invalidate_entry_cache(entry); } -static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, - struct dentry *entry, +static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req, + u64 nodeid, struct qstr *name, struct fuse_entry_out *outarg) { - struct fuse_conn *fc = get_fuse_conn(dir); - memset(outarg, 0, sizeof(struct fuse_entry_out)); req->in.h.opcode = FUSE_LOOKUP; - req->in.h.nodeid = get_node_id(dir); + req->in.h.nodeid = nodeid; req->in.numargs = 1; - req->in.args[0].size = entry->d_name.len + 1; - req->in.args[0].value = entry->d_name.name; + req->in.args[0].size = name->len + 1; + req->in.args[0].value = name->name; req->out.numargs = 1; if (fc->minor < 9) req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; @@ -189,7 +187,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) attr_version = fuse_get_attr_version(fc); parent = dget_parent(entry); - fuse_lookup_init(req, parent->d_inode, entry, &outarg); + fuse_lookup_init(fc, req, get_node_id(parent->d_inode), + &entry->d_name, &outarg); request_send(fc, req); dput(parent); err = req->out.h.error; @@ -225,7 +224,7 @@ static int invalid_nodeid(u64 nodeid) return !nodeid || nodeid == FUSE_ROOT_ID; } -static struct dentry_operations fuse_dentry_operations = { +struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, }; @@ -239,85 +238,127 @@ int fuse_valid_type(int m) * Add a directory inode to a dentry, ensuring that no other dentry * refers to this inode. Called with fc->inst_mutex. */ -static int fuse_d_add_directory(struct dentry *entry, struct inode *inode) +static struct dentry *fuse_d_add_directory(struct dentry *entry, + struct inode *inode) { struct dentry *alias = d_find_alias(inode); - if (alias) { + if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) { /* This tries to shrink the subtree below alias */ fuse_invalidate_entry(alias); dput(alias); if (!list_empty(&inode->i_dentry)) - return -EBUSY; + return ERR_PTR(-EBUSY); + } else { + dput(alias); } - d_add(entry, inode); - return 0; + return d_splice_alias(inode, entry); } -static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, - struct nameidata *nd) +int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, + struct fuse_entry_out *outarg, struct inode **inode) { - int err; - struct fuse_entry_out outarg; - struct inode *inode = NULL; - struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_req *req; struct fuse_req *forget_req; u64 attr_version; + int err; - if (entry->d_name.len > FUSE_NAME_MAX) - return ERR_PTR(-ENAMETOOLONG); + *inode = NULL; + err = -ENAMETOOLONG; + if (name->len > FUSE_NAME_MAX) + goto out; req = fuse_get_req(fc); + err = PTR_ERR(req); if (IS_ERR(req)) - return ERR_CAST(req); + goto out; forget_req = fuse_get_req(fc); + err = PTR_ERR(forget_req); if (IS_ERR(forget_req)) { fuse_put_request(fc, req); - return ERR_CAST(forget_req); + goto out; } attr_version = fuse_get_attr_version(fc); - fuse_lookup_init(req, dir, entry, &outarg); + fuse_lookup_init(fc, req, nodeid, name, outarg); request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); /* Zero nodeid is same as -ENOENT, but with valid timeout */ - if (!err && outarg.nodeid && - (invalid_nodeid(outarg.nodeid) || - !fuse_valid_type(outarg.attr.mode))) - err = -EIO; - if (!err && outarg.nodeid) { - inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, - &outarg.attr, entry_attr_timeout(&outarg), - attr_version); - if (!inode) { - fuse_send_forget(fc, forget_req, outarg.nodeid, 1); - return ERR_PTR(-ENOMEM); - } + if (err || !outarg->nodeid) + goto out_put_forget; + + err = -EIO; + if (!outarg->nodeid) + goto out_put_forget; + if (!fuse_valid_type(outarg->attr.mode)) + goto out_put_forget; + + *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, + &outarg->attr, entry_attr_timeout(outarg), + attr_version); + err = -ENOMEM; + if (!*inode) { + fuse_send_forget(fc, forget_req, outarg->nodeid, 1); + goto out; } + err = 0; + + out_put_forget: fuse_put_request(fc, forget_req); - if (err && err != -ENOENT) - return ERR_PTR(err); + out: + return err; +} + +static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, + struct nameidata *nd) +{ + int err; + struct fuse_entry_out outarg; + struct inode *inode; + struct dentry *newent; + struct fuse_conn *fc = get_fuse_conn(dir); + bool outarg_valid = true; + + err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, + &outarg, &inode); + if (err == -ENOENT) { + outarg_valid = false; + err = 0; + } + if (err) + goto out_err; + + err = -EIO; + if (inode && get_node_id(inode) == FUSE_ROOT_ID) + goto out_iput; if (inode && S_ISDIR(inode->i_mode)) { mutex_lock(&fc->inst_mutex); - err = fuse_d_add_directory(entry, inode); + newent = fuse_d_add_directory(entry, inode); mutex_unlock(&fc->inst_mutex); - if (err) { - iput(inode); - return ERR_PTR(err); - } - } else - d_add(entry, inode); + err = PTR_ERR(newent); + if (IS_ERR(newent)) + goto out_iput; + } else { + newent = d_splice_alias(inode, entry); + } + entry = newent ? newent : entry; entry->d_op = &fuse_dentry_operations; - if (!err) + if (outarg_valid) fuse_change_entry_timeout(entry, &outarg); else fuse_invalidate_entry_cache(entry); - return NULL; + + return newent; + + out_iput: + iput(inode); + out_err: + return ERR_PTR(err); } /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8092f0d9fd1f..67ff2c6a8f63 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1341,6 +1341,11 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; int err; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) { + /* NLM needs asynchronous locks, which we don't support yet */ + return -ENOLCK; + } + /* Unlock on close is handled by the flush method */ if (fl->fl_flags & FL_CLOSE) return 0; @@ -1365,7 +1370,9 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (cmd == F_GETLK) { + if (cmd == F_CANCELLK) { + err = 0; + } else if (cmd == F_GETLK) { if (fc->no_lock) { posix_test_lock(file, fl); err = 0; @@ -1373,7 +1380,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) err = fuse_getlk(file, fl); } else { if (fc->no_lock) - err = posix_lock_file_wait(file, fl); + err = posix_lock_file(file, fl, NULL); else err = fuse_setlk(file, fl, 0); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index bae948657c4f..3a876076bdd1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -363,6 +363,9 @@ struct fuse_conn { /** Do not send separate SETATTR request before open(O_TRUNC) */ unsigned atomic_o_trunc : 1; + /** Filesystem supports NFS exporting. Only set in INIT */ + unsigned export_support : 1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -464,6 +467,8 @@ static inline u64 get_node_id(struct inode *inode) /** Device operations */ extern const struct file_operations fuse_dev_operations; +extern struct dentry_operations fuse_dentry_operations; + /** * Get a filled in inode */ @@ -471,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); +int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, + struct fuse_entry_out *outarg, struct inode **inode); + /** * Send FORGET command */ @@ -604,6 +612,8 @@ void fuse_abort_conn(struct fuse_conn *fc); */ void fuse_invalidate_attr(struct inode *inode); +void fuse_invalidate_entry_cache(struct dentry *entry); + /** * Acquire reference to fuse_conn */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3141690558c8..7d2f7d6e22e2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -18,6 +18,7 @@ #include <linux/statfs.h> #include <linux/random.h> #include <linux/sched.h> +#include <linux/exportfs.h> MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -552,6 +553,174 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode) return fuse_iget(sb, 1, 0, &attr, 0, 0); } +struct fuse_inode_handle +{ + u64 nodeid; + u32 generation; +}; + +static struct dentry *fuse_get_dentry(struct super_block *sb, + struct fuse_inode_handle *handle) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + struct inode *inode; + struct dentry *entry; + int err = -ESTALE; + + if (handle->nodeid == 0) + goto out_err; + + inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); + if (!inode) { + struct fuse_entry_out outarg; + struct qstr name; + + if (!fc->export_support) + goto out_err; + + name.len = 1; + name.name = "."; + err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg, + &inode); + if (err && err != -ENOENT) + goto out_err; + if (err || !inode) { + err = -ESTALE; + goto out_err; + } + err = -EIO; + if (get_node_id(inode) != handle->nodeid) + goto out_iput; + } + err = -ESTALE; + if (inode->i_generation != handle->generation) + goto out_iput; + + entry = d_alloc_anon(inode); + err = -ENOMEM; + if (!entry) + goto out_iput; + + if (get_node_id(inode) != FUSE_ROOT_ID) { + entry->d_op = &fuse_dentry_operations; + fuse_invalidate_entry_cache(entry); + } + + return entry; + + out_iput: + iput(inode); + out_err: + return ERR_PTR(err); +} + +static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, + int connectable) +{ + struct inode *inode = dentry->d_inode; + bool encode_parent = connectable && !S_ISDIR(inode->i_mode); + int len = encode_parent ? 6 : 3; + u64 nodeid; + u32 generation; + + if (*max_len < len) + return 255; + + nodeid = get_fuse_inode(inode)->nodeid; + generation = inode->i_generation; + + fh[0] = (u32)(nodeid >> 32); + fh[1] = (u32)(nodeid & 0xffffffff); + fh[2] = generation; + + if (encode_parent) { + struct inode *parent; + + spin_lock(&dentry->d_lock); + parent = dentry->d_parent->d_inode; + nodeid = get_fuse_inode(parent)->nodeid; + generation = parent->i_generation; + spin_unlock(&dentry->d_lock); + + fh[3] = (u32)(nodeid >> 32); + fh[4] = (u32)(nodeid & 0xffffffff); + fh[5] = generation; + } + + *max_len = len; + return encode_parent ? 0x82 : 0x81; +} + +static struct dentry *fuse_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + struct fuse_inode_handle handle; + + if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3) + return NULL; + + handle.nodeid = (u64) fid->raw[0] << 32; + handle.nodeid |= (u64) fid->raw[1]; + handle.generation = fid->raw[2]; + return fuse_get_dentry(sb, &handle); +} + +static struct dentry *fuse_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + struct fuse_inode_handle parent; + + if (fh_type != 0x82 || fh_len < 6) + return NULL; + + parent.nodeid = (u64) fid->raw[3] << 32; + parent.nodeid |= (u64) fid->raw[4]; + parent.generation = fid->raw[5]; + return fuse_get_dentry(sb, &parent); +} + +static struct dentry *fuse_get_parent(struct dentry *child) +{ + struct inode *child_inode = child->d_inode; + struct fuse_conn *fc = get_fuse_conn(child_inode); + struct inode *inode; + struct dentry *parent; + struct fuse_entry_out outarg; + struct qstr name; + int err; + + if (!fc->export_support) + return ERR_PTR(-ESTALE); + + name.len = 2; + name.name = ".."; + err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), + &name, &outarg, &inode); + if (err && err != -ENOENT) + return ERR_PTR(err); + if (err || !inode) + return ERR_PTR(-ESTALE); + + parent = d_alloc_anon(inode); + if (!parent) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + if (get_node_id(inode) != FUSE_ROOT_ID) { + parent->d_op = &fuse_dentry_operations; + fuse_invalidate_entry_cache(parent); + } + + return parent; +} + +static const struct export_operations fuse_export_operations = { + .fh_to_dentry = fuse_fh_to_dentry, + .fh_to_parent = fuse_fh_to_parent, + .encode_fh = fuse_encode_fh, + .get_parent = fuse_get_parent, +}; + static const struct super_operations fuse_super_operations = { .alloc_inode = fuse_alloc_inode, .destroy_inode = fuse_destroy_inode, @@ -581,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->no_lock = 1; if (arg->flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; + if (arg->minor >= 9) { + /* LOOKUP has dependency on proto version */ + if (arg->flags & FUSE_EXPORT_SUPPORT) + fc->export_support = 1; + } if (arg->flags & FUSE_BIG_WRITES) fc->big_writes = 1; } else { @@ -607,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_BIG_WRITES; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -652,6 +826,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = FUSE_SUPER_MAGIC; sb->s_op = &fuse_super_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_export_op = &fuse_export_operations; file = fget(d.fd); if (!file) diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c index 24e75798ddf0..c6e97366e8ac 100644 --- a/fs/hfs/bitmap.c +++ b/fs/hfs/bitmap.c @@ -145,7 +145,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits) if (!*num_bits) return 0; - down(&HFS_SB(sb)->bitmap_lock); + mutex_lock(&HFS_SB(sb)->bitmap_lock); bitmap = HFS_SB(sb)->bitmap; pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits); @@ -162,7 +162,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits) HFS_SB(sb)->free_ablocks -= *num_bits; hfs_bitmap_dirty(sb); out: - up(&HFS_SB(sb)->bitmap_lock); + mutex_unlock(&HFS_SB(sb)->bitmap_lock); return pos; } @@ -205,7 +205,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count) if ((start + count) > HFS_SB(sb)->fs_ablocks) return -2; - down(&HFS_SB(sb)->bitmap_lock); + mutex_lock(&HFS_SB(sb)->bitmap_lock); /* bitmap is always on a 32-bit boundary */ curr = HFS_SB(sb)->bitmap + (start / 32); len = count; @@ -236,7 +236,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count) } out: HFS_SB(sb)->free_ablocks += len; - up(&HFS_SB(sb)->bitmap_lock); + mutex_unlock(&HFS_SB(sb)->bitmap_lock); hfs_bitmap_dirty(sb); return 0; diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index f6621a785202..9b9d6395bad3 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -40,7 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; HFS_I(tree->inode)->flags = 0; - init_MUTEX(&HFS_I(tree->inode)->extents_lock); + mutex_init(&HFS_I(tree->inode)->extents_lock); switch (id) { case HFS_EXT_CNID: hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize, diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index c176f67ba0a5..2c16316d2917 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -343,16 +343,16 @@ int hfs_get_block(struct inode *inode, sector_t block, goto done; } - down(&HFS_I(inode)->extents_lock); + mutex_lock(&HFS_I(inode)->extents_lock); res = hfs_ext_read_extent(inode, ablock); if (!res) dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents, ablock - HFS_I(inode)->cached_start); else { - up(&HFS_I(inode)->extents_lock); + mutex_unlock(&HFS_I(inode)->extents_lock); return -EIO; } - up(&HFS_I(inode)->extents_lock); + mutex_unlock(&HFS_I(inode)->extents_lock); done: map_bh(bh_result, sb, HFS_SB(sb)->fs_start + @@ -375,7 +375,7 @@ int hfs_extend_file(struct inode *inode) u32 start, len, goal; int res; - down(&HFS_I(inode)->extents_lock); + mutex_lock(&HFS_I(inode)->extents_lock); if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks) goal = hfs_ext_lastblock(HFS_I(inode)->first_extents); else { @@ -425,7 +425,7 @@ int hfs_extend_file(struct inode *inode) goto insert_extent; } out: - up(&HFS_I(inode)->extents_lock); + mutex_unlock(&HFS_I(inode)->extents_lock); if (!res) { HFS_I(inode)->alloc_blocks += len; mark_inode_dirty(inode); @@ -487,7 +487,7 @@ void hfs_file_truncate(struct inode *inode) if (blk_cnt == alloc_cnt) goto out; - down(&HFS_I(inode)->extents_lock); + mutex_lock(&HFS_I(inode)->extents_lock); hfs_find_init(HFS_SB(sb)->ext_tree, &fd); while (1) { if (alloc_cnt == HFS_I(inode)->first_blocks) { @@ -514,7 +514,7 @@ void hfs_file_truncate(struct inode *inode) hfs_brec_remove(&fd); } hfs_find_exit(&fd); - up(&HFS_I(inode)->extents_lock); + mutex_unlock(&HFS_I(inode)->extents_lock); HFS_I(inode)->alloc_blocks = blk_cnt; out: diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 147374b6f675..9955232fdf8c 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/types.h> +#include <linux/mutex.h> #include <linux/buffer_head.h> #include <linux/fs.h> @@ -53,7 +54,7 @@ struct hfs_inode_info { struct list_head open_dir_list; struct inode *rsrc_inode; - struct semaphore extents_lock; + struct mutex extents_lock; u16 alloc_blocks, clump_blocks; sector_t fs_blocks; @@ -139,7 +140,7 @@ struct hfs_sb_info { struct nls_table *nls_io, *nls_disk; - struct semaphore bitmap_lock; + struct mutex bitmap_lock; unsigned long flags; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 97f8446c4ff4..dc4ec640e875 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -150,7 +150,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode) if (!inode) return NULL; - init_MUTEX(&HFS_I(inode)->extents_lock); + mutex_init(&HFS_I(inode)->extents_lock); INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); inode->i_ino = HFS_SB(sb)->next_id++; @@ -281,7 +281,7 @@ static int hfs_read_inode(struct inode *inode, void *data) HFS_I(inode)->flags = 0; HFS_I(inode)->rsrc_inode = NULL; - init_MUTEX(&HFS_I(inode)->extents_lock); + mutex_init(&HFS_I(inode)->extents_lock); INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); /* Initialize the inode */ diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 8cf67974adf6..ac2ec5ef66e4 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -372,7 +372,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &hfs_super_operations; sb->s_flags |= MS_NODIRATIME; - init_MUTEX(&sbi->bitmap_lock); + mutex_init(&sbi->bitmap_lock); res = hfs_mdb_get(sb); if (res) { diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 12e899cd7886..fec8f61227ff 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -199,16 +199,16 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, goto done; } - down(&HFSPLUS_I(inode).extents_lock); + mutex_lock(&HFSPLUS_I(inode).extents_lock); res = hfsplus_ext_read_extent(inode, ablock); if (!res) { dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - HFSPLUS_I(inode).cached_start); } else { - up(&HFSPLUS_I(inode).extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); return -EIO; } - up(&HFSPLUS_I(inode).extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); done: dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); @@ -355,7 +355,7 @@ int hfsplus_file_extend(struct inode *inode) return -ENOSPC; } - down(&HFSPLUS_I(inode).extents_lock); + mutex_lock(&HFSPLUS_I(inode).extents_lock); if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); else { @@ -408,7 +408,7 @@ int hfsplus_file_extend(struct inode *inode) goto insert_extent; } out: - up(&HFSPLUS_I(inode).extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); if (!res) { HFSPLUS_I(inode).alloc_blocks += len; mark_inode_dirty(inode); @@ -465,7 +465,7 @@ void hfsplus_file_truncate(struct inode *inode) if (blk_cnt == alloc_cnt) goto out; - down(&HFSPLUS_I(inode).extents_lock); + mutex_lock(&HFSPLUS_I(inode).extents_lock); hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); while (1) { if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { @@ -492,7 +492,7 @@ void hfsplus_file_truncate(struct inode *inode) hfs_brec_remove(&fd); } hfs_find_exit(&fd); - up(&HFSPLUS_I(inode).extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); HFSPLUS_I(inode).alloc_blocks = blk_cnt; out: diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 9e59537b43d5..f027a905225f 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -11,6 +11,7 @@ #define _LINUX_HFSPLUS_FS_H #include <linux/fs.h> +#include <linux/mutex.h> #include <linux/buffer_head.h> #include "hfsplus_raw.h" @@ -154,7 +155,7 @@ struct hfsplus_sb_info { struct hfsplus_inode_info { - struct semaphore extents_lock; + struct mutex extents_lock; u32 clump_blocks, alloc_blocks; sector_t fs_blocks; /* Allocation extents from catalog record or volume header */ diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 67e1c8b467c4..cc3b5e24339b 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -163,7 +163,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent inode->i_ino = dir->i_ino; INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); - init_MUTEX(&HFSPLUS_I(inode).extents_lock); + mutex_init(&HFSPLUS_I(inode).extents_lock); HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); @@ -316,7 +316,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode) inode->i_nlink = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); - init_MUTEX(&HFSPLUS_I(inode).extents_lock); + mutex_init(&HFSPLUS_I(inode).extents_lock); atomic_set(&HFSPLUS_I(inode).opencnt, 0); HFSPLUS_I(inode).flags = 0; memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index ce97a54518d8..3859118531c7 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -34,7 +34,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) return inode; INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); - init_MUTEX(&HFSPLUS_I(inode).extents_lock); + mutex_init(&HFSPLUS_I(inode).extents_lock); HFSPLUS_I(inode).flags = 0; HFSPLUS_I(inode).rsrc_inode = NULL; atomic_set(&HFSPLUS_I(inode).opencnt, 0); diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 6bd48f0a7047..c2fb2dd0131f 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -209,6 +209,11 @@ repeat: while (rs.len > 2) { /* There may be one byte for padding somewhere */ rr = (struct rock_ridge *)rs.chr; + /* + * Ignore rock ridge info if rr->len is out of range, but + * don't return -EIO because that would make the file + * invisible. + */ if (rr->len < 3) goto out; /* Something got screwed up here */ sig = isonum_721(rs.chr); @@ -216,8 +221,12 @@ repeat: goto eio; rs.chr += rr->len; rs.len -= rr->len; + /* + * As above, just ignore the rock ridge info if rr->len + * is bogus. + */ if (rs.len < 0) - goto eio; /* corrupted isofs */ + goto out; /* Something got screwed up here */ switch (sig) { case SIG('R', 'R'): @@ -307,6 +316,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de, repeat: while (rs.len > 2) { /* There may be one byte for padding somewhere */ rr = (struct rock_ridge *)rs.chr; + /* + * Ignore rock ridge info if rr->len is out of range, but + * don't return -EIO because that would make the file + * invisible. + */ if (rr->len < 3) goto out; /* Something got screwed up here */ sig = isonum_721(rs.chr); @@ -314,8 +328,12 @@ repeat: goto eio; rs.chr += rr->len; rs.len -= rr->len; + /* + * As above, just ignore the rock ridge info if rr->len + * is bogus. + */ if (rs.len < 0) - goto eio; /* corrupted isofs */ + goto out; /* Something got screwed up here */ switch (sig) { #ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 5a8ca61498ca..2eccbfaa1d48 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) /* * When an ext3-ordered file is truncated, it is possible that many pages are - * not sucessfully freed, because they are attached to a committing transaction. + * not successfully freed, because they are attached to a committing transaction. * After the transaction commits, these pages are left on the LRU, with no * ->mapping, and with attached buffers. These pages are trivially reclaimable * by the VM, but their apparent absence upsets the VM accounting, and it makes @@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) * So here, we have a buffer which has just come off the forget list. Look to * see if we can strip all buffers from the backing page. * - * Called under lock_journal(), and possibly under journal_datalist_lock. The - * caller provided us with a ref against the buffer, and we drop that here. + * Called under journal->j_list_lock. The caller provided us with a ref + * against the buffer, and we drop that here. */ static void release_buffer_page(struct buffer_head *bh) { @@ -78,6 +78,19 @@ nope: } /* + * Decrement reference counter for data buffer. If it has been marked + * 'BH_Freed', release it and the page to which it belongs if possible. + */ +static void release_data_buffer(struct buffer_head *bh) +{ + if (buffer_freed(bh)) { + clear_buffer_freed(bh); + release_buffer_page(bh); + } else + put_bh(bh); +} + +/* * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is * held. For ranking reasons we must trylock. If we lose, schedule away and * return 0. j_list_lock is dropped in this case. @@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) /* * Submit all the data buffers to disk */ -static void journal_submit_data_buffers(journal_t *journal, +static int journal_submit_data_buffers(journal_t *journal, transaction_t *commit_transaction) { struct journal_head *jh; @@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal, int locked; int bufs = 0; struct buffer_head **wbuf = journal->j_wbuf; + int err = 0; /* * Whenever we unlock the journal and sleep, things can get added @@ -231,7 +245,7 @@ write_out_data: if (locked) unlock_buffer(bh); BUFFER_TRACE(bh, "already cleaned up"); - put_bh(bh); + release_data_buffer(bh); continue; } if (locked && test_clear_buffer_dirty(bh)) { @@ -253,15 +267,17 @@ write_out_data: put_bh(bh); } else { BUFFER_TRACE(bh, "writeout complete: unfile"); + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; __journal_unfile_buffer(jh); jbd_unlock_bh_state(bh); if (locked) unlock_buffer(bh); journal_remove_journal_head(bh); - /* Once for our safety reference, once for + /* One for our safety reference, other for * journal_remove_journal_head() */ put_bh(bh); - put_bh(bh); + release_data_buffer(bh); } if (need_resched() || spin_needbreak(&journal->j_list_lock)) { @@ -271,6 +287,8 @@ write_out_data: } spin_unlock(&journal->j_list_lock); journal_do_submit_data(wbuf, bufs); + + return err; } /* @@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal) * Now start flushing things to disk, in the order they appear * on the transaction lists. Data blocks go first. */ - err = 0; - journal_submit_data_buffers(journal, commit_transaction); + err = journal_submit_data_buffers(journal, commit_transaction); /* * Wait for all previously submitted IO to complete. @@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal) if (buffer_locked(bh)) { spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); - if (unlikely(!buffer_uptodate(bh))) - err = -EIO; spin_lock(&journal->j_list_lock); } + if (unlikely(!buffer_uptodate(bh))) { + if (TestSetPageLocked(bh->b_page)) { + spin_unlock(&journal->j_list_lock); + lock_page(bh->b_page); + spin_lock(&journal->j_list_lock); + } + if (bh->b_page->mapping) + set_bit(AS_EIO, &bh->b_page->mapping->flags); + + unlock_page(bh->b_page); + SetPageError(bh->b_page); + err = -EIO; + } if (!inverted_lock(journal, bh)) { put_bh(bh); spin_lock(&journal->j_list_lock); @@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal) } else { jbd_unlock_bh_state(bh); } - put_bh(bh); + release_data_buffer(bh); cond_resched_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); - if (err) - journal_abort(journal, err); + if (err) { + char b[BDEVNAME_SIZE]; - journal_write_revoke_records(journal, commit_transaction); + printk(KERN_WARNING + "JBD: Detected IO errors while flushing file data " + "on %s\n", bdevname(journal->j_fs_dev, b)); + err = 0; + } - jbd_debug(3, "JBD: commit phase 2\n"); + journal_write_revoke_records(journal, commit_transaction); /* * If we found any dirty or locked buffers, then we should have diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index b99c3b3654c4..aa7143a8349b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features); EXPORT_SYMBOL(journal_create); EXPORT_SYMBOL(journal_load); EXPORT_SYMBOL(journal_destroy); -EXPORT_SYMBOL(journal_update_superblock); EXPORT_SYMBOL(journal_abort); EXPORT_SYMBOL(journal_errno); EXPORT_SYMBOL(journal_ack_err); @@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void) static void journal_destroy_journal_head_cache(void) { - J_ASSERT(journal_head_cache != NULL); - kmem_cache_destroy(journal_head_cache); - journal_head_cache = NULL; + if (journal_head_cache) { + kmem_cache_destroy(journal_head_cache); + journal_head_cache = NULL; + } } /* diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 1bb43e987f4b..c7bd649bbbdc 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, return NULL; } +void journal_destroy_revoke_caches(void) +{ + if (revoke_record_cache) { + kmem_cache_destroy(revoke_record_cache); + revoke_record_cache = NULL; + } + if (revoke_table_cache) { + kmem_cache_destroy(revoke_table_cache); + revoke_table_cache = NULL; + } +} + int __init journal_init_revoke_caches(void) { + J_ASSERT(!revoke_record_cache); + J_ASSERT(!revoke_table_cache); + revoke_record_cache = kmem_cache_create("revoke_record", sizeof(struct jbd_revoke_record_s), 0, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, NULL); if (!revoke_record_cache) - return -ENOMEM; + goto record_cache_failure; revoke_table_cache = kmem_cache_create("revoke_table", sizeof(struct jbd_revoke_table_s), 0, SLAB_TEMPORARY, NULL); - if (!revoke_table_cache) { - kmem_cache_destroy(revoke_record_cache); - revoke_record_cache = NULL; - return -ENOMEM; - } + if (!revoke_table_cache) + goto table_cache_failure; + return 0; -} -void journal_destroy_revoke_caches(void) -{ - kmem_cache_destroy(revoke_record_cache); - revoke_record_cache = NULL; - kmem_cache_destroy(revoke_table_cache); - revoke_table_cache = NULL; +table_cache_failure: + journal_destroy_revoke_caches(); +record_cache_failure: + return -ENOMEM; } -/* Initialise the revoke table for a given journal to a given size. */ - -int journal_init_revoke(journal_t *journal, int hash_size) +static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size) { - int shift, tmp; + int shift = 0; + int tmp = hash_size; + struct jbd_revoke_table_s *table; - J_ASSERT (journal->j_revoke_table[0] == NULL); + table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); + if (!table) + goto out; - shift = 0; - tmp = hash_size; while((tmp >>= 1UL) != 0UL) shift++; - journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[0]) - return -ENOMEM; - journal->j_revoke = journal->j_revoke_table[0]; - - /* Check that the hash_size is a power of two */ - J_ASSERT(is_power_of_2(hash_size)); - - journal->j_revoke->hash_size = hash_size; - - journal->j_revoke->hash_shift = shift; - - journal->j_revoke->hash_table = + table->hash_size = hash_size; + table->hash_shift = shift; + table->hash_table = kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); - journal->j_revoke = NULL; - return -ENOMEM; + if (!table->hash_table) { + kmem_cache_free(revoke_table_cache, table); + table = NULL; + goto out; } for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + INIT_LIST_HEAD(&table->hash_table[tmp]); - journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[1]) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); - return -ENOMEM; +out: + return table; +} + +static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table) +{ + int i; + struct list_head *hash_list; + + for (i = 0; i < table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT(list_empty(hash_list)); } - journal->j_revoke = journal->j_revoke_table[1]; + kfree(table->hash_table); + kmem_cache_free(revoke_table_cache, table); +} - /* Check that the hash_size is a power of two */ +/* Initialise the revoke table for a given journal to a given size. */ +int journal_init_revoke(journal_t *journal, int hash_size) +{ + J_ASSERT(journal->j_revoke_table[0] == NULL); J_ASSERT(is_power_of_2(hash_size)); - journal->j_revoke->hash_size = hash_size; + journal->j_revoke_table[0] = journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[0]) + goto fail0; - journal->j_revoke->hash_shift = shift; + journal->j_revoke_table[1] = journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[1]) + goto fail1; - journal->j_revoke->hash_table = - kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]); - journal->j_revoke = NULL; - return -ENOMEM; - } - - for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + journal->j_revoke = journal->j_revoke_table[1]; spin_lock_init(&journal->j_revoke_lock); return 0; -} -/* Destoy a journal's revoke table. The table must already be empty! */ +fail1: + journal_destroy_revoke_table(journal->j_revoke_table[0]); +fail0: + return -ENOMEM; +} +/* Destroy a journal's revoke table. The table must already be empty! */ void journal_destroy_revoke(journal_t *journal) { - struct jbd_revoke_table_s *table; - struct list_head *hash_list; - int i; - - table = journal->j_revoke_table[0]; - if (!table) - return; - - for (i=0; i<table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(revoke_table_cache, table); - journal->j_revoke = NULL; - - table = journal->j_revoke_table[1]; - if (!table) - return; - - for (i=0; i<table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(revoke_table_cache, table); journal->j_revoke = NULL; + if (journal->j_revoke_table[0]) + journal_destroy_revoke_table(journal->j_revoke_table[0]); + if (journal->j_revoke_table[1]) + journal_destroy_revoke_table(journal->j_revoke_table[1]); } diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 67ff2024c23c..8dee32007500 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1648,12 +1648,42 @@ out: return; } +/* + * journal_try_to_free_buffers() could race with journal_commit_transaction() + * The latter might still hold the a count on buffers when inspecting + * them on t_syncdata_list or t_locked_list. + * + * journal_try_to_free_buffers() will call this function to + * wait for the current transaction to finish syncing data buffers, before + * tryinf to free that buffer. + * + * Called with journal->j_state_lock held. + */ +static void journal_wait_for_transaction_sync_data(journal_t *journal) +{ + transaction_t *transaction = NULL; + tid_t tid; + + spin_lock(&journal->j_state_lock); + transaction = journal->j_committing_transaction; + + if (!transaction) { + spin_unlock(&journal->j_state_lock); + return; + } + + tid = transaction->t_tid; + spin_unlock(&journal->j_state_lock); + log_wait_commit(journal, tid); +} /** * int journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation * @page: to try and free - * @unused_gfp_mask: unused + * @gfp_mask: we use the mask to detect how hard should we try to release + * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to + * release the buffers. * * * For all the buffers on this page, @@ -1682,9 +1712,11 @@ out: * journal_try_to_free_buffer() is changing its state. But that * cannot happen because we never reallocate freed data as metadata * while the data is part of a transaction. Yes? + * + * Return 0 on failure, 1 on success */ int journal_try_to_free_buffers(journal_t *journal, - struct page *page, gfp_t unused_gfp_mask) + struct page *page, gfp_t gfp_mask) { struct buffer_head *head; struct buffer_head *bh; @@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal, if (buffer_jbd(bh)) goto busy; } while ((bh = bh->b_this_page) != head); + ret = try_to_free_buffers(page); + + /* + * There are a number of places where journal_try_to_free_buffers() + * could race with journal_commit_transaction(), the later still + * holds the reference to the buffers to free while processing them. + * try_to_free_buffers() failed to free those buffers. Some of the + * caller of releasepage() request page buffers to be dropped, otherwise + * treat the fail-to-free as errors (such as generic_file_direct_IO()) + * + * So, if the caller of try_to_release_page() wants the synchronous + * behaviour(i.e make sure buffers are dropped upon return), + * let's wait for the current transaction to finish flush of + * dirty data buffers, then try to free those buffers again, + * with the journal locked. + */ + if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) { + journal_wait_for_transaction_sync_data(journal); + ret = try_to_free_buffers(page); + } + busy: return ret; } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 0288e6d7936a..359c091d8965 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -22,6 +22,7 @@ #include <linux/parser.h> #include <linux/completion.h> #include <linux/vfs.h> +#include <linux/quotaops.h> #include <linux/mount.h> #include <linux/moduleparam.h> #include <linux/kthread.h> diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 1f6dc518505c..31668b690e03 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -582,7 +582,15 @@ again: } if (status < 0) goto out_unlock; - status = nlm_stat_to_errno(resp->status); + /* + * EAGAIN doesn't make sense for sleeping locks, and in some + * cases NLM_LCK_DENIED is returned for a permanent error. So + * turn it into an ENOLCK. + */ + if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP)) + status = -ENOLCK; + else + status = nlm_stat_to_errno(resp->status); out_unblock: nlmclnt_finish_block(block); out: diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 821b9acdfb66..cf0d5c2c318d 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -418,8 +418,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; case -EAGAIN: ret = nlm_lck_denied; - break; - case -EINPROGRESS: + goto out; + case FILE_LOCK_DEFERRED: if (wait) break; /* Filesystem lock operation is in progress @@ -434,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - ret = nlm_lck_denied; - if (!wait) - goto out; - ret = nlm_lck_blocked; /* Append to list of blocked */ @@ -507,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, } error = vfs_test_lock(file->f_file, &lock->fl); - if (error == -EINPROGRESS) { + if (error == FILE_LOCK_DEFERRED) { ret = nlmsvc_defer_lock_rqst(rqstp, block); goto out; } @@ -731,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) switch (error) { case 0: break; - case -EAGAIN: - case -EINPROGRESS: + case FILE_LOCK_DEFERRED: dprintk("lockd: lock still blocked error %d\n", error); nlmsvc_insert_block(block, NLM_NEVER); nlmsvc_release_block(block); diff --git a/fs/locks.c b/fs/locks.c index dce8c747371c..01490300f7cb 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -779,8 +779,10 @@ find_conflict: if (!flock_locks_conflict(request, fl)) continue; error = -EAGAIN; - if (request->fl_flags & FL_SLEEP) - locks_insert_block(fl, request); + if (!(request->fl_flags & FL_SLEEP)) + goto out; + error = FILE_LOCK_DEFERRED; + locks_insert_block(fl, request); goto out; } if (request->fl_flags & FL_ACCESS) @@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str error = -EDEADLK; if (posix_locks_deadlock(request, fl)) goto out; - error = -EAGAIN; + error = FILE_LOCK_DEFERRED; locks_insert_block(fl, request); goto out; } @@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) might_sleep (); for (;;) { error = posix_lock_file(filp, fl, NULL); - if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); if (!error) @@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode, for (;;) { error = __posix_lock_file(inode, &fl, NULL); - if (error != -EAGAIN) - break; - if (!(fl.fl_flags & FL_SLEEP)) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); if (!error) { @@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) might_sleep(); for (;;) { error = flock_lock_file(filp, fl); - if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); if (!error) @@ -1716,17 +1716,17 @@ out: * fl_grant is set. Callers expecting ->lock() to return asynchronously * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) * the request is for a blocking lock. When ->lock() does return asynchronously, - * it must return -EINPROGRESS, and call ->fl_grant() when the lock + * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock * request completes. * If the request is for non-blocking lock the file system should return - * -EINPROGRESS then try to get the lock and call the callback routine with - * the result. If the request timed out the callback routine will return a + * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine + * with the result. If the request timed out the callback routine will return a * nonzero return code and the file system should release the lock. The file * system is also responsible to keep a corresponding posix lock when it * grants a lock so the VFS can find out which locks are locally held and do * the correct lock cleanup when required. * The underlying filesystem must not drop the kernel lock or call - * ->fl_grant() before returning to the caller with a -EINPROGRESS + * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED * return code. */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) @@ -1738,6 +1738,30 @@ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, str } EXPORT_SYMBOL_GPL(vfs_lock_file); +static int do_lock_file_wait(struct file *filp, unsigned int cmd, + struct file_lock *fl) +{ + int error; + + error = security_file_lock(filp, fl->fl_type); + if (error) + return error; + + for (;;) { + error = vfs_lock_file(filp, cmd, fl, NULL); + if (error != FILE_LOCK_DEFERRED) + break; + error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); + if (!error) + continue; + + locks_delete_block(fl); + break; + } + + return error; +} + /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ @@ -1795,26 +1819,7 @@ again: goto out; } - error = security_file_lock(filp, file_lock->fl_type); - if (error) - goto out; - - if (filp->f_op && filp->f_op->lock != NULL) - error = filp->f_op->lock(filp, cmd, file_lock); - else { - for (;;) { - error = posix_lock_file(filp, file_lock, NULL); - if (error != -EAGAIN || cmd == F_SETLK) - break; - error = wait_event_interruptible(file_lock->fl_wait, - !file_lock->fl_next); - if (!error) - continue; - - locks_delete_block(file_lock); - break; - } - } + error = do_lock_file_wait(filp, cmd, file_lock); /* * Attempt to detect a close/fcntl race and recover by @@ -1932,26 +1937,7 @@ again: goto out; } - error = security_file_lock(filp, file_lock->fl_type); - if (error) - goto out; - - if (filp->f_op && filp->f_op->lock != NULL) - error = filp->f_op->lock(filp, cmd, file_lock); - else { - for (;;) { - error = posix_lock_file(filp, file_lock, NULL); - if (error != -EAGAIN || cmd == F_SETLK64) - break; - error = wait_event_interruptible(file_lock->fl_wait, - !file_lock->fl_next); - if (!error) - continue; - - locks_delete_block(file_lock); - break; - } - } + error = do_lock_file_wait(filp, cmd, file_lock); /* * Attempt to detect a close/fcntl race and recover by diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 84f6242ba6fc..523d73713418 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -256,9 +256,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) if (!s->s_root) goto out_iput; - if (!NO_TRUNCATE) - s->s_root->d_op = &minix_dentry_operations; - if (!(s->s_flags & MS_RDONLY)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ ms->s_state &= ~MINIX_VALID_FS; diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 326edfe96108..e6a0b193bea4 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -2,11 +2,6 @@ #include <linux/pagemap.h> #include <linux/minix_fs.h> -/* - * change the define below to 0 if you want names > info->s_namelen chars to be - * truncated. Else they will be disallowed (ENAMETOOLONG). - */ -#define NO_TRUNCATE 1 #define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version #define MINIX_V1 0x0001 /* original minix fs */ #define MINIX_V2 0x0002 /* minix V2 fs */ @@ -83,7 +78,6 @@ extern const struct inode_operations minix_file_inode_operations; extern const struct inode_operations minix_dir_inode_operations; extern const struct file_operations minix_file_operations; extern const struct file_operations minix_dir_operations; -extern struct dentry_operations minix_dentry_operations; static inline struct minix_sb_info *minix_sb(struct super_block *sb) { diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 102241bc9c79..32b131cd6121 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -18,30 +18,6 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static int minix_hash(struct dentry *dentry, struct qstr *qstr) -{ - unsigned long hash; - int i; - const unsigned char *name; - - i = minix_sb(dentry->d_inode->i_sb)->s_namelen; - if (i >= qstr->len) - return 0; - /* Truncate the name in place, avoids having to define a compare - function. */ - qstr->len = i; - name = qstr->name; - hash = init_name_hash(); - while (i--) - hash = partial_name_hash(*name++, hash); - qstr->hash = end_name_hash(hash); - return 0; -} - -struct dentry_operations minix_dentry_operations = { - .d_hash = minix_hash, -}; - static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode = NULL; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 1f7f2956412a..e844b9809d27 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -14,12 +14,7 @@ /* Characters that are undesirable in an MS-DOS file name */ static unsigned char bad_chars[] = "*?<>|\""; -static unsigned char bad_if_strict_pc[] = "+=,; "; -/* GEMDOS is less restrictive */ -static unsigned char bad_if_strict_atari[] = " "; - -#define bad_if_strict(opts) \ - ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc) +static unsigned char bad_if_strict[] = "+=,; "; /***** Formats an MS-DOS file name. Rejects invalid names. */ static int msdos_format_name(const unsigned char *name, int len, @@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len, /* Get rid of dot - test for it elsewhere */ name++; len--; - } else if (!opts->atari) + } else return -EINVAL; } /* - * disallow names that _really_ start with a dot for MS-DOS, - * GEMDOS does not care + * disallow names that _really_ start with a dot */ - space = !opts->atari; + space = 1; c = 0; for (walk = res; len && walk - res < 8; walk++) { c = *name++; len--; if (opts->name_check != 'r' && strchr(bad_chars, c)) return -EINVAL; - if (opts->name_check == 's' && strchr(bad_if_strict(opts), c)) + if (opts->name_check == 's' && strchr(bad_if_strict, c)) return -EINVAL; if (c >= 'A' && c <= 'Z' && opts->name_check == 's') return -EINVAL; @@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len, if (opts->name_check != 'r' && strchr(bad_chars, c)) return -EINVAL; if (opts->name_check == 's' && - strchr(bad_if_strict(opts), c)) + strchr(bad_if_strict, c)) return -EINVAL; if (c < ' ' || c == ':' || c == '\\') return -EINVAL; @@ -243,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, int is_dir, int is_hid, int cluster, struct timespec *ts, struct fat_slot_info *sinfo) { + struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb); struct msdos_dir_entry de; __le16 time, date; int err; @@ -252,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, if (is_hid) de.attr |= ATTR_HIDDEN; de.lcase = 0; - fat_date_unix2dos(ts->tv_sec, &time, &date); + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); de.cdate = de.adate = 0; de.ctime = 0; de.ctime_cs = 0; diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 6b6225ac4926..15c6faeec77c 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -19,6 +19,13 @@ #define NFSDDBG_FACILITY NFSDDBG_LOCKD +#ifdef CONFIG_LOCKD_V4 +#define nlm_stale_fh nlm4_stale_fh +#define nlm_failed nlm4_failed +#else +#define nlm_stale_fh nlm_lck_denied_nolocks +#define nlm_failed nlm_lck_denied_nolocks +#endif /* * Note: we hold the dentry use count while the file is open. */ @@ -47,12 +54,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) return 0; case nfserr_dropit: return nlm_drop_reply; -#ifdef CONFIG_LOCKD_V4 case nfserr_stale: - return nlm4_stale_fh; -#endif + return nlm_stale_fh; default: - return nlm_lck_denied; + return nlm_failed; } } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index efef715135d3..7d6b34e201db 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev, static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, whole_disk_show, NULL); -void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) +int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) { struct hd_struct *p; int err; p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) - return; + return -ENOMEM; if (!init_part_stats(p)) { - kfree(p); - return; + err = -ENOMEM; + goto out0; } p->start_sect = start; p->nr_sects = len; @@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, /* delay uevent until 'holders' subdir is created */ p->dev.uevent_suppress = 1; - device_add(&p->dev); + err = device_add(&p->dev); + if (err) + goto out1; partition_sysfs_add_subdir(p); p->dev.uevent_suppress = 0; - if (flags & ADDPART_FLAG_WHOLEDISK) + if (flags & ADDPART_FLAG_WHOLEDISK) { err = device_create_file(&p->dev, &dev_attr_whole_disk); + if (err) + goto out2; + } /* suppress uevent if the disk supresses it */ if (!disk->dev.uevent_suppress) kobject_uevent(&p->dev.kobj, KOBJ_ADD); + + return 0; + +out2: + device_del(&p->dev); +out1: + put_device(&p->dev); + free_part_stats(p); +out0: + kfree(p); + return err; } /* Not exported, helper to add_disk(). */ @@ -483,10 +499,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (!size) continue; if (from + size > get_capacity(disk)) { - printk(" %s: p%d exceeds device capacity\n", + printk(KERN_ERR " %s: p%d exceeds device capacity\n", disk->disk_name, p); + continue; + } + res = add_partition(disk, p, from, size, state->parts[p].flags); + if (res) { + printk(KERN_ERR " %s: p%d could not be added: %d\n", + disk->disk_name, p, -res); + continue; } - add_partition(disk, p, from, size, state->parts[p].flags); #ifdef CONFIG_BLK_DEV_MD if (state->parts[p].flags & ADDPART_FLAG_RAID) md_autodetect_dev(bdev->bd_dev+p); diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index e7b07006bc41..038a6022152f 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c @@ -95,13 +95,6 @@ #include "check.h" #include "efi.h" -#undef EFI_DEBUG -#ifdef EFI_DEBUG -#define Dprintk(x...) printk(KERN_DEBUG x) -#else -#define Dprintk(x...) -#endif - /* This allows a kernel command line option 'gpt' to override * the test for invalid PMBR. Not __initdata because reloading * the partition tables happens after init too. @@ -305,10 +298,10 @@ is_gpt_valid(struct block_device *bdev, u64 lba, /* Check the GUID Partition Table signature */ if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) { - Dprintk("GUID Partition Table Header signature is wrong:" - "%lld != %lld\n", - (unsigned long long)le64_to_cpu((*gpt)->signature), - (unsigned long long)GPT_HEADER_SIGNATURE); + pr_debug("GUID Partition Table Header signature is wrong:" + "%lld != %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->signature), + (unsigned long long)GPT_HEADER_SIGNATURE); goto fail; } @@ -318,9 +311,8 @@ is_gpt_valid(struct block_device *bdev, u64 lba, crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size)); if (crc != origcrc) { - Dprintk - ("GUID Partition Table Header CRC is wrong: %x != %x\n", - crc, origcrc); + pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n", + crc, origcrc); goto fail; } (*gpt)->header_crc32 = cpu_to_le32(origcrc); @@ -328,9 +320,9 @@ is_gpt_valid(struct block_device *bdev, u64 lba, /* Check that the my_lba entry points to the LBA that contains * the GUID Partition Table */ if (le64_to_cpu((*gpt)->my_lba) != lba) { - Dprintk("GPT my_lba incorrect: %lld != %lld\n", - (unsigned long long)le64_to_cpu((*gpt)->my_lba), - (unsigned long long)lba); + pr_debug("GPT my_lba incorrect: %lld != %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->my_lba), + (unsigned long long)lba); goto fail; } @@ -339,15 +331,15 @@ is_gpt_valid(struct block_device *bdev, u64 lba, */ lastlba = last_lba(bdev); if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { - Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n", - (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), - (unsigned long long)lastlba); + pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), + (unsigned long long)lastlba); goto fail; } if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) { - Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n", - (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba), - (unsigned long long)lastlba); + pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba), + (unsigned long long)lastlba); goto fail; } @@ -360,7 +352,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba, le32_to_cpu((*gpt)->sizeof_partition_entry)); if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { - Dprintk("GUID Partitition Entry Array CRC check failed.\n"); + pr_debug("GUID Partitition Entry Array CRC check failed.\n"); goto fail_ptes; } @@ -616,7 +608,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev) return 0; } - Dprintk("GUID Partition Table is valid! Yea!\n"); + pr_debug("GUID Partition Table is valid! Yea!\n"); for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { if (!is_pte_valid(&ptes[i], last_lba(bdev))) diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 0fdda2e8a4cc..8652fb99e962 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -133,17 +133,17 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph) bool is_vista = false; BUG_ON(!data || !ph); - if (MAGIC_PRIVHEAD != BE64(data)) { + if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) { ldm_error("Cannot find PRIVHEAD structure. LDM database is" " corrupt. Aborting."); return false; } - ph->ver_major = BE16(data + 0x000C); - ph->ver_minor = BE16(data + 0x000E); - ph->logical_disk_start = BE64(data + 0x011B); - ph->logical_disk_size = BE64(data + 0x0123); - ph->config_start = BE64(data + 0x012B); - ph->config_size = BE64(data + 0x0133); + ph->ver_major = get_unaligned_be16(data + 0x000C); + ph->ver_minor = get_unaligned_be16(data + 0x000E); + ph->logical_disk_start = get_unaligned_be64(data + 0x011B); + ph->logical_disk_size = get_unaligned_be64(data + 0x0123); + ph->config_start = get_unaligned_be64(data + 0x012B); + ph->config_size = get_unaligned_be64(data + 0x0133); /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */ if (ph->ver_major == 2 && ph->ver_minor == 12) is_vista = true; @@ -191,14 +191,14 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc) { BUG_ON (!data || !toc); - if (MAGIC_TOCBLOCK != BE64 (data)) { + if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) { ldm_crit ("Cannot find TOCBLOCK, database may be corrupt."); return false; } strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name)); toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0; - toc->bitmap1_start = BE64 (data + 0x2E); - toc->bitmap1_size = BE64 (data + 0x36); + toc->bitmap1_start = get_unaligned_be64(data + 0x2E); + toc->bitmap1_size = get_unaligned_be64(data + 0x36); if (strncmp (toc->bitmap1_name, TOC_BITMAP1, sizeof (toc->bitmap1_name)) != 0) { @@ -208,8 +208,8 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc) } strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name)); toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0; - toc->bitmap2_start = BE64 (data + 0x50); - toc->bitmap2_size = BE64 (data + 0x58); + toc->bitmap2_start = get_unaligned_be64(data + 0x50); + toc->bitmap2_size = get_unaligned_be64(data + 0x58); if (strncmp (toc->bitmap2_name, TOC_BITMAP2, sizeof (toc->bitmap2_name)) != 0) { ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.", @@ -237,22 +237,22 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm) { BUG_ON (!data || !vm); - if (MAGIC_VMDB != BE32 (data)) { + if (MAGIC_VMDB != get_unaligned_be32(data)) { ldm_crit ("Cannot find the VMDB, database may be corrupt."); return false; } - vm->ver_major = BE16 (data + 0x12); - vm->ver_minor = BE16 (data + 0x14); + vm->ver_major = get_unaligned_be16(data + 0x12); + vm->ver_minor = get_unaligned_be16(data + 0x14); if ((vm->ver_major != 4) || (vm->ver_minor != 10)) { ldm_error ("Expected VMDB version %d.%d, got %d.%d. " "Aborting.", 4, 10, vm->ver_major, vm->ver_minor); return false; } - vm->vblk_size = BE32 (data + 0x08); - vm->vblk_offset = BE32 (data + 0x0C); - vm->last_vblk_seq = BE32 (data + 0x04); + vm->vblk_size = get_unaligned_be32(data + 0x08); + vm->vblk_offset = get_unaligned_be32(data + 0x0C); + vm->last_vblk_seq = get_unaligned_be32(data + 0x04); ldm_debug ("Parsed VMDB successfully."); return true; @@ -507,7 +507,7 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base, goto out; /* Already logged */ /* Are there uncommitted transactions? */ - if (BE16(data + 0x10) != 0x01) { + if (get_unaligned_be16(data + 0x10) != 0x01) { ldm_crit ("Database is not in a consistent state. Aborting."); goto out; } @@ -802,7 +802,7 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb) return false; len += VBLK_SIZE_CMP3; - if (len != BE32 (buffer + 0x14)) + if (len != get_unaligned_be32(buffer + 0x14)) return false; comp = &vb->vblk.comp; @@ -851,7 +851,7 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb) return false; len += VBLK_SIZE_DGR3; - if (len != BE32 (buffer + 0x14)) + if (len != get_unaligned_be32(buffer + 0x14)) return false; dgrp = &vb->vblk.dgrp; @@ -895,7 +895,7 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb) return false; len += VBLK_SIZE_DGR4; - if (len != BE32 (buffer + 0x14)) + if (len != get_unaligned_be32(buffer + 0x14)) return false; dgrp = &vb->vblk.dgrp; @@ -931,7 +931,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb) return false; len += VBLK_SIZE_DSK3; - if (len != BE32 (buffer + 0x14)) + if (len != get_unaligned_be32(buffer + 0x14)) return false; disk = &vb->vblk.disk; @@ -968,7 +968,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb) return false; len += VBLK_SIZE_DSK4; - if (len != BE32 (buffer + 0x14)) + if (len != get_unaligned_be32(buffer + 0x14)) return false; disk = &vb->vblk.disk; @@ -1034,14 +1034,14 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb) return false; } len += VBLK_SIZE_PRT3; - if (len > BE32(buffer + 0x14)) { + if (len > get_unaligned_be32(buffer + 0x14)) { ldm_error("len %d > BE32(buffer + 0x14) %d", len, - BE32(buffer + 0x14)); + get_unaligned_be32(buffer + 0x14)); return false; } part = &vb->vblk.part; - part->start = BE64(buffer + 0x24 + r_name); - part->volume_offset = BE64(buffer + 0x2C + r_name); + part->start = get_unaligned_be64(buffer + 0x24 + r_name); + part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name); part->size = ldm_get_vnum(buffer + 0x34 + r_name); part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size); part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent); @@ -1139,9 +1139,9 @@ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb) return false; } len += VBLK_SIZE_VOL5; - if (len > BE32(buffer + 0x14)) { + if (len > get_unaligned_be32(buffer + 0x14)) { ldm_error("len %d > BE32(buffer + 0x14) %d", len, - BE32(buffer + 0x14)); + get_unaligned_be32(buffer + 0x14)); return false; } volu = &vb->vblk.volu; @@ -1294,9 +1294,9 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags) BUG_ON (!data || !frags); - group = BE32 (data + 0x08); - rec = BE16 (data + 0x0C); - num = BE16 (data + 0x0E); + group = get_unaligned_be32(data + 0x08); + rec = get_unaligned_be16(data + 0x0C); + num = get_unaligned_be16(data + 0x0E); if ((num < 1) || (num > 4)) { ldm_error ("A VBLK claims to have %d parts.", num); return false; @@ -1425,12 +1425,12 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, } for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */ - if (MAGIC_VBLK != BE32 (data)) { + if (MAGIC_VBLK != get_unaligned_be32(data)) { ldm_error ("Expected to find a VBLK."); goto out; } - recs = BE16 (data + 0x0E); /* Number of records */ + recs = get_unaligned_be16(data + 0x0E); /* Number of records */ if (recs == 1) { if (!ldm_ldmdb_add (data, size, ldb)) goto out; /* Already logged */ diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index 80f63b5fdd9f..30e08e809c1d 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h @@ -98,11 +98,6 @@ struct parsed_partitions; #define TOC_BITMAP1 "config" /* Names of the two defined */ #define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ -/* Most numbers we deal with are big-endian and won't be aligned. */ -#define BE16(x) ((u16)be16_to_cpu(get_unaligned((__be16*)(x)))) -#define BE32(x) ((u32)be32_to_cpu(get_unaligned((__be32*)(x)))) -#define BE64(x) ((u64)be64_to_cpu(get_unaligned((__be64*)(x)))) - /* Borrowed from msdos.c */ #define SYS_IND(p) (get_unaligned(&(p)->sys_ind)) diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig new file mode 100644 index 000000000000..73cd7a418f06 --- /dev/null +++ b/fs/proc/Kconfig @@ -0,0 +1,59 @@ +config PROC_FS + bool "/proc file system support" if EMBEDDED + default y + help + This is a virtual file system providing information about the status + of the system. "Virtual" means that it doesn't take up any space on + your hard disk: the files are created on the fly by the kernel when + you try to access them. Also, you cannot read the files with older + version of the program less: you need to use more or cat. + + It's totally cool; for example, "cat /proc/interrupts" gives + information about what the different IRQs are used for at the moment + (there is a small number of Interrupt ReQuest lines in your computer + that are used by the attached devices to gain the CPU's attention -- + often a source of trouble if two devices are mistakenly configured + to use the same IRQ). The program procinfo to display some + information about your system gathered from the /proc file system. + + Before you can use the /proc file system, it has to be mounted, + meaning it has to be given a location in the directory hierarchy. + That location should be /proc. A command such as "mount -t proc proc + /proc" or the equivalent line in /etc/fstab does the job. + + The /proc file system is explained in the file + <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage + ("man 5 proc"). + + This option will enlarge your kernel by about 67 KB. Several + programs depend on this, so everyone should say Y here. + +config PROC_KCORE + bool "/proc/kcore support" if !ARM + depends on PROC_FS && MMU + +config PROC_VMCORE + bool "/proc/vmcore support (EXPERIMENTAL)" + depends on PROC_FS && CRASH_DUMP + default y + help + Exports the dump image of crashed kernel in ELF format. + +config PROC_SYSCTL + bool "Sysctl support (/proc/sys)" if EMBEDDED + depends on PROC_FS + select SYSCTL + default y + ---help--- + The sysctl interface provides a means of dynamically changing + certain kernel parameters and variables on the fly without requiring + a recompile of the kernel or reboot of the system. The primary + interface is through /proc/sys. If you say Y here a tree of + modifiable sysctl entries will be generated beneath the + /proc/sys directory. They are explained in the files + in <file:Documentation/sysctl/>. Note that enabling this + option will enlarge the kernel by at least 8 KB. + + As it is generally a good thing, you should say Y here unless + building a kernel for install/rescue disks or your system is very + limited in memory. diff --git a/fs/proc/base.c b/fs/proc/base.c index 58c3e6a8e15e..a891fe4cb43b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, } #ifdef CONFIG_TASK_IO_ACCOUNTING -static int proc_pid_io_accounting(struct task_struct *task, char *buffer) -{ +static int do_io_accounting(struct task_struct *task, char *buffer, int whole) +{ + u64 rchar, wchar, syscr, syscw; + struct task_io_accounting ioac; + + if (!whole) { + rchar = task->rchar; + wchar = task->wchar; + syscr = task->syscr; + syscw = task->syscw; + memcpy(&ioac, &task->ioac, sizeof(ioac)); + } else { + unsigned long flags; + struct task_struct *t = task; + rchar = wchar = syscr = syscw = 0; + memset(&ioac, 0, sizeof(ioac)); + + rcu_read_lock(); + do { + rchar += t->rchar; + wchar += t->wchar; + syscr += t->syscr; + syscw += t->syscw; + + ioac.read_bytes += t->ioac.read_bytes; + ioac.write_bytes += t->ioac.write_bytes; + ioac.cancelled_write_bytes += + t->ioac.cancelled_write_bytes; + t = next_thread(t); + } while (t != task); + rcu_read_unlock(); + + if (lock_task_sighand(task, &flags)) { + struct signal_struct *sig = task->signal; + + rchar += sig->rchar; + wchar += sig->wchar; + syscr += sig->syscr; + syscw += sig->syscw; + + ioac.read_bytes += sig->ioac.read_bytes; + ioac.write_bytes += sig->ioac.write_bytes; + ioac.cancelled_write_bytes += + sig->ioac.cancelled_write_bytes; + + unlock_task_sighand(task, &flags); + } + } + return sprintf(buffer, -#ifdef CONFIG_TASK_XACCT "rchar: %llu\n" "wchar: %llu\n" "syscr: %llu\n" "syscw: %llu\n" -#endif "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", -#ifdef CONFIG_TASK_XACCT - (unsigned long long)task->rchar, - (unsigned long long)task->wchar, - (unsigned long long)task->syscr, - (unsigned long long)task->syscw, -#endif - (unsigned long long)task->ioac.read_bytes, - (unsigned long long)task->ioac.write_bytes, - (unsigned long long)task->ioac.cancelled_write_bytes); + (unsigned long long)rchar, + (unsigned long long)wchar, + (unsigned long long)syscr, + (unsigned long long)syscw, + (unsigned long long)ioac.read_bytes, + (unsigned long long)ioac.write_bytes, + (unsigned long long)ioac.cancelled_write_bytes); +} + +static int proc_tid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 0); } -#endif + +static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 1); +} +#endif /* CONFIG_TASK_IO_ACCOUNTING */ /* * Thread groups @@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, pid_io_accounting), + INF("io", S_IRUGO, tgid_io_accounting), #endif }; @@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), #endif +#ifdef CONFIG_TASK_IO_ACCOUNTING + INF("io", S_IRUGO, tid_io_accounting), +#endif }; static int proc_tid_base_readdir(struct file * filp, diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 43e54e86cefd..bc0a0dd2d844 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ent->pde_users = 0; spin_lock_init(&ent->pde_unload_lock); ent->pde_unload_completion = NULL; + INIT_LIST_HEAD(&ent->pde_openers); out: return ent; } @@ -789,6 +790,19 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) spin_unlock(&de->pde_unload_lock); continue_removing: + spin_lock(&de->pde_unload_lock); + while (!list_empty(&de->pde_openers)) { + struct pde_opener *pdeo; + + pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); + list_del(&pdeo->lh); + spin_unlock(&de->pde_unload_lock); + pdeo->release(pdeo->inode, pdeo->file); + kfree(pdeo); + spin_lock(&de->pde_unload_lock); + } + spin_unlock(&de->pde_unload_lock); + if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b08d10017911..02eca2ed9dd7 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -111,27 +111,25 @@ int __init proc_init_inodecache(void) return 0; } -static int proc_remount(struct super_block *sb, int *flags, char *data) -{ - *flags |= MS_NODIRATIME; - return 0; -} - static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, .delete_inode = proc_delete_inode, .statfs = simple_statfs, - .remount_fs = proc_remount, }; -static void pde_users_dec(struct proc_dir_entry *pde) +static void __pde_users_dec(struct proc_dir_entry *pde) { - spin_lock(&pde->pde_unload_lock); pde->pde_users--; if (pde->pde_unload_completion && pde->pde_users == 0) complete(pde->pde_unload_completion); +} + +static void pde_users_dec(struct proc_dir_entry *pde) +{ + spin_lock(&pde->pde_unload_lock); + __pde_users_dec(pde); spin_unlock(&pde->pde_unload_lock); } @@ -318,36 +316,97 @@ static int proc_reg_open(struct inode *inode, struct file *file) struct proc_dir_entry *pde = PDE(inode); int rv = 0; int (*open)(struct inode *, struct file *); + int (*release)(struct inode *, struct file *); + struct pde_opener *pdeo; + + /* + * What for, you ask? Well, we can have open, rmmod, remove_proc_entry + * sequence. ->release won't be called because ->proc_fops will be + * cleared. Depending on complexity of ->release, consequences vary. + * + * We can't wait for mercy when close will be done for real, it's + * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release + * by hand in remove_proc_entry(). For this, save opener's credentials + * for later. + */ + pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); + if (!pdeo) + return -ENOMEM; spin_lock(&pde->pde_unload_lock); if (!pde->proc_fops) { spin_unlock(&pde->pde_unload_lock); + kfree(pdeo); return rv; } pde->pde_users++; open = pde->proc_fops->open; + release = pde->proc_fops->release; spin_unlock(&pde->pde_unload_lock); if (open) rv = open(inode, file); - pde_users_dec(pde); + spin_lock(&pde->pde_unload_lock); + if (rv == 0 && release) { + /* To know what to release. */ + pdeo->inode = inode; + pdeo->file = file; + /* Strictly for "too late" ->release in proc_reg_release(). */ + pdeo->release = release; + list_add(&pdeo->lh, &pde->pde_openers); + } else + kfree(pdeo); + __pde_users_dec(pde); + spin_unlock(&pde->pde_unload_lock); return rv; } +static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde, + struct inode *inode, struct file *file) +{ + struct pde_opener *pdeo; + + list_for_each_entry(pdeo, &pde->pde_openers, lh) { + if (pdeo->inode == inode && pdeo->file == file) + return pdeo; + } + return NULL; +} + static int proc_reg_release(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); int rv = 0; int (*release)(struct inode *, struct file *); + struct pde_opener *pdeo; spin_lock(&pde->pde_unload_lock); + pdeo = find_pde_opener(pde, inode, file); if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); + /* + * Can't simply exit, __fput() will think that everything is OK, + * and move on to freeing struct file. remove_proc_entry() will + * find slacker in opener's list and will try to do non-trivial + * things with struct file. Therefore, remove opener from list. + * + * But if opener is removed from list, who will ->release it? + */ + if (pdeo) { + list_del(&pdeo->lh); + spin_unlock(&pde->pde_unload_lock); + rv = pdeo->release(inode, file); + kfree(pdeo); + } else + spin_unlock(&pde->pde_unload_lock); return rv; } pde->pde_users++; release = pde->proc_fops->release; + if (pdeo) { + list_del(&pdeo->lh); + kfree(pdeo); + } spin_unlock(&pde->pde_unload_lock); if (release) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 28cbca805905..442202314d53 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; +extern const struct file_operations proc_kmsg_operations; extern const struct inode_operations proc_net_inode_operations; void free_proc_entry(struct proc_dir_entry *de); @@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, struct dentry *dentry); int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, filldir_t filldir); + +struct pde_opener { + struct inode *inode; + struct file *file; + int (*release)(struct inode *, struct file *); + struct list_head lh; +}; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index e78c81fcf547..c2370c76fb71 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -23,6 +23,10 @@ #define CORE_STR "CORE" +#ifndef ELF_CORE_EFLAGS +#define ELF_CORE_EFLAGS 0 +#endif + static int open_kcore(struct inode * inode, struct file * filp) { return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; @@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) elf->e_entry = 0; elf->e_phoff = sizeof(struct elfhdr); elf->e_shoff = 0; -#if defined(CONFIG_H8300) - elf->e_flags = ELF_FLAGS; -#else - elf->e_flags = 0; -#endif + elf->e_flags = ELF_CORE_EFLAGS; elf->e_ehsize = sizeof(struct elfhdr); elf->e_phentsize= sizeof(struct elf_phdr); elf->e_phnum = nphdr; diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index ff3b90b56e9d..9fd5df3f40ce 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -15,6 +15,8 @@ #include <asm/uaccess.h> #include <asm/io.h> +#include "internal.h" + extern wait_queue_head_t log_wait; extern int do_syslog(int type, char __user *bug, int count); diff --git a/fs/quota.c b/fs/quota.c index db1cc9f3c7aa..7f4386ebc23a 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type) void sync_dquots(struct super_block *sb, int type) { - int cnt, dirty; + int cnt; if (sb) { if (sb->s_qcop->quota_sync) @@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type) restart: list_for_each_entry(sb, &super_blocks, s_list) { /* This test just improves performance so it needn't be reliable... */ - for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) - if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) - && info_any_dirty(&sb_dqopt(sb)->info[cnt])) - dirty = 1; - if (!dirty) + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && type != cnt) + continue; + if (!sb_has_quota_enabled(sb, cnt)) + continue; + if (!info_dirty(&sb_dqopt(sb)->info[cnt]) && + list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list)) + continue; + break; + } + if (cnt == MAXQUOTAS) continue; sb->s_count++; spin_unlock(&sb_lock); diff --git a/fs/quota_v1.c b/fs/quota_v1.c index a6cf9269105c..5ae15b13eeb0 100644 --- a/fs/quota_v1.c +++ b/fs/quota_v1.c @@ -1,6 +1,7 @@ #include <linux/errno.h> #include <linux/fs.h> #include <linux/quota.h> +#include <linux/quotaops.h> #include <linux/dqblk_v1.h> #include <linux/quotaio_v1.h> #include <linux/kernel.h> diff --git a/fs/quota_v2.c b/fs/quota_v2.c index 234ada903633..b53827dc02d9 100644 --- a/fs/quota_v2.c +++ b/fs/quota_v2.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/quotaops.h> #include <asm/byteorder.h> diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index e396b2fa4743..c8f60ee183b5 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -34,15 +34,10 @@ ** from within kupdate, it will ignore the immediate flag */ -#include <asm/uaccess.h> -#include <asm/system.h> - #include <linux/time.h> #include <linux/semaphore.h> - #include <linux/vmalloc.h> #include <linux/reiserfs_fs.h> - #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fcntl.h> @@ -54,6 +49,9 @@ #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> +#include <linux/uaccess.h> + +#include <asm/system.h> /* gets a struct reiserfs_journal_list * from a list head */ #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ @@ -558,13 +556,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, static inline void lock_journal(struct super_block *p_s_sb) { PROC_INFO_INC(p_s_sb, journal.lock_journal); - down(&SB_JOURNAL(p_s_sb)->j_lock); + mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex); } /* unlock the current transaction */ static inline void unlock_journal(struct super_block *p_s_sb) { - up(&SB_JOURNAL(p_s_sb)->j_lock); + mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex); } static inline void get_journal_list(struct reiserfs_journal_list *jl) @@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s, } /* make sure nobody is trying to flush this one at the same time */ - down(&jl->j_commit_lock); + mutex_lock(&jl->j_commit_mutex); if (!journal_list_still_alive(s, trans_id)) { - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); goto put_jl; } BUG_ON(jl->j_trans_id == 0); @@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s, if (flushall) { atomic_set(&(jl->j_older_commits_done), 1); } - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); goto put_jl; } @@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s, if (flushall) { atomic_set(&(jl->j_older_commits_done), 1); } - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); put_jl: put_journal_list(s, jl); @@ -1411,8 +1409,8 @@ static int flush_journal_list(struct super_block *s, /* if flushall == 0, the lock is already held */ if (flushall) { - down(&journal->j_flush_sem); - } else if (!down_trylock(&journal->j_flush_sem)) { + mutex_lock(&journal->j_flush_mutex); + } else if (mutex_trylock(&journal->j_flush_mutex)) { BUG(); } @@ -1642,7 +1640,7 @@ static int flush_journal_list(struct super_block *s, jl->j_state = 0; put_journal_list(s, jl); if (flushall) - up(&journal->j_flush_sem); + mutex_unlock(&journal->j_flush_mutex); put_fs_excl(); return err; } @@ -1772,12 +1770,12 @@ static int kupdate_transactions(struct super_block *s, struct reiserfs_journal *journal = SB_JOURNAL(s); chunk.nr = 0; - down(&journal->j_flush_sem); + mutex_lock(&journal->j_flush_mutex); if (!journal_list_still_alive(s, orig_trans_id)) { goto done; } - /* we've got j_flush_sem held, nobody is going to delete any + /* we've got j_flush_mutex held, nobody is going to delete any * of these lists out from underneath us */ while ((num_trans && transactions_flushed < num_trans) || @@ -1812,7 +1810,7 @@ static int kupdate_transactions(struct super_block *s, } done: - up(&journal->j_flush_sem); + mutex_unlock(&journal->j_flush_mutex); return ret; } @@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) INIT_LIST_HEAD(&jl->j_working_list); INIT_LIST_HEAD(&jl->j_tail_bh_list); INIT_LIST_HEAD(&jl->j_bh_list); - sema_init(&jl->j_commit_lock, 1); + mutex_init(&jl->j_commit_mutex); SB_JOURNAL(s)->j_num_lists++; get_journal_list(jl); return jl; @@ -2837,8 +2835,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name, journal->j_last = NULL; journal->j_first = NULL; init_waitqueue_head(&(journal->j_join_wait)); - sema_init(&journal->j_lock, 1); - sema_init(&journal->j_flush_sem, 1); + mutex_init(&journal->j_mutex); + mutex_init(&journal->j_flush_mutex); journal->j_trans_id = 10; journal->j_mount_id = 10; @@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, * the new transaction is fully setup, and we've already flushed the * ordered bh list */ - down(&jl->j_commit_lock); + mutex_lock(&jl->j_commit_mutex); /* save the transaction id in case we need to commit it later */ commit_trans_id = jl->j_trans_id; @@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, lock_kernel(); } BUG_ON(!list_empty(&jl->j_tail_bh_list)); - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); /* honor the flush wishes from the caller, simple commits can ** be done outside the journal lock, they are done below diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1d40f2bd1970..2ec748ba0bd3 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -22,6 +22,7 @@ #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/exportfs.h> +#include <linux/quotaops.h> #include <linux/vfs.h> #include <linux/mnt_namespace.h> #include <linux/mount.h> @@ -182,7 +183,7 @@ static int finish_unfinished(struct super_block *s) int ret = reiserfs_quota_on_mount(s, i); if (ret < 0) reiserfs_warning(s, - "reiserfs: cannot turn on journalled quota: error %d", + "reiserfs: cannot turn on journaled quota: error %d", ret); } } @@ -876,7 +877,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin mount options were selected. */ unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */ char **jdev_name, - unsigned int *commit_max_age) + unsigned int *commit_max_age, + char **qf_names, + unsigned int *qfmt) { int c; char *arg = NULL; @@ -992,9 +995,11 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin if (c == 'u' || c == 'g') { int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; - if (sb_any_quota_enabled(s)) { + if ((sb_any_quota_enabled(s) || + sb_any_quota_suspended(s)) && + (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) { reiserfs_warning(s, - "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); + "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); return 0; } if (*arg) { /* Some filename specified? */ @@ -1011,46 +1016,54 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin "reiserfs_parse_options: quotafile must be on filesystem root."); return 0; } - REISERFS_SB(s)->s_qf_names[qtype] = + qf_names[qtype] = kmalloc(strlen(arg) + 1, GFP_KERNEL); - if (!REISERFS_SB(s)->s_qf_names[qtype]) { + if (!qf_names[qtype]) { reiserfs_warning(s, "reiserfs_parse_options: not enough memory for storing quotafile name."); return 0; } - strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); + strcpy(qf_names[qtype], arg); *mount_options |= 1 << REISERFS_QUOTA; } else { - kfree(REISERFS_SB(s)->s_qf_names[qtype]); - REISERFS_SB(s)->s_qf_names[qtype] = NULL; + if (qf_names[qtype] != + REISERFS_SB(s)->s_qf_names[qtype]) + kfree(qf_names[qtype]); + qf_names[qtype] = NULL; } } if (c == 'f') { if (!strcmp(arg, "vfsold")) - REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD; + *qfmt = QFMT_VFS_OLD; else if (!strcmp(arg, "vfsv0")) - REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0; + *qfmt = QFMT_VFS_V0; else { reiserfs_warning(s, "reiserfs_parse_options: unknown quota format specified."); return 0; } + if ((sb_any_quota_enabled(s) || + sb_any_quota_suspended(s)) && + *qfmt != REISERFS_SB(s)->s_jquota_fmt) { + reiserfs_warning(s, + "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); + return 0; + } } #else if (c == 'u' || c == 'g' || c == 'f') { reiserfs_warning(s, - "reiserfs_parse_options: journalled quota options not supported."); + "reiserfs_parse_options: journaled quota options not supported."); return 0; } #endif } #ifdef CONFIG_QUOTA - if (!REISERFS_SB(s)->s_jquota_fmt - && (REISERFS_SB(s)->s_qf_names[USRQUOTA] - || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) { + if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt + && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) { reiserfs_warning(s, - "reiserfs_parse_options: journalled quota format not specified."); + "reiserfs_parse_options: journaled quota format not specified."); return 0; } /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ @@ -1130,6 +1143,21 @@ static void handle_attrs(struct super_block *s) } } +#ifdef CONFIG_QUOTA +static void handle_quota_files(struct super_block *s, char **qf_names, + unsigned int *qfmt) +{ + int i; + + for (i = 0; i < MAXQUOTAS; i++) { + if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) + kfree(REISERFS_SB(s)->s_qf_names[i]); + REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; + } + REISERFS_SB(s)->s_jquota_fmt = *qfmt; +} +#endif + static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) { struct reiserfs_super_block *rs; @@ -1141,23 +1169,30 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) struct reiserfs_journal *journal = SB_JOURNAL(s); char *new_opts = kstrdup(arg, GFP_KERNEL); int err; + char *qf_names[MAXQUOTAS]; + unsigned int qfmt = 0; #ifdef CONFIG_QUOTA int i; + + memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); #endif rs = SB_DISK_SUPER_BLOCK(s); if (!reiserfs_parse_options - (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) { + (s, arg, &mount_options, &blocks, NULL, &commit_max_age, + qf_names, &qfmt)) { #ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) { - kfree(REISERFS_SB(s)->s_qf_names[i]); - REISERFS_SB(s)->s_qf_names[i] = NULL; - } + for (i = 0; i < MAXQUOTAS; i++) + if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) + kfree(qf_names[i]); #endif err = -EINVAL; goto out_err; } +#ifdef CONFIG_QUOTA + handle_quota_files(s, qf_names, &qfmt); +#endif handle_attrs(s); @@ -1570,6 +1605,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) char *jdev_name; struct reiserfs_sb_info *sbi; int errval = -EINVAL; + char *qf_names[MAXQUOTAS] = {}; + unsigned int qfmt = 0; save_mount_options(s, data); @@ -1597,9 +1634,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) jdev_name = NULL; if (reiserfs_parse_options (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, - &commit_max_age) == 0) { + &commit_max_age, qf_names, &qfmt) == 0) { goto error; } +#ifdef CONFIG_QUOTA + handle_quota_files(s, qf_names, &qfmt); +#endif if (blocks) { SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option " @@ -1819,7 +1859,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) return (0); - error: +error: if (jinit_done) { /* kill the commit thread, free journal ram */ journal_release_error(NULL, s); } @@ -1830,10 +1870,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) #ifdef CONFIG_QUOTA { int j; - for (j = 0; j < MAXQUOTAS; j++) { - kfree(sbi->s_qf_names[j]); - sbi->s_qf_names[j] = NULL; - } + for (j = 0; j < MAXQUOTAS; j++) + kfree(qf_names[j]); } #endif kfree(sbi); @@ -1980,7 +2018,7 @@ static int reiserfs_release_dquot(struct dquot *dquot) static int reiserfs_mark_dquot_dirty(struct dquot *dquot) { - /* Are we journalling quotas? */ + /* Are we journaling quotas? */ if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); @@ -2026,6 +2064,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, int err; struct nameidata nd; struct inode *inode; + struct reiserfs_transaction_handle th; if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) return -EINVAL; @@ -2053,17 +2092,28 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, } mark_inode_dirty(inode); } - /* Not journalling quota? No more tests needed... */ - if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && - !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { - path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, 0); - } - /* Quotafile not of fs root? */ - if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) - reiserfs_warning(sb, + /* Journaling quota? */ + if (REISERFS_SB(sb)->s_qf_names[type]) { + /* Quotafile not of fs root? */ + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) + reiserfs_warning(sb, "reiserfs: Quota file not on filesystem root. " "Journalled quota will not work."); + } + + /* + * When we journal data on quota file, we have to flush journal to see + * all updates to the file when we bypass pagecache... + */ + if (reiserfs_file_data_log(inode)) { + /* Just start temporary transaction and finish it */ + err = journal_begin(&th, sb, 1); + if (err) + return err; + err = journal_end_sync(&th, sb, 1); + if (err) + return err; + } path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path, 0); } diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 5e90a95ad60b..056008db1377 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -6,8 +6,6 @@ #include <linux/reiserfs_xattr.h> #include <asm/uaccess.h> -#define XATTR_SECURITY_PREFIX "security." - static int security_get(struct inode *inode, const char *name, void *buffer, size_t size) { diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index 024a938ca60f..60abe2bb1f98 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -7,8 +7,6 @@ #include <linux/reiserfs_xattr.h> #include <asm/uaccess.h> -#define XATTR_TRUSTED_PREFIX "trusted." - static int trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) { diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index 073f39364b11..1384efcb938e 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -10,8 +10,6 @@ # include <linux/reiserfs_acl.h> #endif -#define XATTR_USER_PREFIX "user." - static int user_get(struct inode *inode, const char *name, void *buffer, size_t size) { diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c index 8182f0542a21..8c177eb7e344 100644 --- a/fs/smbfs/cache.c +++ b/fs/smbfs/cache.c @@ -13,7 +13,6 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/dirent.h> #include <linux/smb_fs.h> #include <linux/pagemap.h> #include <linux/net.h> diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index d517a27b7f4b..ee536e8a649a 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -16,7 +16,6 @@ #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/dcache.h> -#include <linux/dirent.h> #include <linux/nls.h> #include <linux/smp_lock.h> #include <linux/net.h> diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 506f724055c2..227c9d700040 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -76,6 +76,7 @@ #include <linux/errno.h> #include <linux/fs.h> +#include <linux/quotaops.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/stat.h> diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index b546ba69be82..155c10b4adbd 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -621,7 +621,7 @@ shortname: memcpy(de->name, msdos_name, MSDOS_NAME); de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; de->lcase = lcase; - fat_date_unix2dos(ts->tv_sec, &time, &date); + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); de->time = de->ctime = time; de->date = de->cdate = de->adate = date; de->ctime_cs = 0; |