diff options
Diffstat (limited to 'fs')
57 files changed, 2274 insertions, 1212 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index d4fc6095466d..7c3d5f923da1 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -23,6 +23,10 @@ config BINFMT_ELF ld.so (check the file <file:Documentation/Changes> for location and latest version). +config COMPAT_BINFMT_ELF + bool + depends on COMPAT && MMU + config BINFMT_ELF_FDPIC bool "Kernel support for FDPIC ELF binaries" default y diff --git a/fs/Makefile b/fs/Makefile index 500cf15cdb4b..1e7a11bd4da1 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o obj-y += binfmt_script.o obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o +obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o obj-$(CONFIG_BINFMT_SOM) += binfmt_som.o obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o @@ -397,7 +397,7 @@ void fastcall __put_ioctx(struct kioctx *ctx) * This prevents races between the aio code path referencing the * req (after submitting it) and aio_complete() freeing the req. */ -static struct kiocb *FASTCALL(__aio_get_req(struct kioctx *ctx)); +static struct kiocb *__aio_get_req(struct kioctx *ctx); static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) { struct kiocb *req = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f0b3171842f2..18ed6dd906c1 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -45,7 +45,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); static int load_elf_library(struct file *); -static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); +static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, + int, int, unsigned long); /* * If we don't support core dumping, then supply a NULL so we @@ -298,33 +299,70 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, #ifndef elf_map static unsigned long elf_map(struct file *filep, unsigned long addr, - struct elf_phdr *eppnt, int prot, int type) + struct elf_phdr *eppnt, int prot, int type, + unsigned long total_size) { unsigned long map_addr; - unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr); + unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr); + unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr); + addr = ELF_PAGESTART(addr); + size = ELF_PAGEALIGN(size); - down_write(¤t->mm->mmap_sem); /* mmap() will return -EINVAL if given a zero size, but a * segment with zero filesize is perfectly valid */ - if (eppnt->p_filesz + pageoffset) - map_addr = do_mmap(filep, ELF_PAGESTART(addr), - eppnt->p_filesz + pageoffset, prot, type, - eppnt->p_offset - pageoffset); - else - map_addr = ELF_PAGESTART(addr); + if (!size) + return addr; + + down_write(¤t->mm->mmap_sem); + /* + * total_size is the size of the ELF (interpreter) image. + * The _first_ mmap needs to know the full size, otherwise + * randomization might put this image into an overlapping + * position with the ELF binary image. (since size < total_size) + * So we first map the 'big' image - and unmap the remainder at + * the end. (which unmap is needed for ELF images with holes.) + */ + if (total_size) { + total_size = ELF_PAGEALIGN(total_size); + map_addr = do_mmap(filep, addr, total_size, prot, type, off); + if (!BAD_ADDR(map_addr)) + do_munmap(current->mm, map_addr+size, total_size-size); + } else + map_addr = do_mmap(filep, addr, size, prot, type, off); + up_write(¤t->mm->mmap_sem); return(map_addr); } #endif /* !elf_map */ +static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr) +{ + int i, first_idx = -1, last_idx = -1; + + for (i = 0; i < nr; i++) { + if (cmds[i].p_type == PT_LOAD) { + last_idx = i; + if (first_idx == -1) + first_idx = i; + } + } + if (first_idx == -1) + return 0; + + return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz - + ELF_PAGESTART(cmds[first_idx].p_vaddr); +} + + /* This is much more generalized than the library routine read function, so we keep this separate. Technically the library read function is only provided so that we can read a.out libraries that have an ELF header */ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, - struct file *interpreter, unsigned long *interp_load_addr) + struct file *interpreter, unsigned long *interp_map_addr, + unsigned long no_base) { struct elf_phdr *elf_phdata; struct elf_phdr *eppnt; @@ -332,6 +370,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, int load_addr_set = 0; unsigned long last_bss = 0, elf_bss = 0; unsigned long error = ~0UL; + unsigned long total_size; int retval, i, size; /* First of all, some simple consistency checks */ @@ -370,6 +409,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, goto out_close; } + total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum); + if (!total_size) { + error = -EINVAL; + goto out_close; + } + eppnt = elf_phdata; for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { if (eppnt->p_type == PT_LOAD) { @@ -387,9 +432,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, vaddr = eppnt->p_vaddr; if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) elf_type |= MAP_FIXED; + else if (no_base && interp_elf_ex->e_type == ET_DYN) + load_addr = -vaddr; map_addr = elf_map(interpreter, load_addr + vaddr, - eppnt, elf_prot, elf_type); + eppnt, elf_prot, elf_type, total_size); + total_size = 0; + if (!*interp_map_addr) + *interp_map_addr = map_addr; error = map_addr; if (BAD_ADDR(map_addr)) goto out_close; @@ -455,8 +505,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, goto out_close; } - *interp_load_addr = load_addr; - error = ((unsigned long)interp_elf_ex->e_entry) + load_addr; + error = load_addr; out_close: kfree(elf_phdata); @@ -546,14 +595,14 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) int load_addr_set = 0; char * elf_interpreter = NULL; unsigned int interpreter_type = INTERPRETER_NONE; - unsigned char ibcs2_interpreter = 0; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata; unsigned long elf_bss, elf_brk; int elf_exec_fileno; int retval, i; unsigned int size; - unsigned long elf_entry, interp_load_addr = 0; + unsigned long elf_entry; + unsigned long interp_load_addr = 0; unsigned long start_code, end_code, start_data, end_data; unsigned long reloc_func_desc = 0; char passed_fileno[6]; @@ -663,14 +712,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') goto out_free_interp; - /* If the program interpreter is one of these two, - * then assume an iBCS2 image. Otherwise assume - * a native linux image. - */ - if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 || - strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0) - ibcs2_interpreter = 1; - /* * The early SET_PERSONALITY here is so that the lookup * for the interpreter happens in the namespace of the @@ -690,7 +731,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * switch really is going to happen - do this in * flush_thread(). - akpm */ - SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); + SET_PERSONALITY(loc->elf_ex, 0); interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); @@ -769,7 +810,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto out_free_dentry; } else { /* Executables without an interpreter also need a personality */ - SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); + SET_PERSONALITY(loc->elf_ex, 0); } /* OK, we are done with that, now set up the arg stuff, @@ -803,7 +844,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Do this immediately, since STACK_TOP as used in setup_arg_pages may depend on the personality. */ - SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); + SET_PERSONALITY(loc->elf_ex, 0); if (elf_read_implies_exec(loc->elf_ex, executable_stack)) current->personality |= READ_IMPLIES_EXEC; @@ -825,9 +866,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->start_stack = bprm->p; /* Now we do a little grungy work by mmaping the ELF image into - the correct location in memory. At this point, we assume that - the image should be loaded at fixed address, not at a variable - address. */ + the correct location in memory. */ for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { int elf_prot = 0, elf_flags; @@ -881,11 +920,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * default mmap base, as well as whatever program they * might try to exec. This is because the brk will * follow the loader, and is not movable. */ +#ifdef CONFIG_X86 + load_bias = 0; +#else load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); +#endif } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, - elf_prot, elf_flags); + elf_prot, elf_flags, 0); if (BAD_ADDR(error)) { send_sig(SIGKILL, current, 0); retval = IS_ERR((void *)error) ? @@ -961,13 +1004,25 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } if (elf_interpreter) { - if (interpreter_type == INTERPRETER_AOUT) + if (interpreter_type == INTERPRETER_AOUT) { elf_entry = load_aout_interp(&loc->interp_ex, interpreter); - else + } else { + unsigned long uninitialized_var(interp_map_addr); + elf_entry = load_elf_interp(&loc->interp_elf_ex, interpreter, - &interp_load_addr); + &interp_map_addr, + load_bias); + if (!IS_ERR((void *)elf_entry)) { + /* + * load_elf_interp() returns relocation + * adjustment + */ + interp_load_addr = elf_entry; + elf_entry += loc->interp_elf_ex.e_entry; + } + } if (BAD_ADDR(elf_entry)) { force_sig(SIGSEGV, current); retval = IS_ERR((void *)elf_entry) ? @@ -1021,6 +1076,12 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->end_data = end_data; current->mm->start_stack = bprm->p; +#ifdef arch_randomize_brk + if (current->flags & PF_RANDOMIZE) + current->mm->brk = current->mm->start_brk = + arch_randomize_brk(current->mm); +#endif + if (current->personality & MMAP_PAGE_ZERO) { /* Why this, you ask??? Well SVr4 maps page 0 as read-only, and some applications "depend" upon this behavior. @@ -1325,7 +1386,8 @@ static int writenote(struct memelfnote *men, struct file *file, if (!dump_seek(file, (off))) \ goto end_coredump; -static void fill_elf_header(struct elfhdr *elf, int segs) +static void fill_elf_header(struct elfhdr *elf, int segs, + u16 machine, u32 flags, u8 osabi) { memcpy(elf->e_ident, ELFMAG, SELFMAG); elf->e_ident[EI_CLASS] = ELF_CLASS; @@ -1335,12 +1397,12 @@ static void fill_elf_header(struct elfhdr *elf, int segs) memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); elf->e_type = ET_CORE; - elf->e_machine = ELF_ARCH; + elf->e_machine = machine; elf->e_version = EV_CURRENT; elf->e_entry = 0; elf->e_phoff = sizeof(struct elfhdr); elf->e_shoff = 0; - elf->e_flags = ELF_CORE_EFLAGS; + elf->e_flags = flags; elf->e_ehsize = sizeof(struct elfhdr); elf->e_phentsize = sizeof(struct elf_phdr); elf->e_phnum = segs; @@ -1447,6 +1509,238 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, return 0; } +static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm) +{ + elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv; + int i = 0; + do + i += 2; + while (auxv[i - 2] != AT_NULL); + fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv); +} + +#ifdef CORE_DUMP_USE_REGSET +#include <linux/regset.h> + +struct elf_thread_core_info { + struct elf_thread_core_info *next; + struct task_struct *task; + struct elf_prstatus prstatus; + struct memelfnote notes[0]; +}; + +struct elf_note_info { + struct elf_thread_core_info *thread; + struct memelfnote psinfo; + struct memelfnote auxv; + size_t size; + int thread_notes; +}; + +static int fill_thread_core_info(struct elf_thread_core_info *t, + const struct user_regset_view *view, + long signr, size_t *total) +{ + unsigned int i; + + /* + * NT_PRSTATUS is the one special case, because the regset data + * goes into the pr_reg field inside the note contents, rather + * than being the whole note contents. We fill the reset in here. + * We assume that regset 0 is NT_PRSTATUS. + */ + fill_prstatus(&t->prstatus, t->task, signr); + (void) view->regsets[0].get(t->task, &view->regsets[0], + 0, sizeof(t->prstatus.pr_reg), + &t->prstatus.pr_reg, NULL); + + fill_note(&t->notes[0], "CORE", NT_PRSTATUS, + sizeof(t->prstatus), &t->prstatus); + *total += notesize(&t->notes[0]); + + /* + * Each other regset might generate a note too. For each regset + * that has no core_note_type or is inactive, we leave t->notes[i] + * all zero and we'll know to skip writing it later. + */ + for (i = 1; i < view->n; ++i) { + const struct user_regset *regset = &view->regsets[i]; + if (regset->core_note_type && + (!regset->active || regset->active(t->task, regset))) { + int ret; + size_t size = regset->n * regset->size; + void *data = kmalloc(size, GFP_KERNEL); + if (unlikely(!data)) + return 0; + ret = regset->get(t->task, regset, + 0, size, data, NULL); + if (unlikely(ret)) + kfree(data); + else { + if (regset->core_note_type != NT_PRFPREG) + fill_note(&t->notes[i], "LINUX", + regset->core_note_type, + size, data); + else { + t->prstatus.pr_fpvalid = 1; + fill_note(&t->notes[i], "CORE", + NT_PRFPREG, size, data); + } + *total += notesize(&t->notes[i]); + } + } + } + + return 1; +} + +static int fill_note_info(struct elfhdr *elf, int phdrs, + struct elf_note_info *info, + long signr, struct pt_regs *regs) +{ + struct task_struct *dump_task = current; + const struct user_regset_view *view = task_user_regset_view(dump_task); + struct elf_thread_core_info *t; + struct elf_prpsinfo *psinfo; + struct task_struct *g, *p; + unsigned int i; + + info->size = 0; + info->thread = NULL; + + psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); + fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); + + if (psinfo == NULL) + return 0; + + /* + * Figure out how many notes we're going to need for each thread. + */ + info->thread_notes = 0; + for (i = 0; i < view->n; ++i) + if (view->regsets[i].core_note_type != 0) + ++info->thread_notes; + + /* + * Sanity check. We rely on regset 0 being in NT_PRSTATUS, + * since it is our one special case. + */ + if (unlikely(info->thread_notes == 0) || + unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) { + WARN_ON(1); + return 0; + } + + /* + * Initialize the ELF file header. + */ + fill_elf_header(elf, phdrs, + view->e_machine, view->e_flags, view->ei_osabi); + + /* + * Allocate a structure for each thread. + */ + rcu_read_lock(); + do_each_thread(g, p) + if (p->mm == dump_task->mm) { + t = kzalloc(offsetof(struct elf_thread_core_info, + notes[info->thread_notes]), + GFP_ATOMIC); + if (unlikely(!t)) { + rcu_read_unlock(); + return 0; + } + t->task = p; + if (p == dump_task || !info->thread) { + t->next = info->thread; + info->thread = t; + } else { + /* + * Make sure to keep the original task at + * the head of the list. + */ + t->next = info->thread->next; + info->thread->next = t; + } + } + while_each_thread(g, p); + rcu_read_unlock(); + + /* + * Now fill in each thread's information. + */ + for (t = info->thread; t != NULL; t = t->next) + if (!fill_thread_core_info(t, view, signr, &info->size)) + return 0; + + /* + * Fill in the two process-wide notes. + */ + fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); + info->size += notesize(&info->psinfo); + + fill_auxv_note(&info->auxv, current->mm); + info->size += notesize(&info->auxv); + + return 1; +} + +static size_t get_note_info_size(struct elf_note_info *info) +{ + return info->size; +} + +/* + * Write all the notes for each thread. When writing the first thread, the + * process-wide notes are interleaved after the first thread-specific note. + */ +static int write_note_info(struct elf_note_info *info, + struct file *file, loff_t *foffset) +{ + bool first = 1; + struct elf_thread_core_info *t = info->thread; + + do { + int i; + + if (!writenote(&t->notes[0], file, foffset)) + return 0; + + if (first && !writenote(&info->psinfo, file, foffset)) + return 0; + if (first && !writenote(&info->auxv, file, foffset)) + return 0; + + for (i = 1; i < info->thread_notes; ++i) + if (t->notes[i].data && + !writenote(&t->notes[i], file, foffset)) + return 0; + + first = 0; + t = t->next; + } while (t); + + return 1; +} + +static void free_note_info(struct elf_note_info *info) +{ + struct elf_thread_core_info *threads = info->thread; + while (threads) { + unsigned int i; + struct elf_thread_core_info *t = threads; + threads = t->next; + WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus); + for (i = 1; i < info->thread_notes; ++i) + kfree(t->notes[i].data); + kfree(t); + } + kfree(info->psinfo.data); +} + +#else + /* Here is the structure in which status of each thread is captured. */ struct elf_thread_status { @@ -1499,6 +1793,176 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) return sz; } +struct elf_note_info { + struct memelfnote *notes; + struct elf_prstatus *prstatus; /* NT_PRSTATUS */ + struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */ + struct list_head thread_list; + elf_fpregset_t *fpu; +#ifdef ELF_CORE_COPY_XFPREGS + elf_fpxregset_t *xfpu; +#endif + int thread_status_size; + int numnote; +}; + +static int fill_note_info(struct elfhdr *elf, int phdrs, + struct elf_note_info *info, + long signr, struct pt_regs *regs) +{ +#define NUM_NOTES 6 + struct list_head *t; + struct task_struct *g, *p; + + info->notes = NULL; + info->prstatus = NULL; + info->psinfo = NULL; + info->fpu = NULL; +#ifdef ELF_CORE_COPY_XFPREGS + info->xfpu = NULL; +#endif + INIT_LIST_HEAD(&info->thread_list); + + info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), + GFP_KERNEL); + if (!info->notes) + return 0; + info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); + if (!info->psinfo) + return 0; + info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); + if (!info->prstatus) + return 0; + info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); + if (!info->fpu) + return 0; +#ifdef ELF_CORE_COPY_XFPREGS + info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); + if (!info->xfpu) + return 0; +#endif + + info->thread_status_size = 0; + if (signr) { + struct elf_thread_status *tmp; + rcu_read_lock(); + do_each_thread(g, p) + if (current->mm == p->mm && current != p) { + tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); + if (!tmp) { + rcu_read_unlock(); + return 0; + } + tmp->thread = p; + list_add(&tmp->list, &info->thread_list); + } + while_each_thread(g, p); + rcu_read_unlock(); + list_for_each(t, &info->thread_list) { + struct elf_thread_status *tmp; + int sz; + + tmp = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(signr, tmp); + info->thread_status_size += sz; + } + } + /* now collect the dump for the current */ + memset(info->prstatus, 0, sizeof(*info->prstatus)); + fill_prstatus(info->prstatus, current, signr); + elf_core_copy_regs(&info->prstatus->pr_reg, regs); + + /* Set up header */ + fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI); + + /* + * Set up the notes in similar form to SVR4 core dumps made + * with info from their /proc. + */ + + fill_note(info->notes + 0, "CORE", NT_PRSTATUS, + sizeof(*info->prstatus), info->prstatus); + fill_psinfo(info->psinfo, current->group_leader, current->mm); + fill_note(info->notes + 1, "CORE", NT_PRPSINFO, + sizeof(*info->psinfo), info->psinfo); + + info->numnote = 2; + + fill_auxv_note(&info->notes[info->numnote++], current->mm); + + /* Try to dump the FPU. */ + info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, + info->fpu); + if (info->prstatus->pr_fpvalid) + fill_note(info->notes + info->numnote++, + "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu); +#ifdef ELF_CORE_COPY_XFPREGS + if (elf_core_copy_task_xfpregs(current, info->xfpu)) + fill_note(info->notes + info->numnote++, + "LINUX", ELF_CORE_XFPREG_TYPE, + sizeof(*info->xfpu), info->xfpu); +#endif + + return 1; + +#undef NUM_NOTES +} + +static size_t get_note_info_size(struct elf_note_info *info) +{ + int sz = 0; + int i; + + for (i = 0; i < info->numnote; i++) + sz += notesize(info->notes + i); + + sz += info->thread_status_size; + + return sz; +} + +static int write_note_info(struct elf_note_info *info, + struct file *file, loff_t *foffset) +{ + int i; + struct list_head *t; + + for (i = 0; i < info->numnote; i++) + if (!writenote(info->notes + i, file, foffset)) + return 0; + + /* write out the thread status notes section */ + list_for_each(t, &info->thread_list) { + struct elf_thread_status *tmp = + list_entry(t, struct elf_thread_status, list); + + for (i = 0; i < tmp->num_notes; i++) + if (!writenote(&tmp->notes[i], file, foffset)) + return 0; + } + + return 1; +} + +static void free_note_info(struct elf_note_info *info) +{ + while (!list_empty(&info->thread_list)) { + struct list_head *tmp = info->thread_list.next; + list_del(tmp); + kfree(list_entry(tmp, struct elf_thread_status, list)); + } + + kfree(info->prstatus); + kfree(info->psinfo); + kfree(info->notes); + kfree(info->fpu); +#ifdef ELF_CORE_COPY_XFPREGS + kfree(info->xfpu); +#endif +} + +#endif + static struct vm_area_struct *first_vma(struct task_struct *tsk, struct vm_area_struct *gate_vma) { @@ -1534,29 +1998,15 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, */ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) { -#define NUM_NOTES 6 int has_dumped = 0; mm_segment_t fs; int segs; size_t size = 0; - int i; struct vm_area_struct *vma, *gate_vma; struct elfhdr *elf = NULL; loff_t offset = 0, dataoff, foffset; - int numnote; - struct memelfnote *notes = NULL; - struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ - struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */ - struct task_struct *g, *p; - LIST_HEAD(thread_list); - struct list_head *t; - elf_fpregset_t *fpu = NULL; -#ifdef ELF_CORE_COPY_XFPREGS - elf_fpxregset_t *xfpu = NULL; -#endif - int thread_status_size = 0; - elf_addr_t *auxv; unsigned long mm_flags; + struct elf_note_info info; /* * We no longer stop all VM operations. @@ -1574,52 +2024,6 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un elf = kmalloc(sizeof(*elf), GFP_KERNEL); if (!elf) goto cleanup; - prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL); - if (!prstatus) - goto cleanup; - psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); - if (!psinfo) - goto cleanup; - notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL); - if (!notes) - goto cleanup; - fpu = kmalloc(sizeof(*fpu), GFP_KERNEL); - if (!fpu) - goto cleanup; -#ifdef ELF_CORE_COPY_XFPREGS - xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL); - if (!xfpu) - goto cleanup; -#endif - - if (signr) { - struct elf_thread_status *tmp; - rcu_read_lock(); - do_each_thread(g,p) - if (current->mm == p->mm && current != p) { - tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); - if (!tmp) { - rcu_read_unlock(); - goto cleanup; - } - tmp->thread = p; - list_add(&tmp->list, &thread_list); - } - while_each_thread(g,p); - rcu_read_unlock(); - list_for_each(t, &thread_list) { - struct elf_thread_status *tmp; - int sz; - - tmp = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(signr, tmp); - thread_status_size += sz; - } - } - /* now collect the dump for the current */ - memset(prstatus, 0, sizeof(*prstatus)); - fill_prstatus(prstatus, current, signr); - elf_core_copy_regs(&prstatus->pr_reg, regs); segs = current->mm->map_count; #ifdef ELF_CORE_EXTRA_PHDRS @@ -1630,42 +2034,16 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un if (gate_vma != NULL) segs++; - /* Set up header */ - fill_elf_header(elf, segs + 1); /* including notes section */ - - has_dumped = 1; - current->flags |= PF_DUMPCORE; - /* - * Set up the notes in similar form to SVR4 core dumps made - * with info from their /proc. + * Collect all the non-memory information about the process for the + * notes. This also sets up the file header. */ + if (!fill_note_info(elf, segs + 1, /* including notes section */ + &info, signr, regs)) + goto cleanup; - fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus); - fill_psinfo(psinfo, current->group_leader, current->mm); - fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); - - numnote = 2; - - auxv = (elf_addr_t *)current->mm->saved_auxv; - - i = 0; - do - i += 2; - while (auxv[i - 2] != AT_NULL); - fill_note(¬es[numnote++], "CORE", NT_AUXV, - i * sizeof(elf_addr_t), auxv); - - /* Try to dump the FPU. */ - if ((prstatus->pr_fpvalid = - elf_core_copy_task_fpregs(current, regs, fpu))) - fill_note(notes + numnote++, - "CORE", NT_PRFPREG, sizeof(*fpu), fpu); -#ifdef ELF_CORE_COPY_XFPREGS - if (elf_core_copy_task_xfpregs(current, xfpu)) - fill_note(notes + numnote++, - "LINUX", ELF_CORE_XFPREG_TYPE, sizeof(*xfpu), xfpu); -#endif + has_dumped = 1; + current->flags |= PF_DUMPCORE; fs = get_fs(); set_fs(KERNEL_DS); @@ -1678,12 +2056,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un /* Write notes phdr entry */ { struct elf_phdr phdr; - int sz = 0; - - for (i = 0; i < numnote; i++) - sz += notesize(notes + i); - - sz += thread_status_size; + size_t sz = get_note_info_size(&info); sz += elf_coredump_extra_notes_size(); @@ -1728,23 +2101,12 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un #endif /* write out the notes section */ - for (i = 0; i < numnote; i++) - if (!writenote(notes + i, file, &foffset)) - goto end_coredump; + if (!write_note_info(&info, file, &foffset)) + goto end_coredump; if (elf_coredump_extra_notes_write(file, &foffset)) goto end_coredump; - /* write out the thread status notes section */ - list_for_each(t, &thread_list) { - struct elf_thread_status *tmp = - list_entry(t, struct elf_thread_status, list); - - for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], file, &foffset)) - goto end_coredump; - } - /* Align to page */ DUMP_SEEK(dataoff - foffset); @@ -1795,22 +2157,9 @@ end_coredump: set_fs(fs); cleanup: - while (!list_empty(&thread_list)) { - struct list_head *tmp = thread_list.next; - list_del(tmp); - kfree(list_entry(tmp, struct elf_thread_status, list)); - } - kfree(elf); - kfree(prstatus); - kfree(psinfo); - kfree(notes); - kfree(fpu); -#ifdef ELF_CORE_COPY_XFPREGS - kfree(xfpu); -#endif + free_note_info(&info); return has_dumped; -#undef NUM_NOTES } #endif /* USE_ELF_CORE_DUMP */ diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c new file mode 100644 index 000000000000..0adced2f296f --- /dev/null +++ b/fs/compat_binfmt_elf.c @@ -0,0 +1,131 @@ +/* + * 32-bit compatibility support for ELF format executables and core dumps. + * + * Copyright (C) 2007 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * Red Hat Author: Roland McGrath. + * + * This file is used in a 64-bit kernel that wants to support 32-bit ELF. + * asm/elf.h is responsible for defining the compat_* and COMPAT_* macros + * used below, with definitions appropriate for 32-bit ABI compatibility. + * + * We use macros to rename the ABI types and machine-dependent + * functions used in binfmt_elf.c to compat versions. + */ + +#include <linux/elfcore-compat.h> +#include <linux/time.h> + +/* + * Rename the basic ELF layout types to refer to the 32-bit class of files. + */ +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 + +#undef elfhdr +#undef elf_phdr +#undef elf_note +#undef elf_addr_t +#define elfhdr elf32_hdr +#define elf_phdr elf32_phdr +#define elf_note elf32_note +#define elf_addr_t Elf32_Addr + +/* + * The machine-dependent core note format types are defined in elfcore-compat.h, + * which requires asm/elf.h to define compat_elf_gregset_t et al. + */ +#define elf_prstatus compat_elf_prstatus +#define elf_prpsinfo compat_elf_prpsinfo + +/* + * Compat version of cputime_to_compat_timeval, perhaps this + * should be an inline in <linux/compat.h>. + */ +static void cputime_to_compat_timeval(const cputime_t cputime, + struct compat_timeval *value) +{ + struct timeval tv; + cputime_to_timeval(cputime, &tv); + value->tv_sec = tv.tv_sec; + value->tv_usec = tv.tv_usec; +} + +#undef cputime_to_timeval +#define cputime_to_timeval cputime_to_compat_timeval + + +/* + * To use this file, asm/elf.h must define compat_elf_check_arch. + * The other following macros can be defined if the compat versions + * differ from the native ones, or omitted when they match. + */ + +#undef ELF_ARCH +#undef elf_check_arch +#define elf_check_arch compat_elf_check_arch + +#ifdef COMPAT_ELF_PLATFORM +#undef ELF_PLATFORM +#define ELF_PLATFORM COMPAT_ELF_PLATFORM +#endif + +#ifdef COMPAT_ELF_HWCAP +#undef ELF_HWCAP +#define ELF_HWCAP COMPAT_ELF_HWCAP +#endif + +#ifdef COMPAT_ARCH_DLINFO +#undef ARCH_DLINFO +#define ARCH_DLINFO COMPAT_ARCH_DLINFO +#endif + +#ifdef COMPAT_ELF_ET_DYN_BASE +#undef ELF_ET_DYN_BASE +#define ELF_ET_DYN_BASE COMPAT_ELF_ET_DYN_BASE +#endif + +#ifdef COMPAT_ELF_EXEC_PAGESIZE +#undef ELF_EXEC_PAGESIZE +#define ELF_EXEC_PAGESIZE COMPAT_ELF_EXEC_PAGESIZE +#endif + +#ifdef COMPAT_ELF_PLAT_INIT +#undef ELF_PLAT_INIT +#define ELF_PLAT_INIT COMPAT_ELF_PLAT_INIT +#endif + +#ifdef COMPAT_SET_PERSONALITY +#undef SET_PERSONALITY +#define SET_PERSONALITY COMPAT_SET_PERSONALITY +#endif + +#ifdef compat_start_thread +#undef start_thread +#define start_thread compat_start_thread +#endif + +#ifdef compat_arch_setup_additional_pages +#undef ARCH_HAS_SETUP_ADDITIONAL_PAGES +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +#undef arch_setup_additional_pages +#define arch_setup_additional_pages compat_arch_setup_additional_pages +#endif + +/* + * Rename a few of the symbols that binfmt_elf.c will define. + * These are all local so the names don't really matter, but it + * might make some debugging less confusing not to duplicate them. + */ +#define elf_format compat_elf_format +#define init_elf_binfmt init_compat_elf_binfmt +#define exit_elf_binfmt exit_compat_elf_binfmt + +/* + * We share all the actual code with the native (64-bit) version. + */ +#include "binfmt_elf.c" diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 46754553fdcc..ff97ba924333 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -49,7 +49,7 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) spin_unlock(&ls->ls_recover_list_lock); if (!found) - de = allocate_direntry(ls, len); + de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL); return de; } @@ -62,7 +62,7 @@ void dlm_clear_free_entries(struct dlm_ls *ls) de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, list); list_del(&de->list); - free_direntry(de); + kfree(de); } spin_unlock(&ls->ls_recover_list_lock); } @@ -171,7 +171,7 @@ void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen } list_del(&de->list); - free_direntry(de); + kfree(de); out: write_unlock(&ls->ls_dirtbl[bucket].lock); } @@ -302,7 +302,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, write_unlock(&ls->ls_dirtbl[bucket].lock); - de = allocate_direntry(ls, namelen); + de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL); if (!de) return -ENOMEM; @@ -313,7 +313,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, write_lock(&ls->ls_dirtbl[bucket].lock); tmp = search_bucket(ls, name, namelen, bucket); if (tmp) { - free_direntry(de); + kfree(de); de = tmp; } else { list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); @@ -329,49 +329,47 @@ int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, return get_entry(ls, nodeid, name, namelen, r_nodeid); } -/* Copy the names of master rsb's into the buffer provided. - Only select names whose dir node is the given nodeid. */ +static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) +{ + struct dlm_rsb *r; + + down_read(&ls->ls_root_sem); + list_for_each_entry(r, &ls->ls_root_list, res_root_list) { + if (len == r->res_length && !memcmp(name, r->res_name, len)) { + up_read(&ls->ls_root_sem); + return r; + } + } + up_read(&ls->ls_root_sem); + return NULL; +} + +/* Find the rsb where we left off (or start again), then send rsb names + for rsb's we're master of and whose directory node matches the requesting + node. inbuf is the rsb name last sent, inlen is the name's length */ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, char *outbuf, int outlen, int nodeid) { struct list_head *list; - struct dlm_rsb *start_r = NULL, *r = NULL; - int offset = 0, start_namelen, error, dir_nodeid; - char *start_name; + struct dlm_rsb *r; + int offset = 0, dir_nodeid; uint16_t be_namelen; - /* - * Find the rsb where we left off (or start again) - */ - - start_namelen = inlen; - start_name = inbuf; - - if (start_namelen > 1) { - /* - * We could also use a find_rsb_root() function here that - * searched the ls_root_list. - */ - error = dlm_find_rsb(ls, start_name, start_namelen, R_MASTER, - &start_r); - DLM_ASSERT(!error && start_r, - printk("error %d\n", error);); - DLM_ASSERT(!list_empty(&start_r->res_root_list), - dlm_print_rsb(start_r);); - dlm_put_rsb(start_r); - } - - /* - * Send rsb names for rsb's we're master of and whose directory node - * matches the requesting node. - */ - down_read(&ls->ls_root_sem); - if (start_r) - list = start_r->res_root_list.next; - else + + if (inlen > 1) { + r = find_rsb_root(ls, inbuf, inlen); + if (!r) { + inbuf[inlen - 1] = '\0'; + log_error(ls, "copy_master_names from %d start %d %s", + nodeid, inlen, inbuf); + goto out; + } + list = r->res_root_list.next; + } else { list = ls->ls_root_list.next; + } for (offset = 0; list != &ls->ls_root_list; list = list->next) { r = list_entry(list, struct dlm_rsb, res_root_list); diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index d2fc2384c3be..ec61bbaf25df 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -570,5 +570,21 @@ static inline int dlm_no_directory(struct dlm_ls *ls) return (ls->ls_exflags & DLM_LSFL_NODIR) ? 1 : 0; } +int dlm_netlink_init(void); +void dlm_netlink_exit(void); +void dlm_timeout_warn(struct dlm_lkb *lkb); + +#ifdef CONFIG_DLM_DEBUG +int dlm_register_debugfs(void); +void dlm_unregister_debugfs(void); +int dlm_create_debug_file(struct dlm_ls *ls); +void dlm_delete_debug_file(struct dlm_ls *ls); +#else +static inline int dlm_register_debugfs(void) { return 0; } +static inline void dlm_unregister_debugfs(void) { } +static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; } +static inline void dlm_delete_debug_file(struct dlm_ls *ls) { } +#endif + #endif /* __DLM_INTERNAL_DOT_H__ */ diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 3915b8e14146..ff4a198fa677 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -88,7 +88,6 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, static int receive_extralen(struct dlm_message *ms); static void do_purge(struct dlm_ls *ls, int nodeid, int pid); static void del_timeout(struct dlm_lkb *lkb); -void dlm_timeout_warn(struct dlm_lkb *lkb); /* * Lock compatibilty matrix - thanks Steve @@ -335,7 +334,7 @@ static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) { struct dlm_rsb *r; - r = allocate_rsb(ls, len); + r = dlm_allocate_rsb(ls, len); if (!r) return NULL; @@ -478,7 +477,7 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); if (!error) { write_unlock(&ls->ls_rsbtbl[bucket].lock); - free_rsb(r); + dlm_free_rsb(r); r = tmp; goto out; } @@ -490,12 +489,6 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, return error; } -int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, - unsigned int flags, struct dlm_rsb **r_ret) -{ - return find_rsb(ls, name, namelen, flags, r_ret); -} - /* This is only called to add a reference when the code already holds a valid reference to the rsb, so there's no need for locking. */ @@ -519,7 +512,7 @@ static void toss_rsb(struct kref *kref) list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); r->res_toss_time = jiffies; if (r->res_lvbptr) { - free_lvb(r->res_lvbptr); + dlm_free_lvb(r->res_lvbptr); r->res_lvbptr = NULL; } } @@ -589,7 +582,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) uint32_t lkid = 0; uint16_t bucket; - lkb = allocate_lkb(ls); + lkb = dlm_allocate_lkb(ls); if (!lkb) return -ENOMEM; @@ -683,8 +676,8 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) /* for local/process lkbs, lvbptr points to caller's lksb */ if (lkb->lkb_lvbptr && is_master_copy(lkb)) - free_lvb(lkb->lkb_lvbptr); - free_lkb(lkb); + dlm_free_lvb(lkb->lkb_lvbptr); + dlm_free_lkb(lkb); return 1; } else { write_unlock(&ls->ls_lkbtbl[bucket].lock); @@ -988,7 +981,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b) if (is_master(r)) dir_remove(r); - free_rsb(r); + dlm_free_rsb(r); count++; } else { write_unlock(&ls->ls_rsbtbl[b].lock); @@ -1171,7 +1164,7 @@ static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) return; if (!r->res_lvbptr) - r->res_lvbptr = allocate_lvb(r->res_ls); + r->res_lvbptr = dlm_allocate_lvb(r->res_ls); if (!r->res_lvbptr) return; @@ -1203,7 +1196,7 @@ static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) return; if (!r->res_lvbptr) - r->res_lvbptr = allocate_lvb(r->res_ls); + r->res_lvbptr = dlm_allocate_lvb(r->res_ls); if (!r->res_lvbptr) return; @@ -1852,7 +1845,7 @@ static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb) static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) { struct dlm_ls *ls = r->res_ls; - int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); + int i, error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) { rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); @@ -1886,7 +1879,7 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) return 1; } - for (;;) { + for (i = 0; i < 2; i++) { /* It's possible for dlm_scand to remove an old rsb for this same resource from the toss list, us to create a new one, look up the master locally, and find it @@ -1900,6 +1893,8 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) log_debug(ls, "dir_lookup error %d %s", error, r->res_name); schedule(); } + if (error && error != -EEXIST) + return error; if (ret_nodeid == our_nodeid) { r->res_first_lkid = 0; @@ -1941,8 +1936,11 @@ static void confirm_master(struct dlm_rsb *r, int error) break; case -EAGAIN: - /* the remote master didn't queue our NOQUEUE request; - make a waiting lkb the first_lkid */ + case -EBADR: + case -ENOTBLK: + /* the remote request failed and won't be retried (it was + a NOQUEUE, or has been canceled/unlocked); make a waiting + lkb the first_lkid */ r->res_first_lkid = 0; @@ -2108,17 +2106,18 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) /* an lkb may be waiting for an rsb lookup to complete where the lookup was initiated by another lock */ - if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) { - if (!list_empty(&lkb->lkb_rsb_lookup)) { + if (!list_empty(&lkb->lkb_rsb_lookup)) { + if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) { log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id); list_del_init(&lkb->lkb_rsb_lookup); queue_cast(lkb->lkb_resource, lkb, args->flags & DLM_LKF_CANCEL ? -DLM_ECANCEL : -DLM_EUNLOCK); unhold_lkb(lkb); /* undoes create_lkb() */ - rv = -EBUSY; - goto out; } + /* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */ + rv = -EBUSY; + goto out; } /* cancel not allowed with another cancel/unlock in progress */ @@ -2986,7 +2985,7 @@ static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, if (lkb->lkb_exflags & DLM_LKF_VALBLK) { if (!lkb->lkb_lvbptr) - lkb->lkb_lvbptr = allocate_lvb(ls); + lkb->lkb_lvbptr = dlm_allocate_lvb(ls); if (!lkb->lkb_lvbptr) return -ENOMEM; len = receive_extralen(ms); @@ -3006,11 +3005,9 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST); lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP); - DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); - if (lkb->lkb_exflags & DLM_LKF_VALBLK) { /* lkb was just created so there won't be an lvb yet */ - lkb->lkb_lvbptr = allocate_lvb(ls); + lkb->lkb_lvbptr = dlm_allocate_lvb(ls); if (!lkb->lkb_lvbptr) return -ENOMEM; } @@ -3021,16 +3018,6 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_message *ms) { - if (lkb->lkb_nodeid != ms->m_header.h_nodeid) { - log_error(ls, "convert_args nodeid %d %d lkid %x %x", - lkb->lkb_nodeid, ms->m_header.h_nodeid, - lkb->lkb_id, lkb->lkb_remid); - return -EINVAL; - } - - if (!is_master_copy(lkb)) - return -EINVAL; - if (lkb->lkb_status != DLM_LKSTS_GRANTED) return -EBUSY; @@ -3046,8 +3033,6 @@ static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_message *ms) { - if (!is_master_copy(lkb)) - return -EINVAL; if (receive_lvb(ls, lkb, ms)) return -ENOMEM; return 0; @@ -3063,6 +3048,50 @@ static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms) lkb->lkb_remid = ms->m_lkid; } +/* This is called after the rsb is locked so that we can safely inspect + fields in the lkb. */ + +static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) +{ + int from = ms->m_header.h_nodeid; + int error = 0; + + switch (ms->m_type) { + case DLM_MSG_CONVERT: + case DLM_MSG_UNLOCK: + case DLM_MSG_CANCEL: + if (!is_master_copy(lkb) || lkb->lkb_nodeid != from) + error = -EINVAL; + break; + + case DLM_MSG_CONVERT_REPLY: + case DLM_MSG_UNLOCK_REPLY: + case DLM_MSG_CANCEL_REPLY: + case DLM_MSG_GRANT: + case DLM_MSG_BAST: + if (!is_process_copy(lkb) || lkb->lkb_nodeid != from) + error = -EINVAL; + break; + + case DLM_MSG_REQUEST_REPLY: + if (!is_process_copy(lkb)) + error = -EINVAL; + else if (lkb->lkb_nodeid != -1 && lkb->lkb_nodeid != from) + error = -EINVAL; + break; + + default: + error = -EINVAL; + } + + if (error) + log_error(lkb->lkb_resource->res_ls, + "ignore invalid message %d from %d %x %x %x %d", + ms->m_type, from, lkb->lkb_id, lkb->lkb_remid, + lkb->lkb_flags, lkb->lkb_nodeid); + return error; +} + static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) { struct dlm_lkb *lkb; @@ -3124,17 +3153,21 @@ static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms) hold_rsb(r); lock_rsb(r); + error = validate_message(lkb, ms); + if (error) + goto out; + receive_flags(lkb, ms); error = receive_convert_args(ls, lkb, ms); if (error) - goto out; + goto out_reply; reply = !down_conversion(lkb); error = do_convert(r, lkb); - out: + out_reply: if (reply) send_convert_reply(r, lkb, error); - + out: unlock_rsb(r); put_rsb(r); dlm_put_lkb(lkb); @@ -3160,15 +3193,19 @@ static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) hold_rsb(r); lock_rsb(r); + error = validate_message(lkb, ms); + if (error) + goto out; + receive_flags(lkb, ms); error = receive_unlock_args(ls, lkb, ms); if (error) - goto out; + goto out_reply; error = do_unlock(r, lkb); - out: + out_reply: send_unlock_reply(r, lkb, error); - + out: unlock_rsb(r); put_rsb(r); dlm_put_lkb(lkb); @@ -3196,9 +3233,13 @@ static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) hold_rsb(r); lock_rsb(r); + error = validate_message(lkb, ms); + if (error) + goto out; + error = do_cancel(r, lkb); send_cancel_reply(r, lkb, error); - + out: unlock_rsb(r); put_rsb(r); dlm_put_lkb(lkb); @@ -3217,22 +3258,26 @@ static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms) error = find_lkb(ls, ms->m_remid, &lkb); if (error) { - log_error(ls, "receive_grant no lkb"); + log_debug(ls, "receive_grant from %d no lkb %x", + ms->m_header.h_nodeid, ms->m_remid); return; } - DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); r = lkb->lkb_resource; hold_rsb(r); lock_rsb(r); + error = validate_message(lkb, ms); + if (error) + goto out; + receive_flags_reply(lkb, ms); if (is_altmode(lkb)) munge_altmode(lkb, ms); grant_lock_pc(r, lkb, ms); queue_cast(r, lkb, 0); - + out: unlock_rsb(r); put_rsb(r); dlm_put_lkb(lkb); @@ -3246,18 +3291,22 @@ static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms) error = find_lkb(ls, ms->m_remid, &lkb); if (error) { - log_error(ls, "receive_bast no lkb"); + log_debug(ls, "receive_bast from %d no lkb %x", + ms->m_header.h_nodeid, ms->m_remid); return; } - DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); r = lkb->lkb_resource; hold_rsb(r); lock_rsb(r); - queue_bast(r, lkb, ms->m_bastmode); + error = validate_message(lkb, ms); + if (error) + goto out; + queue_bast(r, lkb, ms->m_bastmode); + out: unlock_rsb(r); put_rsb(r); dlm_put_lkb(lkb); @@ -3323,15 +3372,19 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) error = find_lkb(ls, ms->m_remid, &lkb); if (error) { - log_error(ls, "receive_request_reply no lkb"); + log_debug(ls, "receive_request_reply from %d no lkb %x", + ms->m_header.h_nodeid, ms->m_remid); return; } - DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); r = lkb->lkb_resource; hold_rsb(r); lock_rsb(r); + error = validate_message(lkb, ms); + if (error) + goto out; + mstype = lkb->lkb_wait_type; error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY); if (error) @@ -3383,6 +3436,7 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) if (is_overlap(lkb)) { /* we'll ignore error in cancel/unlock reply */ queue_cast_overlap(r, lkb); + confirm_master(r, result); unhold_lkb(lkb); /* undoes create_lkb() */ } else _request_lock(r, lkb); @@ -3463,6 +3517,10 @@ static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms) hold_rsb(r); lock_rsb(r); + error = validate_message(lkb, ms); + if (error) + goto out; + /* stub reply can happen with waiters_mutex held */ error = remove_from_waiters_ms(lkb, ms); if (error) @@ -3481,10 +3539,10 @@ static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) error = find_lkb(ls, ms->m_remid, &lkb); if (error) { - log_error(ls, "receive_convert_reply no lkb"); + log_debug(ls, "receive_convert_reply from %d no lkb %x", + ms->m_header.h_nodeid, ms->m_remid); return; } - DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); _receive_convert_reply(lkb, ms); dlm_put_lkb(lkb); @@ -3498,6 +3556,10 @@ static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) hold_rsb(r); lock_rsb(r); + error = validate_message(lkb, ms); + if (error) + goto out; + /* stub reply can happen with waiters_mutex held */ error = remove_from_waiters_ms(lkb, ms); if (error) @@ -3529,10 +3591,10 @@ static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) error = find_lkb(ls, ms->m_remid, &lkb); if (error) { - log_error(ls, "receive_unlock_reply no lkb"); + log_debug(ls, "receive_unlock_reply from %d no lkb %x", + ms->m_header.h_nodeid, ms->m_remid); return; } - DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); _receive_unlock_reply(lkb, ms); dlm_put_lkb(lkb); @@ -3546,6 +3608,10 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) hold_rsb(r); lock_rsb(r); + error = validate_message(lkb, ms); + if (error) + goto out; + /* stub reply can happen with waiters_mutex held */ error = remove_from_waiters_ms(lkb, ms); if (error) @@ -3577,10 +3643,10 @@ static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) error = find_lkb(ls, ms->m_remid, &lkb); if (error) { - log_error(ls, "receive_cancel_reply no lkb"); + log_debug(ls, "receive_cancel_reply from %d no lkb %x", + ms->m_header.h_nodeid, ms->m_remid); return; } - DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); _receive_cancel_reply(lkb, ms); dlm_put_lkb(lkb); @@ -3640,6 +3706,13 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) { + if (!dlm_is_member(ls, ms->m_header.h_nodeid)) { + log_debug(ls, "ignore non-member message %d from %d %x %x %d", + ms->m_type, ms->m_header.h_nodeid, ms->m_lkid, + ms->m_remid, ms->m_result); + return; + } + switch (ms->m_type) { /* messages sent to a master node */ @@ -3778,8 +3851,9 @@ void dlm_receive_buffer(struct dlm_header *hd, int nodeid) ls = dlm_find_lockspace_global(hd->h_lockspace); if (!ls) { - log_print("invalid h_lockspace %x from %d cmd %d type %d", - hd->h_lockspace, nodeid, hd->h_cmd, type); + if (dlm_config.ci_log_debug) + log_print("invalid lockspace %x from %d cmd %d type %d", + hd->h_lockspace, nodeid, hd->h_cmd, type); if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) dlm_send_ls_not_ready(nodeid, rc); @@ -3806,6 +3880,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; ls->ls_stub_ms.m_result = -EINPROGRESS; ls->ls_stub_ms.m_flags = lkb->lkb_flags; + ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; _receive_convert_reply(lkb, &ls->ls_stub_ms); /* Same special case as in receive_rcom_lock_args() */ @@ -3847,6 +3922,7 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb) void dlm_recover_waiters_pre(struct dlm_ls *ls) { struct dlm_lkb *lkb, *safe; + int wait_type, stub_unlock_result, stub_cancel_result; mutex_lock(&ls->ls_waiters_mutex); @@ -3865,7 +3941,33 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) if (!waiter_needs_recovery(ls, lkb)) continue; - switch (lkb->lkb_wait_type) { + wait_type = lkb->lkb_wait_type; + stub_unlock_result = -DLM_EUNLOCK; + stub_cancel_result = -DLM_ECANCEL; + + /* Main reply may have been received leaving a zero wait_type, + but a reply for the overlapping op may not have been + received. In that case we need to fake the appropriate + reply for the overlap op. */ + + if (!wait_type) { + if (is_overlap_cancel(lkb)) { + wait_type = DLM_MSG_CANCEL; + if (lkb->lkb_grmode == DLM_LOCK_IV) + stub_cancel_result = 0; + } + if (is_overlap_unlock(lkb)) { + wait_type = DLM_MSG_UNLOCK; + if (lkb->lkb_grmode == DLM_LOCK_IV) + stub_unlock_result = -ENOENT; + } + + log_debug(ls, "rwpre overlap %x %x %d %d %d", + lkb->lkb_id, lkb->lkb_flags, wait_type, + stub_cancel_result, stub_unlock_result); + } + + switch (wait_type) { case DLM_MSG_REQUEST: lkb->lkb_flags |= DLM_IFL_RESEND; @@ -3878,8 +3980,9 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) case DLM_MSG_UNLOCK: hold_lkb(lkb); ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; - ls->ls_stub_ms.m_result = -DLM_EUNLOCK; + ls->ls_stub_ms.m_result = stub_unlock_result; ls->ls_stub_ms.m_flags = lkb->lkb_flags; + ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; _receive_unlock_reply(lkb, &ls->ls_stub_ms); dlm_put_lkb(lkb); break; @@ -3887,15 +3990,16 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) case DLM_MSG_CANCEL: hold_lkb(lkb); ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; - ls->ls_stub_ms.m_result = -DLM_ECANCEL; + ls->ls_stub_ms.m_result = stub_cancel_result; ls->ls_stub_ms.m_flags = lkb->lkb_flags; + ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; _receive_cancel_reply(lkb, &ls->ls_stub_ms); dlm_put_lkb(lkb); break; default: - log_error(ls, "invalid lkb wait_type %d", - lkb->lkb_wait_type); + log_error(ls, "invalid lkb wait_type %d %d", + lkb->lkb_wait_type, wait_type); } schedule(); } @@ -4184,7 +4288,7 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP); if (lkb->lkb_exflags & DLM_LKF_VALBLK) { - lkb->lkb_lvbptr = allocate_lvb(ls); + lkb->lkb_lvbptr = dlm_allocate_lvb(ls); if (!lkb->lkb_lvbptr) return -ENOMEM; lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - @@ -4259,7 +4363,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) put_rsb(r); out: if (error) - log_print("recover_master_copy %d %x", error, rl->rl_lkid); + log_debug(ls, "recover_master_copy %d %x", error, rl->rl_lkid); rl->rl_result = error; return error; } @@ -4342,7 +4446,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, } } - /* After ua is attached to lkb it will be freed by free_lkb(). + /* After ua is attached to lkb it will be freed by dlm_free_lkb(). When DLM_IFL_USER is set, the dlm knows that this is a userspace lock and that lkb_astparam is the dlm_user_args structure. */ @@ -4679,6 +4783,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) } list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { + lkb->lkb_ast_type = 0; list_del(&lkb->lkb_astqueue); dlm_put_lkb(lkb); } diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index ada04680a1e5..27b6ed302911 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -19,8 +19,6 @@ void dlm_print_lkb(struct dlm_lkb *lkb); void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); void dlm_receive_buffer(struct dlm_header *hd, int nodeid); int dlm_modes_compat(int mode1, int mode2); -int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, - unsigned int flags, struct dlm_rsb **r_ret); void dlm_put_rsb(struct dlm_rsb *r); void dlm_hold_rsb(struct dlm_rsb *r); int dlm_put_lkb(struct dlm_lkb *lkb); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 5c108c49cb8c..b180fdc51085 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -24,14 +24,6 @@ #include "recover.h" #include "requestqueue.h" -#ifdef CONFIG_DLM_DEBUG -int dlm_create_debug_file(struct dlm_ls *ls); -void dlm_delete_debug_file(struct dlm_ls *ls); -#else -static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; } -static inline void dlm_delete_debug_file(struct dlm_ls *ls) { } -#endif - static int ls_count; static struct mutex ls_lock; static struct list_head lslist; @@ -684,9 +676,9 @@ static int release_lockspace(struct dlm_ls *ls, int force) dlm_del_ast(lkb); if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) - free_lvb(lkb->lkb_lvbptr); + dlm_free_lvb(lkb->lkb_lvbptr); - free_lkb(lkb); + dlm_free_lkb(lkb); } } dlm_astd_resume(); @@ -704,7 +696,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) res_hashchain); list_del(&rsb->res_hashchain); - free_rsb(rsb); + dlm_free_rsb(rsb); } head = &ls->ls_rsbtbl[i].toss; @@ -712,7 +704,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) rsb = list_entry(head->next, struct dlm_rsb, res_hashchain); list_del(&rsb->res_hashchain); - free_rsb(rsb); + dlm_free_rsb(rsb); } } diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index e9923ca9c2d9..7c1e5e5cccd8 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -864,7 +864,7 @@ static void sctp_init_assoc(struct connection *con) static void tcp_connect_to_sock(struct connection *con) { int result = -EHOSTUNREACH; - struct sockaddr_storage saddr; + struct sockaddr_storage saddr, src_addr; int addr_len; struct socket *sock; @@ -898,6 +898,17 @@ static void tcp_connect_to_sock(struct connection *con) con->connect_action = tcp_connect_to_sock; add_sock(sock, con); + /* Bind to our cluster-known address connecting to avoid + routing problems */ + memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr)); + make_sockaddr(&src_addr, 0, &addr_len); + result = sock->ops->bind(sock, (struct sockaddr *) &src_addr, + addr_len); + if (result < 0) { + log_print("could not bind for connect: %d", result); + /* This *may* not indicate a critical error */ + } + make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); log_print("connecting to %d", con->nodeid); @@ -1426,6 +1437,8 @@ void dlm_lowcomms_stop(void) con = __nodeid2con(i, 0); if (con) { close_connection(con, true); + if (con->othercon) + kmem_cache_free(con_cache, con->othercon); kmem_cache_free(con_cache, con); } } diff --git a/fs/dlm/main.c b/fs/dlm/main.c index eca2907f2386..58487fb95a4c 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -18,16 +18,6 @@ #include "memory.h" #include "config.h" -#ifdef CONFIG_DLM_DEBUG -int dlm_register_debugfs(void); -void dlm_unregister_debugfs(void); -#else -static inline int dlm_register_debugfs(void) { return 0; } -static inline void dlm_unregister_debugfs(void) { } -#endif -int dlm_netlink_init(void); -void dlm_netlink_exit(void); - static int __init init_dlm(void) { int error; diff --git a/fs/dlm/member.c b/fs/dlm/member.c index e9cdcab306e2..fa17f5a27883 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -70,7 +70,7 @@ static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb) ls->ls_num_nodes--; } -static int dlm_is_member(struct dlm_ls *ls, int nodeid) +int dlm_is_member(struct dlm_ls *ls, int nodeid) { struct dlm_member *memb; diff --git a/fs/dlm/member.h b/fs/dlm/member.h index 927c08c19214..7a26fca1e0b5 100644 --- a/fs/dlm/member.h +++ b/fs/dlm/member.h @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -19,6 +19,7 @@ void dlm_clear_members(struct dlm_ls *ls); void dlm_clear_members_gone(struct dlm_ls *ls); int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); int dlm_is_removed(struct dlm_ls *ls, int nodeid); +int dlm_is_member(struct dlm_ls *ls, int nodeid); #endif /* __MEMBER_DOT_H__ */ diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index ecf0e5cb2035..f7783867491a 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -35,7 +35,7 @@ void dlm_memory_exit(void) kmem_cache_destroy(lkb_cache); } -char *allocate_lvb(struct dlm_ls *ls) +char *dlm_allocate_lvb(struct dlm_ls *ls) { char *p; @@ -43,7 +43,7 @@ char *allocate_lvb(struct dlm_ls *ls) return p; } -void free_lvb(char *p) +void dlm_free_lvb(char *p) { kfree(p); } @@ -51,7 +51,7 @@ void free_lvb(char *p) /* FIXME: have some minimal space built-in to rsb for the name and kmalloc a separate name if needed, like dentries are done */ -struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) +struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen) { struct dlm_rsb *r; @@ -61,14 +61,14 @@ struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) return r; } -void free_rsb(struct dlm_rsb *r) +void dlm_free_rsb(struct dlm_rsb *r) { if (r->res_lvbptr) - free_lvb(r->res_lvbptr); + dlm_free_lvb(r->res_lvbptr); kfree(r); } -struct dlm_lkb *allocate_lkb(struct dlm_ls *ls) +struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls) { struct dlm_lkb *lkb; @@ -76,7 +76,7 @@ struct dlm_lkb *allocate_lkb(struct dlm_ls *ls) return lkb; } -void free_lkb(struct dlm_lkb *lkb) +void dlm_free_lkb(struct dlm_lkb *lkb) { if (lkb->lkb_flags & DLM_IFL_USER) { struct dlm_user_args *ua; @@ -90,19 +90,3 @@ void free_lkb(struct dlm_lkb *lkb) kmem_cache_free(lkb_cache, lkb); } -struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen) -{ - struct dlm_direntry *de; - - DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN, - printk("namelen = %d\n", namelen);); - - de = kzalloc(sizeof(*de) + namelen, GFP_KERNEL); - return de; -} - -void free_direntry(struct dlm_direntry *de) -{ - kfree(de); -} - diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h index 6ead158ccc5c..485fb29143bd 100644 --- a/fs/dlm/memory.h +++ b/fs/dlm/memory.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -16,14 +16,12 @@ int dlm_memory_init(void); void dlm_memory_exit(void); -struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen); -void free_rsb(struct dlm_rsb *r); -struct dlm_lkb *allocate_lkb(struct dlm_ls *ls); -void free_lkb(struct dlm_lkb *l); -struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen); -void free_direntry(struct dlm_direntry *de); -char *allocate_lvb(struct dlm_ls *ls); -void free_lvb(char *l); +struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen); +void dlm_free_rsb(struct dlm_rsb *r); +struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls); +void dlm_free_lkb(struct dlm_lkb *l); +char *dlm_allocate_lvb(struct dlm_ls *ls); +void dlm_free_lvb(char *l); #endif /* __MEMORY_DOT_H__ */ diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index f8c69dda16a0..e69926e984db 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -58,8 +58,12 @@ static void copy_from_cb(void *dst, const void *base, unsigned offset, int dlm_process_incoming_buffer(int nodeid, const void *base, unsigned offset, unsigned len, unsigned limit) { - unsigned char __tmp[DLM_INBUF_LEN]; - struct dlm_header *msg = (struct dlm_header *) __tmp; + union { + unsigned char __buf[DLM_INBUF_LEN]; + /* this is to force proper alignment on some arches */ + struct dlm_header dlm; + } __tmp; + struct dlm_header *msg = &__tmp.dlm; int ret = 0; int err = 0; uint16_t msglen; @@ -100,8 +104,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, in the buffer on the stack (which should work for most ordinary messages). */ - if (msglen > sizeof(__tmp) && - msg == (struct dlm_header *) __tmp) { + if (msglen > DLM_INBUF_LEN && msg == &__tmp.dlm) { msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); if (msg == NULL) return ret; @@ -119,7 +122,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, dlm_receive_buffer(msg, nodeid); } - if (msg != (struct dlm_header *) __tmp) + if (msg != &__tmp.dlm) kfree(msg); return err ? err : ret; diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index ae2fd97fa4ad..026824cd3acb 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -197,11 +197,6 @@ static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) spin_unlock(&ls->ls_rcom_spin); } -static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) -{ - receive_sync_reply(ls, rc_in); -} - int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) { struct dlm_rcom *rc; @@ -254,11 +249,6 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) send_rcom(ls, mh, rc); } -static void receive_rcom_names_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) -{ - receive_sync_reply(ls, rc_in); -} - int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) { struct dlm_rcom *rc; @@ -381,11 +371,6 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in) send_rcom(ls, mh, rc); } -static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) -{ - dlm_recover_process_copy(ls, rc_in); -} - /* If the lockspace doesn't exist then still send a status message back; it's possible that it just doesn't have its global_id yet. */ @@ -481,11 +466,11 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) break; case DLM_RCOM_STATUS_REPLY: - receive_rcom_status_reply(ls, rc); + receive_sync_reply(ls, rc); break; case DLM_RCOM_NAMES_REPLY: - receive_rcom_names_reply(ls, rc); + receive_sync_reply(ls, rc); break; case DLM_RCOM_LOOKUP_REPLY: @@ -493,11 +478,11 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) break; case DLM_RCOM_LOCK_REPLY: - receive_rcom_lock_reply(ls, rc); + dlm_recover_process_copy(ls, rc); break; default: - DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); + log_error(ls, "receive_rcom bad type %d", rc->rc_type); } out: return; diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index c2cc7694cd16..df075dc300fa 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -629,7 +629,7 @@ static void recover_lvb(struct dlm_rsb *r) goto out; if (!r->res_lvbptr) { - r->res_lvbptr = allocate_lvb(r->res_ls); + r->res_lvbptr = dlm_allocate_lvb(r->res_ls); if (!r->res_lvbptr) goto out; } @@ -731,6 +731,20 @@ int dlm_create_root_list(struct dlm_ls *ls) list_add(&r->res_root_list, &ls->ls_root_list); dlm_hold_rsb(r); } + + /* If we're using a directory, add tossed rsbs to the root + list; they'll have entries created in the new directory, + but no other recovery steps should do anything with them. */ + + if (dlm_no_directory(ls)) { + read_unlock(&ls->ls_rsbtbl[i].lock); + continue; + } + + list_for_each_entry(r, &ls->ls_rsbtbl[i].toss, res_hashchain) { + list_add(&r->res_root_list, &ls->ls_root_list); + dlm_hold_rsb(r); + } read_unlock(&ls->ls_rsbtbl[i].lock); } out: @@ -750,6 +764,11 @@ void dlm_release_root_list(struct dlm_ls *ls) up_write(&ls->ls_root_sem); } +/* If not using a directory, clear the entire toss list, there's no benefit to + caching the master value since it's fixed. If we are using a dir, keep the + rsb's we're the master of. Recovery will add them to the root list and from + there they'll be entered in the rebuilt directory. */ + void dlm_clear_toss_list(struct dlm_ls *ls) { struct dlm_rsb *r, *safe; @@ -759,8 +778,10 @@ void dlm_clear_toss_list(struct dlm_ls *ls) write_lock(&ls->ls_rsbtbl[i].lock); list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, res_hashchain) { - list_del(&r->res_hashchain); - free_rsb(r); + if (dlm_no_directory(ls) || !is_master(r)) { + list_del(&r->res_hashchain); + dlm_free_rsb(r); + } } write_unlock(&ls->ls_rsbtbl[i].lock); } diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 4b89e20eebe7..997f9531d594 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -67,17 +67,18 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_astd_resume(); /* - * This list of root rsb's will be the basis of most of the recovery - * routines. + * Free non-master tossed rsb's. Master rsb's are kept on toss + * list and put on root list to be included in resdir recovery. */ - dlm_create_root_list(ls); + dlm_clear_toss_list(ls); /* - * Free all the tossed rsb's so we don't have to recover them. + * This list of root rsb's will be the basis of most of the recovery + * routines. */ - dlm_clear_toss_list(ls); + dlm_create_root_list(ls); /* * Add or remove nodes from the lockspace's ls_nodes list. diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 4f741546f4bb..7cbc6826239b 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -24,8 +24,7 @@ #include "lvb_table.h" #include "user.h" -static const char *name_prefix="dlm"; -static struct miscdevice ctl_device; +static const char name_prefix[] = "dlm"; static const struct file_operations device_fops; #ifdef CONFIG_COMPAT @@ -82,7 +81,8 @@ struct dlm_lock_result32 { }; static void compat_input(struct dlm_write_request *kb, - struct dlm_write_request32 *kb32) + struct dlm_write_request32 *kb32, + int max_namelen) { kb->version[0] = kb32->version[0]; kb->version[1] = kb32->version[1]; @@ -112,7 +112,11 @@ static void compat_input(struct dlm_write_request *kb, kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); - memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen); + if (kb->i.lock.namelen <= max_namelen) + memcpy(kb->i.lock.name, kb32->i.lock.name, + kb->i.lock.namelen); + else + kb->i.lock.namelen = max_namelen; } } @@ -236,12 +240,12 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) spin_unlock(&proc->asts_spin); if (eol) { - spin_lock(&ua->proc->locks_spin); + spin_lock(&proc->locks_spin); if (!list_empty(&lkb->lkb_ownqueue)) { list_del_init(&lkb->lkb_ownqueue); dlm_put_lkb(lkb); } - spin_unlock(&ua->proc->locks_spin); + spin_unlock(&proc->locks_spin); } out: mutex_unlock(&ls->ls_clear_proc_locks); @@ -529,7 +533,8 @@ static ssize_t device_write(struct file *file, const char __user *buf, if (proc) set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); - compat_input(kbuf, k32buf); + compat_input(kbuf, k32buf, + count - sizeof(struct dlm_write_request32)); kfree(k32buf); } #endif @@ -896,14 +901,16 @@ static const struct file_operations ctl_device_fops = { .owner = THIS_MODULE, }; +static struct miscdevice ctl_device = { + .name = "dlm-control", + .fops = &ctl_device_fops, + .minor = MISC_DYNAMIC_MINOR, +}; + int dlm_user_init(void) { int error; - ctl_device.name = "dlm-control"; - ctl_device.fops = &ctl_device_fops; - ctl_device.minor = MISC_DYNAMIC_MINOR; - error = misc_register(&ctl_device); if (error) log_print("misc_register failed for control device"); diff --git a/fs/dlm/util.c b/fs/dlm/util.c index 963889cf6740..4d9c1f4e1bd1 100644 --- a/fs/dlm/util.c +++ b/fs/dlm/util.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -14,6 +14,14 @@ #include "rcom.h" #include "util.h" +#define DLM_ERRNO_EDEADLK 35 +#define DLM_ERRNO_EBADR 53 +#define DLM_ERRNO_EBADSLT 57 +#define DLM_ERRNO_EPROTO 71 +#define DLM_ERRNO_EOPNOTSUPP 95 +#define DLM_ERRNO_ETIMEDOUT 110 +#define DLM_ERRNO_EINPROGRESS 115 + static void header_out(struct dlm_header *hd) { hd->h_version = cpu_to_le32(hd->h_version); @@ -30,11 +38,54 @@ static void header_in(struct dlm_header *hd) hd->h_length = le16_to_cpu(hd->h_length); } -void dlm_message_out(struct dlm_message *ms) +/* higher errno values are inconsistent across architectures, so select + one set of values for on the wire */ + +static int to_dlm_errno(int err) +{ + switch (err) { + case -EDEADLK: + return -DLM_ERRNO_EDEADLK; + case -EBADR: + return -DLM_ERRNO_EBADR; + case -EBADSLT: + return -DLM_ERRNO_EBADSLT; + case -EPROTO: + return -DLM_ERRNO_EPROTO; + case -EOPNOTSUPP: + return -DLM_ERRNO_EOPNOTSUPP; + case -ETIMEDOUT: + return -DLM_ERRNO_ETIMEDOUT; + case -EINPROGRESS: + return -DLM_ERRNO_EINPROGRESS; + } + return err; +} + +static int from_dlm_errno(int err) { - struct dlm_header *hd = (struct dlm_header *) ms; + switch (err) { + case -DLM_ERRNO_EDEADLK: + return -EDEADLK; + case -DLM_ERRNO_EBADR: + return -EBADR; + case -DLM_ERRNO_EBADSLT: + return -EBADSLT; + case -DLM_ERRNO_EPROTO: + return -EPROTO; + case -DLM_ERRNO_EOPNOTSUPP: + return -EOPNOTSUPP; + case -DLM_ERRNO_ETIMEDOUT: + return -ETIMEDOUT; + case -DLM_ERRNO_EINPROGRESS: + return -EINPROGRESS; + } + return err; +} - header_out(hd); +void dlm_message_out(struct dlm_message *ms) +{ + header_out(&ms->m_header); ms->m_type = cpu_to_le32(ms->m_type); ms->m_nodeid = cpu_to_le32(ms->m_nodeid); @@ -53,14 +104,12 @@ void dlm_message_out(struct dlm_message *ms) ms->m_rqmode = cpu_to_le32(ms->m_rqmode); ms->m_bastmode = cpu_to_le32(ms->m_bastmode); ms->m_asts = cpu_to_le32(ms->m_asts); - ms->m_result = cpu_to_le32(ms->m_result); + ms->m_result = cpu_to_le32(to_dlm_errno(ms->m_result)); } void dlm_message_in(struct dlm_message *ms) { - struct dlm_header *hd = (struct dlm_header *) ms; - - header_in(hd); + header_in(&ms->m_header); ms->m_type = le32_to_cpu(ms->m_type); ms->m_nodeid = le32_to_cpu(ms->m_nodeid); @@ -79,7 +128,7 @@ void dlm_message_in(struct dlm_message *ms) ms->m_rqmode = le32_to_cpu(ms->m_rqmode); ms->m_bastmode = le32_to_cpu(ms->m_bastmode); ms->m_asts = le32_to_cpu(ms->m_asts); - ms->m_result = le32_to_cpu(ms->m_result); + ms->m_result = from_dlm_errno(le32_to_cpu(ms->m_result)); } static void rcom_lock_out(struct rcom_lock *rl) @@ -126,10 +175,9 @@ static void rcom_config_in(struct rcom_config *rf) void dlm_rcom_out(struct dlm_rcom *rc) { - struct dlm_header *hd = (struct dlm_header *) rc; int type = rc->rc_type; - header_out(hd); + header_out(&rc->rc_header); rc->rc_type = cpu_to_le32(rc->rc_type); rc->rc_result = cpu_to_le32(rc->rc_result); @@ -137,7 +185,7 @@ void dlm_rcom_out(struct dlm_rcom *rc) rc->rc_seq = cpu_to_le64(rc->rc_seq); rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply); - if (type == DLM_RCOM_LOCK) + if ((type == DLM_RCOM_LOCK) || (type == DLM_RCOM_LOCK_REPLY)) rcom_lock_out((struct rcom_lock *) rc->rc_buf); else if (type == DLM_RCOM_STATUS_REPLY) @@ -146,9 +194,9 @@ void dlm_rcom_out(struct dlm_rcom *rc) void dlm_rcom_in(struct dlm_rcom *rc) { - struct dlm_header *hd = (struct dlm_header *) rc; + int type; - header_in(hd); + header_in(&rc->rc_header); rc->rc_type = le32_to_cpu(rc->rc_type); rc->rc_result = le32_to_cpu(rc->rc_result); @@ -156,10 +204,12 @@ void dlm_rcom_in(struct dlm_rcom *rc) rc->rc_seq = le64_to_cpu(rc->rc_seq); rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply); - if (rc->rc_type == DLM_RCOM_LOCK) + type = rc->rc_type; + + if ((type == DLM_RCOM_LOCK) || (type == DLM_RCOM_LOCK_REPLY)) rcom_lock_in((struct rcom_lock *) rc->rc_buf); - else if (rc->rc_type == DLM_RCOM_STATUS_REPLY) + else if (type == DLM_RCOM_STATUS_REPLY) rcom_config_in((struct rcom_config *) rc->rc_buf); } diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 0f69c416eebc..a5432bbbfb88 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -347,7 +347,8 @@ restart: break; } retry = __process_buffer(journal, jh, bhs,&batch_count); - if (!retry && lock_need_resched(&journal->j_list_lock)){ + if (!retry && (need_resched() || + spin_needbreak(&journal->j_list_lock))) { spin_unlock(&journal->j_list_lock); retry = 1; break; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 610264b99a8e..31853eb65b4c 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -265,7 +265,7 @@ write_out_data: put_bh(bh); } - if (lock_need_resched(&journal->j_list_lock)) { + if (need_resched() || spin_needbreak(&journal->j_list_lock)) { spin_unlock(&journal->j_list_lock); goto write_out_data; } diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1b7f282c1ae9..6914598022ce 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -353,7 +353,8 @@ restart: } retry = __process_buffer(journal, jh, bhs, &batch_count, transaction); - if (!retry && lock_need_resched(&journal->j_list_lock)){ + if (!retry && (need_resched() || + spin_needbreak(&journal->j_list_lock))) { spin_unlock(&journal->j_list_lock); retry = 1; break; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index da8d0eb3b7b9..4f302d279279 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -341,7 +341,7 @@ write_out_data: put_bh(bh); } - if (lock_need_resched(&journal->j_list_lock)) { + if (need_resched() || spin_needbreak(&journal->j_list_lock)) { spin_unlock(&journal->j_list_lock); goto write_out_data; } diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index d070b18e539d..0b45fd3a4bfd 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -41,6 +41,48 @@ struct nlm_wait { static LIST_HEAD(nlm_blocked); +/** + * nlmclnt_init - Set up per-NFS mount point lockd data structures + * @nlm_init: pointer to arguments structure + * + * Returns pointer to an appropriate nlm_host struct, + * or an ERR_PTR value. + */ +struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) +{ + struct nlm_host *host; + u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; + int status; + + status = lockd_up(nlm_init->protocol); + if (status < 0) + return ERR_PTR(status); + + host = nlmclnt_lookup_host((struct sockaddr_in *)nlm_init->address, + nlm_init->protocol, nlm_version, + nlm_init->hostname, + strlen(nlm_init->hostname)); + if (host == NULL) { + lockd_down(); + return ERR_PTR(-ENOLCK); + } + + return host; +} +EXPORT_SYMBOL_GPL(nlmclnt_init); + +/** + * nlmclnt_done - Release resources allocated by nlmclnt_init() + * @host: nlm_host structure reserved by nlmclnt_init() + * + */ +void nlmclnt_done(struct nlm_host *host) +{ + nlm_release_host(host); + lockd_down(); +} +EXPORT_SYMBOL_GPL(nlmclnt_done); + /* * Queue up a lock for blocking so that the GRANTED request can see it */ diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a10343bed160..b6b74a60e1eb 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -145,34 +145,21 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req) BUG_ON(req->a_args.lock.fl.fl_ops != NULL); } -/* - * This is the main entry point for the NLM client. +/** + * nlmclnt_proc - Perform a single client-side lock request + * @host: address of a valid nlm_host context representing the NLM server + * @cmd: fcntl-style file lock operation to perform + * @fl: address of arguments for the lock operation + * */ -int -nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) +int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) { - struct rpc_clnt *client = NFS_CLIENT(inode); - struct sockaddr_in addr; - struct nfs_server *nfssrv = NFS_SERVER(inode); - struct nlm_host *host; struct nlm_rqst *call; sigset_t oldset; unsigned long flags; - int status, vers; - - vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1; - if (NFS_PROTO(inode)->version > 3) { - printk(KERN_NOTICE "NFSv4 file locking not implemented!\n"); - return -ENOLCK; - } - - rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr)); - host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers, - nfssrv->nfs_client->cl_hostname, - strlen(nfssrv->nfs_client->cl_hostname)); - if (host == NULL) - return -ENOLCK; + int status; + nlm_get_host(host); call = nlm_alloc_call(host); if (call == NULL) return -ENOMEM; @@ -219,7 +206,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) dprintk("lockd: clnt proc returns %d\n", status); return status; } -EXPORT_SYMBOL(nlmclnt_proc); +EXPORT_SYMBOL_GPL(nlmclnt_proc); /* * Allocate an NLM RPC call struct @@ -257,7 +244,7 @@ void nlm_release_call(struct nlm_rqst *call) static void nlmclnt_rpc_release(void *data) { - return nlm_release_call(data); + nlm_release_call(data); } static int nlm_wait_on_grace(wait_queue_head_t *queue) diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 633653bff944..3e459e18cc31 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -612,8 +612,7 @@ const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) * called with BKL held. */ static char buf[2*NLM_MAXCOOKIELEN+1]; - int i; - int len = sizeof(buf); + unsigned int i, len = sizeof(buf); char *p = buf; len--; /* allow for trailing \0 */ diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a796be5051bf..9b6bbf1b9787 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -73,8 +73,6 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) complete(&nfs_callback_info.started); for(;;) { - char buf[RPC_MAX_ADDRBUFLEN]; - if (signalled()) { if (nfs_callback_info.users == 0) break; @@ -92,8 +90,6 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) __FUNCTION__, -err); break; } - dprintk("%s: request from %s\n", __FUNCTION__, - svc_print_addr(rqstp, buf, sizeof(buf))); svc_process(rqstp); } @@ -168,12 +164,11 @@ void nfs_callback_down(void) static int nfs_callback_authenticate(struct svc_rqst *rqstp) { - struct sockaddr_in *addr = svc_addr_in(rqstp); struct nfs_client *clp; char buf[RPC_MAX_ADDRBUFLEN]; /* Don't talk to strangers */ - clp = nfs_find_client(addr, 4); + clp = nfs_find_client(svc_addr(rqstp), 4); if (clp == NULL) return SVC_DROP; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index c2bb14e053e1..bb25d2135ff1 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -38,7 +38,7 @@ struct cb_compound_hdr_res { }; struct cb_getattrargs { - struct sockaddr_in *addr; + struct sockaddr *addr; struct nfs_fh fh; uint32_t bitmap[2]; }; @@ -53,7 +53,7 @@ struct cb_getattrres { }; struct cb_recallargs { - struct sockaddr_in *addr; + struct sockaddr *addr; struct nfs_fh fh; nfs4_stateid stateid; uint32_t truncate; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 72e55d83756d..15f7785048d3 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -12,7 +12,9 @@ #include "delegation.h" #include "internal.h" +#ifdef NFS_DEBUG #define NFSDBG_FACILITY NFSDBG_CALLBACK +#endif __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) { @@ -20,12 +22,16 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres * struct nfs_delegation *delegation; struct nfs_inode *nfsi; struct inode *inode; - + res->bitmap[0] = res->bitmap[1] = 0; res->status = htonl(NFS4ERR_BADHANDLE); clp = nfs_find_client(args->addr, 4); if (clp == NULL) goto out; + + dprintk("NFS: GETATTR callback request from %s\n", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + inode = nfs_delegation_find_inode(clp, &args->fh); if (inode == NULL) goto out_putclient; @@ -65,23 +71,32 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) clp = nfs_find_client(args->addr, 4); if (clp == NULL) goto out; - inode = nfs_delegation_find_inode(clp, &args->fh); - if (inode == NULL) - goto out_putclient; - /* Set up a helper thread to actually return the delegation */ - switch(nfs_async_inode_return_delegation(inode, &args->stateid)) { - case 0: - res = 0; - break; - case -ENOENT: - res = htonl(NFS4ERR_BAD_STATEID); - break; - default: - res = htonl(NFS4ERR_RESOURCE); - } - iput(inode); -out_putclient: - nfs_put_client(clp); + + dprintk("NFS: RECALL callback request from %s\n", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + + do { + struct nfs_client *prev = clp; + + inode = nfs_delegation_find_inode(clp, &args->fh); + if (inode != NULL) { + /* Set up a helper thread to actually return the delegation */ + switch(nfs_async_inode_return_delegation(inode, &args->stateid)) { + case 0: + res = 0; + break; + case -ENOENT: + if (res != 0) + res = htonl(NFS4ERR_BAD_STATEID); + break; + default: + res = htonl(NFS4ERR_RESOURCE); + } + iput(inode); + } + clp = nfs_find_client_next(prev); + nfs_put_client(prev); + } while (clp != NULL); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); return res; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 058ade7efe79..c63eb720b68b 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -139,7 +139,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (unlikely(status != 0)) return status; /* We do not like overly long tags! */ - if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) { + if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", __FUNCTION__, hdr->taglen); return htonl(NFS4ERR_RESOURCE); @@ -176,7 +176,7 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr status = decode_fh(xdr, &args->fh); if (unlikely(status != 0)) goto out; - args->addr = svc_addr_in(rqstp); + args->addr = svc_addr(rqstp); status = decode_bitmap(xdr, args->bitmap); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status)); @@ -188,7 +188,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, __be32 *p; __be32 status; - args->addr = svc_addr_in(rqstp); + args->addr = svc_addr(rqstp); status = decode_stateid(xdr, &args->stateid); if (unlikely(status != 0)) goto out; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a6f625497612..685c43f810c1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -34,6 +34,8 @@ #include <linux/nfs_idmap.h> #include <linux/vfs.h> #include <linux/inet.h> +#include <linux/in6.h> +#include <net/ipv6.h> #include <linux/nfs_xdr.h> #include <asm/system.h> @@ -93,22 +95,30 @@ struct rpc_program nfsacl_program = { }; #endif /* CONFIG_NFS_V3_ACL */ +struct nfs_client_initdata { + const char *hostname; + const struct sockaddr *addr; + size_t addrlen; + const struct nfs_rpc_ops *rpc_ops; + int proto; +}; + /* * Allocate a shared client record * * Since these are allocated/deallocated very rarely, we don't * bother putting them in a slab cache... */ -static struct nfs_client *nfs_alloc_client(const char *hostname, - const struct sockaddr_in *addr, - int nfsversion) +static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp; if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; - if (nfsversion == 4) { + clp->rpc_ops = cl_init->rpc_ops; + + if (cl_init->rpc_ops->version == 4) { if (nfs_callback_up() < 0) goto error_2; __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); @@ -117,11 +127,11 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, atomic_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; - clp->cl_nfsversion = nfsversion; - memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); + memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen); + clp->cl_addrlen = cl_init->addrlen; - if (hostname) { - clp->cl_hostname = kstrdup(hostname, GFP_KERNEL); + if (cl_init->hostname) { + clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL); if (!clp->cl_hostname) goto error_3; } @@ -129,6 +139,8 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, INIT_LIST_HEAD(&clp->cl_superblocks); clp->cl_rpcclient = ERR_PTR(-EINVAL); + clp->cl_proto = cl_init->proto; + #ifdef CONFIG_NFS_V4 init_rwsem(&clp->cl_sem); INIT_LIST_HEAD(&clp->cl_delegations); @@ -166,7 +178,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) */ static void nfs_free_client(struct nfs_client *clp) { - dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion); + dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); nfs4_shutdown_client(clp); @@ -203,76 +215,148 @@ void nfs_put_client(struct nfs_client *clp) } } +static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1, + const struct sockaddr_in *sa2) +{ + return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr; +} + +static int nfs_sockaddr_match_ipaddr6(const struct sockaddr_in6 *sa1, + const struct sockaddr_in6 *sa2) +{ + return ipv6_addr_equal(&sa1->sin6_addr, &sa2->sin6_addr); +} + +static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + switch (sa1->sa_family) { + case AF_INET: + return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1, + (const struct sockaddr_in *)sa2); + case AF_INET6: + return nfs_sockaddr_match_ipaddr6((const struct sockaddr_in6 *)sa1, + (const struct sockaddr_in6 *)sa2); + } + BUG(); +} + /* - * Find a client by address - * - caller must hold nfs_client_lock + * Find a client by IP address and protocol version + * - returns NULL if no such client */ -static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion, int match_port) +struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion) { struct nfs_client *clp; + spin_lock(&nfs_client_lock); list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; + /* Don't match clients that failed to initialise properly */ - if (clp->cl_cons_state < 0) + if (clp->cl_cons_state != NFS_CS_READY) continue; /* Different NFS versions cannot share the same nfs_client */ - if (clp->cl_nfsversion != nfsversion) + if (clp->rpc_ops->version != nfsversion) continue; - if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr, - sizeof(clp->cl_addr.sin_addr)) != 0) + if (addr->sa_family != clap->sa_family) + continue; + /* Match only the IP address, not the port number */ + if (!nfs_sockaddr_match_ipaddr(addr, clap)) continue; - if (!match_port || clp->cl_addr.sin_port == addr->sin_port) - goto found; + atomic_inc(&clp->cl_count); + spin_unlock(&nfs_client_lock); + return clp; } - + spin_unlock(&nfs_client_lock); return NULL; - -found: - atomic_inc(&clp->cl_count); - return clp; } /* * Find a client by IP address and protocol version * - returns NULL if no such client */ -struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion) +struct nfs_client *nfs_find_client_next(struct nfs_client *clp) { - struct nfs_client *clp; + struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr; + u32 nfsvers = clp->rpc_ops->version; spin_lock(&nfs_client_lock); - clp = __nfs_find_client(addr, nfsversion, 0); + list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) { + struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; + + /* Don't match clients that failed to initialise properly */ + if (clp->cl_cons_state != NFS_CS_READY) + continue; + + /* Different NFS versions cannot share the same nfs_client */ + if (clp->rpc_ops->version != nfsvers) + continue; + + if (sap->sa_family != clap->sa_family) + continue; + /* Match only the IP address, not the port number */ + if (!nfs_sockaddr_match_ipaddr(sap, clap)) + continue; + + atomic_inc(&clp->cl_count); + spin_unlock(&nfs_client_lock); + return clp; + } spin_unlock(&nfs_client_lock); - if (clp != NULL && clp->cl_cons_state != NFS_CS_READY) { - nfs_put_client(clp); - clp = NULL; + return NULL; +} + +/* + * Find an nfs_client on the list that matches the initialisation data + * that is supplied. + */ +static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data) +{ + struct nfs_client *clp; + + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + /* Don't match clients that failed to initialise properly */ + if (clp->cl_cons_state < 0) + continue; + + /* Different NFS versions cannot share the same nfs_client */ + if (clp->rpc_ops != data->rpc_ops) + continue; + + if (clp->cl_proto != data->proto) + continue; + + /* Match the full socket address */ + if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0) + continue; + + atomic_inc(&clp->cl_count); + return clp; } - return clp; + return NULL; } /* * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ -static struct nfs_client *nfs_get_client(const char *hostname, - const struct sockaddr_in *addr, - int nfsversion) +static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp, *new = NULL; int error; - dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n", - hostname ?: "", NIPQUAD(addr->sin_addr), - addr->sin_port, nfsversion); + dprintk("--> nfs_get_client(%s,v%u)\n", + cl_init->hostname ?: "", cl_init->rpc_ops->version); /* see if the client already exists */ do { spin_lock(&nfs_client_lock); - clp = __nfs_find_client(addr, nfsversion, 1); + clp = nfs_match_client(cl_init); if (clp) goto found_client; if (new) @@ -280,7 +364,7 @@ static struct nfs_client *nfs_get_client(const char *hostname, spin_unlock(&nfs_client_lock); - new = nfs_alloc_client(hostname, addr, nfsversion); + new = nfs_alloc_client(cl_init); } while (new); return ERR_PTR(-ENOMEM); @@ -344,12 +428,16 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, switch (proto) { case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: - if (!to->to_initval) + if (to->to_initval == 0) to->to_initval = 60 * HZ; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) to->to_initval = NFS_MAX_TCP_TIMEOUT; to->to_increment = to->to_initval; to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); + if (to->to_maxval > NFS_MAX_TCP_TIMEOUT) + to->to_maxval = NFS_MAX_TCP_TIMEOUT; + if (to->to_maxval < to->to_initval) + to->to_maxval = to->to_initval; to->to_exponential = 0; break; case XPRT_TRANSPORT_UDP: @@ -367,19 +455,17 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, /* * Create an RPC client handle */ -static int nfs_create_rpc_client(struct nfs_client *clp, int proto, - unsigned int timeo, - unsigned int retrans, - rpc_authflavor_t flavor, - int flags) +static int nfs_create_rpc_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + rpc_authflavor_t flavor, + int flags) { - struct rpc_timeout timeparms; struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { - .protocol = proto, + .protocol = clp->cl_proto, .address = (struct sockaddr *)&clp->cl_addr, - .addrsize = sizeof(clp->cl_addr), - .timeout = &timeparms, + .addrsize = clp->cl_addrlen, + .timeout = timeparms, .servername = clp->cl_hostname, .program = &nfs_program, .version = clp->rpc_ops->version, @@ -390,10 +476,6 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto, if (!IS_ERR(clp->cl_rpcclient)) return 0; - nfs_init_timeout_values(&timeparms, proto, timeo, retrans); - clp->retrans_timeo = timeparms.to_initval; - clp->retrans_count = timeparms.to_retries; - clnt = rpc_create(&args); if (IS_ERR(clnt)) { dprintk("%s: cannot create RPC client. Error = %ld\n", @@ -411,7 +493,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto, static void nfs_destroy_server(struct nfs_server *server) { if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_down(); /* release rpc.lockd */ + nlmclnt_done(server->nlm_host); } /* @@ -419,20 +501,29 @@ static void nfs_destroy_server(struct nfs_server *server) */ static int nfs_start_lockd(struct nfs_server *server) { - int error = 0; + struct nlm_host *host; + struct nfs_client *clp = server->nfs_client; + struct nlmclnt_initdata nlm_init = { + .hostname = clp->cl_hostname, + .address = (struct sockaddr *)&clp->cl_addr, + .addrlen = clp->cl_addrlen, + .protocol = server->flags & NFS_MOUNT_TCP ? + IPPROTO_TCP : IPPROTO_UDP, + .nfs_version = clp->rpc_ops->version, + }; - if (server->nfs_client->cl_nfsversion > 3) - goto out; + if (nlm_init.nfs_version > 3) + return 0; if (server->flags & NFS_MOUNT_NONLM) - goto out; - error = lockd_up((server->flags & NFS_MOUNT_TCP) ? - IPPROTO_TCP : IPPROTO_UDP); - if (error < 0) - server->flags |= NFS_MOUNT_NONLM; - else - server->destroy = nfs_destroy_server; -out: - return error; + return 0; + + host = nlmclnt_init(&nlm_init); + if (IS_ERR(host)) + return PTR_ERR(host); + + server->nlm_host = host; + server->destroy = nfs_destroy_server; + return 0; } /* @@ -441,7 +532,7 @@ out: #ifdef CONFIG_NFS_V3_ACL static void nfs_init_server_aclclient(struct nfs_server *server) { - if (server->nfs_client->cl_nfsversion != 3) + if (server->nfs_client->rpc_ops->version != 3) goto out_noacl; if (server->flags & NFS_MOUNT_NOACL) goto out_noacl; @@ -468,7 +559,9 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server) /* * Create a general RPC client */ -static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour) +static int nfs_init_server_rpcclient(struct nfs_server *server, + const struct rpc_timeout *timeo, + rpc_authflavor_t pseudoflavour) { struct nfs_client *clp = server->nfs_client; @@ -478,6 +571,11 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t return PTR_ERR(server->client); } + memcpy(&server->client->cl_timeout_default, + timeo, + sizeof(server->client->cl_timeout_default)); + server->client->cl_timeout = &server->client->cl_timeout_default; + if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { struct rpc_auth *auth; @@ -502,6 +600,7 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t * Initialise an NFS2 or NFS3 client */ static int nfs_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, const struct nfs_parsed_mount_data *data) { int error; @@ -512,18 +611,11 @@ static int nfs_init_client(struct nfs_client *clp, return 0; } - /* Check NFS protocol revision and initialize RPC op vector */ - clp->rpc_ops = &nfs_v2_clientops; -#ifdef CONFIG_NFS_V3 - if (clp->cl_nfsversion == 3) - clp->rpc_ops = &nfs_v3_clientops; -#endif /* * Create a client RPC handle for doing FSSTAT with UNIX auth only * - RFC 2623, sec 2.3.2 */ - error = nfs_create_rpc_client(clp, data->nfs_server.protocol, - data->timeo, data->retrans, RPC_AUTH_UNIX, 0); + error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0); if (error < 0) goto error; nfs_mark_client_ready(clp, NFS_CS_READY); @@ -541,25 +633,34 @@ error: static int nfs_init_server(struct nfs_server *server, const struct nfs_parsed_mount_data *data) { + struct nfs_client_initdata cl_init = { + .hostname = data->nfs_server.hostname, + .addr = (const struct sockaddr *)&data->nfs_server.address, + .addrlen = data->nfs_server.addrlen, + .rpc_ops = &nfs_v2_clientops, + .proto = data->nfs_server.protocol, + }; + struct rpc_timeout timeparms; struct nfs_client *clp; - int error, nfsvers = 2; + int error; dprintk("--> nfs_init_server()\n"); #ifdef CONFIG_NFS_V3 if (data->flags & NFS_MOUNT_VER3) - nfsvers = 3; + cl_init.rpc_ops = &nfs_v3_clientops; #endif /* Allocate or find a client reference we can use */ - clp = nfs_get_client(data->nfs_server.hostname, - &data->nfs_server.address, nfsvers); + clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) { dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); return PTR_ERR(clp); } - error = nfs_init_client(clp, data); + nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, + data->timeo, data->retrans); + error = nfs_init_client(clp, &timeparms, data); if (error < 0) goto error; @@ -583,7 +684,7 @@ static int nfs_init_server(struct nfs_server *server, if (error < 0) goto error; - error = nfs_init_server_rpcclient(server, data->auth_flavors[0]); + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); if (error < 0) goto error; @@ -729,6 +830,9 @@ static struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->client_link); INIT_LIST_HEAD(&server->master_link); + init_waitqueue_head(&server->active_wq); + atomic_set(&server->active, 0); + server->io_stats = nfs_alloc_iostats(); if (!server->io_stats) { kfree(server); @@ -840,7 +944,7 @@ error: * Initialise an NFS4 client record */ static int nfs4_init_client(struct nfs_client *clp, - int proto, int timeo, int retrans, + const struct rpc_timeout *timeparms, const char *ip_addr, rpc_authflavor_t authflavour) { @@ -855,7 +959,7 @@ static int nfs4_init_client(struct nfs_client *clp, /* Check NFS protocol revision and initialize RPC op vector */ clp->rpc_ops = &nfs_v4_clientops; - error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour, + error = nfs_create_rpc_client(clp, timeparms, authflavour, RPC_CLNT_CREATE_DISCRTRY); if (error < 0) goto error; @@ -882,23 +986,32 @@ error: * Set up an NFS4 client */ static int nfs4_set_client(struct nfs_server *server, - const char *hostname, const struct sockaddr_in *addr, + const char *hostname, + const struct sockaddr *addr, + const size_t addrlen, const char *ip_addr, rpc_authflavor_t authflavour, - int proto, int timeo, int retrans) + int proto, const struct rpc_timeout *timeparms) { + struct nfs_client_initdata cl_init = { + .hostname = hostname, + .addr = addr, + .addrlen = addrlen, + .rpc_ops = &nfs_v4_clientops, + .proto = proto, + }; struct nfs_client *clp; int error; dprintk("--> nfs4_set_client()\n"); /* Allocate or find a client reference we can use */ - clp = nfs_get_client(hostname, addr, 4); + clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) { error = PTR_ERR(clp); goto error; } - error = nfs4_init_client(clp, proto, timeo, retrans, ip_addr, authflavour); + error = nfs4_init_client(clp, timeparms, ip_addr, authflavour); if (error < 0) goto error_put; @@ -919,10 +1032,26 @@ error: static int nfs4_init_server(struct nfs_server *server, const struct nfs_parsed_mount_data *data) { + struct rpc_timeout timeparms; int error; dprintk("--> nfs4_init_server()\n"); + nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, + data->timeo, data->retrans); + + /* Get a client record */ + error = nfs4_set_client(server, + data->nfs_server.hostname, + (const struct sockaddr *)&data->nfs_server.address, + data->nfs_server.addrlen, + data->client_address, + data->auth_flavors[0], + data->nfs_server.protocol, + &timeparms); + if (error < 0) + goto error; + /* Initialise the client representation from the mount data */ server->flags = data->flags & NFS_MOUNT_FLAGMASK; server->caps |= NFS_CAP_ATOMIC_OPEN; @@ -937,8 +1066,9 @@ static int nfs4_init_server(struct nfs_server *server, server->acdirmin = data->acdirmin * HZ; server->acdirmax = data->acdirmax * HZ; - error = nfs_init_server_rpcclient(server, data->auth_flavors[0]); + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); +error: /* Done */ dprintk("<-- nfs4_init_server() = %d\n", error); return error; @@ -961,17 +1091,6 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, if (!server) return ERR_PTR(-ENOMEM); - /* Get a client record */ - error = nfs4_set_client(server, - data->nfs_server.hostname, - &data->nfs_server.address, - data->client_address, - data->auth_flavors[0], - data->nfs_server.protocol, - data->timeo, data->retrans); - if (error < 0) - goto error; - /* set up the general RPC client */ error = nfs4_init_server(server, data); if (error < 0) @@ -1039,12 +1158,13 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Get a client representation. * Note: NFSv4 always uses TCP, */ - error = nfs4_set_client(server, data->hostname, data->addr, - parent_client->cl_ipaddr, - data->authflavor, - parent_server->client->cl_xprt->prot, - parent_client->retrans_timeo, - parent_client->retrans_count); + error = nfs4_set_client(server, data->hostname, + data->addr, + data->addrlen, + parent_client->cl_ipaddr, + data->authflavor, + parent_server->client->cl_xprt->prot, + parent_server->client->cl_timeout); if (error < 0) goto error; @@ -1052,7 +1172,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, nfs_server_copy_userdata(server, parent_server); server->caps |= NFS_CAP_ATOMIC_OPEN; - error = nfs_init_server_rpcclient(server, data->authflavor); + error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); if (error < 0) goto error; @@ -1121,7 +1241,9 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, server->fsid = fattr->fsid; - error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor); + error = nfs_init_server_rpcclient(server, + source->client->cl_timeout, + source->client->cl_auth->au_flavor); if (error < 0) goto out_free_server; if (!IS_ERR(source->client_acl)) @@ -1263,10 +1385,10 @@ static int nfs_server_list_show(struct seq_file *m, void *v) /* display one transport per line on subsequent lines */ clp = list_entry(v, struct nfs_client, cl_share_link); - seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n", - clp->cl_nfsversion, - NIPQUAD(clp->cl_addr.sin_addr), - ntohs(clp->cl_addr.sin_port), + seq_printf(m, "v%u %s %s %3d %s\n", + clp->rpc_ops->version, + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), atomic_read(&clp->cl_count), clp->cl_hostname); @@ -1342,10 +1464,10 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); - seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n", - clp->cl_nfsversion, - NIPQUAD(clp->cl_addr.sin_addr), - ntohs(clp->cl_addr.sin_port), + seq_printf(m, "v%u %s %s %-7s %-17s\n", + clp->rpc_ops->version, + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), dev, fsid); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 11833f4caeaa..b9eadd18ba70 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -125,6 +125,32 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st put_rpccred(oldcred); } +static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) +{ + int res = 0; + + res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); + nfs_free_delegation(delegation); + return res; +} + +static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) +{ + struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); + + if (delegation == NULL) + goto nomatch; + if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, + sizeof(delegation->stateid.data)) != 0) + goto nomatch; + list_del_rcu(&delegation->super_list); + nfsi->delegation_state = 0; + rcu_assign_pointer(nfsi->delegation, NULL); + return delegation; +nomatch: + return NULL; +} + /* * Set up a delegation on an inode */ @@ -133,6 +159,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; + struct nfs_delegation *freeme = NULL; int status = 0; delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); @@ -147,41 +174,45 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation->inode = inode; spin_lock(&clp->cl_lock); - if (rcu_dereference(nfsi->delegation) == NULL) { - list_add_rcu(&delegation->super_list, &clp->cl_delegations); - nfsi->delegation_state = delegation->type; - rcu_assign_pointer(nfsi->delegation, delegation); - delegation = NULL; - } else { + if (rcu_dereference(nfsi->delegation) != NULL) { if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, - sizeof(delegation->stateid)) != 0 || - delegation->type != nfsi->delegation->type) { - printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n", - __FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr)); - status = -EIO; + sizeof(delegation->stateid)) == 0 && + delegation->type == nfsi->delegation->type) { + goto out; + } + /* + * Deal with broken servers that hand out two + * delegations for the same file. + */ + dfprintk(FILE, "%s: server %s handed out " + "a duplicate delegation!\n", + __FUNCTION__, clp->cl_hostname); + if (delegation->type <= nfsi->delegation->type) { + freeme = delegation; + delegation = NULL; + goto out; } + freeme = nfs_detach_delegation_locked(nfsi, NULL); } + list_add_rcu(&delegation->super_list, &clp->cl_delegations); + nfsi->delegation_state = delegation->type; + rcu_assign_pointer(nfsi->delegation, delegation); + delegation = NULL; /* Ensure we revalidate the attributes and page cache! */ spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_REVAL_FORCED; spin_unlock(&inode->i_lock); +out: spin_unlock(&clp->cl_lock); if (delegation != NULL) nfs_free_delegation(delegation); + if (freeme != NULL) + nfs_do_return_delegation(inode, freeme, 0); return status; } -static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation) -{ - int res = 0; - - res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); - nfs_free_delegation(delegation); - return res; -} - /* Sync all data to disk upon delegation return */ static void nfs_msync_inode(struct inode *inode) { @@ -207,24 +238,28 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat up_read(&clp->cl_sem); nfs_msync_inode(inode); - return nfs_do_return_delegation(inode, delegation); + return nfs_do_return_delegation(inode, delegation, 1); } -static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) +/* + * This function returns the delegation without reclaiming opens + * or protecting against delegation reclaims. + * It is therefore really only safe to be called from + * nfs4_clear_inode() + */ +void nfs_inode_return_delegation_noreclaim(struct inode *inode) { - struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; - if (delegation == NULL) - goto nomatch; - if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, - sizeof(delegation->stateid.data)) != 0) - goto nomatch; - list_del_rcu(&delegation->super_list); - nfsi->delegation_state = 0; - rcu_assign_pointer(nfsi->delegation, NULL); - return delegation; -nomatch: - return NULL; + if (rcu_dereference(nfsi->delegation) != NULL) { + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(nfsi, NULL); + spin_unlock(&clp->cl_lock); + if (delegation != NULL) + nfs_do_return_delegation(inode, delegation, 0); + } } int nfs_inode_return_delegation(struct inode *inode) @@ -314,8 +349,9 @@ void nfs_expire_all_delegations(struct nfs_client *clp) __module_get(THIS_MODULE); atomic_inc(&clp->cl_count); task = kthread_run(nfs_do_expire_all_delegations, clp, - "%u.%u.%u.%u-delegreturn", - NIPQUAD(clp->cl_addr.sin_addr)); + "%s-delegreturn", + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR)); if (!IS_ERR(task)) return; nfs_put_client(clp); @@ -386,7 +422,7 @@ static int recall_thread(void *data) nfs_msync_inode(inode); if (delegation != NULL) - nfs_do_return_delegation(inode, delegation); + nfs_do_return_delegation(inode, delegation, 1); iput(inode); module_put_and_exit(0); } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 5874ce7fdbae..f1c5e2a5d88e 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -29,6 +29,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); int nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); +void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_return_all_delegations(struct super_block *sb); @@ -39,7 +40,7 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ -int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); +int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f697b5c74b7c..476cb0f837fd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -192,7 +192,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) /* We requested READDIRPLUS, but the server doesn't grok it */ if (error == -ENOTSUPP && desc->plus) { NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; - clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); + clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); desc->plus = 0; goto again; } @@ -537,12 +537,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) lock_kernel(); - res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); - if (res < 0) { - unlock_kernel(); - return res; - } - /* * filp->f_pos points to the dirent entry number. * *desc->dir_cookie has the cookie for the next entry. We have @@ -564,6 +558,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) desc->entry = &my_entry; nfs_block_sillyrename(dentry); + res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); + if (res < 0) + goto out; + while(!desc->entry->eof) { res = readdir_search_pagecache(desc); @@ -579,7 +577,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; } if (res == -ETOOSMALL && desc->plus) { - clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); + clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); nfs_zap_caches(inode); desc->plus = 0; desc->entry->eof = 0; @@ -594,6 +592,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; } } +out: nfs_unblock_sillyrename(dentry); unlock_kernel(); if (res > 0) @@ -639,6 +638,21 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) return 0; } +/** + * nfs_force_lookup_revalidate - Mark the directory as having changed + * @dir - pointer to directory inode + * + * This forces the revalidation code in nfs_lookup_revalidate() to do a + * full lookup on all child dentries of 'dir' whenever a change occurs + * on the server that might have invalidated our dcache. + * + * The caller should be holding dir->i_lock + */ +void nfs_force_lookup_revalidate(struct inode *dir) +{ + NFS_I(dir)->cache_change_attribute = jiffies; +} + /* * A check for whether or not the parent directory has changed. * In the case it has, we assume that the dentries are untrustworthy @@ -827,6 +841,10 @@ static int nfs_dentry_delete(struct dentry *dentry) dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_flags); + /* Unhash any dentry with a stale inode */ + if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode)) + return 1; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { /* Unhash it, so that ->d_iput() would be called */ return 1; @@ -846,7 +864,6 @@ static int nfs_dentry_delete(struct dentry *dentry) */ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { - nfs_inode_return_delegation(inode); if (S_ISDIR(inode->i_mode)) /* drop any readdir cache as it could easily be old */ NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; @@ -1268,6 +1285,12 @@ out_err: return error; } +static void nfs_dentry_handle_enoent(struct dentry *dentry) +{ + if (dentry->d_inode != NULL && !d_unhashed(dentry)) + d_delete(dentry); +} + static int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; @@ -1280,6 +1303,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) clear_nlink(dentry->d_inode); + else if (error == -ENOENT) + nfs_dentry_handle_enoent(dentry); unlock_kernel(); return error; @@ -1386,6 +1411,8 @@ static int nfs_safe_remove(struct dentry *dentry) nfs_mark_for_revalidate(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + if (error == -ENOENT) + nfs_dentry_handle_enoent(dentry); out: return error; } @@ -1422,7 +1449,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); - if (!error) { + if (!error || error == -ENOENT) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); } else if (need_rehash) d_rehash(dentry); @@ -1635,7 +1662,8 @@ out: d_move(old_dentry, new_dentry); nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); - } + } else if (error == -ENOENT) + nfs_dentry_handle_enoent(old_dentry); /* new dentry created? */ if (dentry) @@ -1666,13 +1694,19 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) restart: spin_lock(&nfs_access_lru_lock); list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { + struct rw_semaphore *s_umount; struct inode *inode; if (nr_to_scan-- == 0) break; + s_umount = &nfsi->vfs_inode.i_sb->s_umount; + if (!down_read_trylock(s_umount)) + continue; inode = igrab(&nfsi->vfs_inode); - if (inode == NULL) + if (inode == NULL) { + up_read(s_umount); continue; + } spin_lock(&inode->i_lock); if (list_empty(&nfsi->access_cache_entry_lru)) goto remove_lru_entry; @@ -1691,6 +1725,7 @@ remove_lru_entry: spin_unlock(&inode->i_lock); spin_unlock(&nfs_access_lru_lock); iput(inode); + up_read(s_umount); goto restart; } spin_unlock(&nfs_access_lru_lock); @@ -1731,7 +1766,7 @@ static void __nfs_access_zap_cache(struct inode *inode) void nfs_access_zap_cache(struct inode *inode) { /* Remove from global LRU init */ - if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) { + if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { spin_lock(&nfs_access_lru_lock); list_del_init(&NFS_I(inode)->access_cache_inode_lru); spin_unlock(&nfs_access_lru_lock); @@ -1845,7 +1880,7 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s smp_mb__after_atomic_inc(); /* Add inode to global LRU list */ - if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) { + if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { spin_lock(&nfs_access_lru_lock); list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list); spin_unlock(&nfs_access_lru_lock); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3c9d16b4f80c..f8e165c7d5a6 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -188,12 +188,17 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) { ssize_t result = -EIOCBQUEUED; + struct rpc_clnt *clnt; + sigset_t oldset; /* Async requests don't wait here */ if (dreq->iocb) goto out; + clnt = NFS_CLIENT(dreq->inode); + rpc_clnt_sigmask(clnt, &oldset); result = wait_for_completion_interruptible(&dreq->completion); + rpc_clnt_sigunmask(clnt, &oldset); if (!result) result = dreq->error; @@ -272,6 +277,16 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, unsigned long user_addr = (unsigned long)iov->iov_base; size_t count = iov->iov_len; size_t rsize = NFS_SERVER(inode)->rsize; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_cred = ctx->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, + .callback_ops = &nfs_read_direct_ops, + .flags = RPC_TASK_ASYNC, + }; unsigned int pgbase; int result; ssize_t started = 0; @@ -311,7 +326,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->req = (struct nfs_page *) dreq; data->inode = inode; - data->cred = ctx->cred; + data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.offset = pos; @@ -321,14 +336,16 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->res.fattr = &data->fattr; data->res.eof = 0; data->res.count = bytes; + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; - rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, - &nfs_read_direct_ops, data); - NFS_PROTO(inode)->read_setup(data); - - data->task.tk_cookie = (unsigned long) inode; + task_setup_data.task = &data->task; + task_setup_data.callback_data = data; + NFS_PROTO(inode)->read_setup(data, &msg); - rpc_execute(&data->task); + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task(task); dprintk("NFS: %5u initiated direct read call " "(req %s/%Ld, %zu bytes @ offset %Lu)\n", @@ -391,9 +408,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { ssize_t result = 0; - sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; - struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; dreq = nfs_direct_req_alloc(); @@ -405,11 +420,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - rpc_clnt_sigmask(clnt, &oldset); result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); if (!result) result = nfs_direct_wait(dreq); - rpc_clnt_sigunmask(clnt, &oldset); nfs_direct_req_release(dreq); return result; @@ -431,6 +444,15 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) struct inode *inode = dreq->inode; struct list_head *p; struct nfs_write_data *data; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_cred = dreq->ctx->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(inode), + .callback_ops = &nfs_write_direct_ops, + .flags = RPC_TASK_ASYNC, + }; dreq->count = 0; get_dreq(dreq); @@ -440,6 +462,9 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) get_dreq(dreq); + /* Use stable writes */ + data->args.stable = NFS_FILE_SYNC; + /* * Reset data->res. */ @@ -451,17 +476,18 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) * Reuse data->task; data->args should not have changed * since the original request was sent. */ - rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, - &nfs_write_direct_ops, data); - NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); - - data->task.tk_priority = RPC_PRIORITY_NORMAL; - data->task.tk_cookie = (unsigned long) inode; + task_setup_data.task = &data->task; + task_setup_data.callback_data = data; + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + NFS_PROTO(inode)->write_setup(data, &msg); /* * We're called via an RPC callback, so BKL is already held. */ - rpc_execute(&data->task); + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task(task); dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", data->task.tk_pid, @@ -504,9 +530,23 @@ static const struct rpc_call_ops nfs_commit_direct_ops = { static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) { struct nfs_write_data *data = dreq->commit_data; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = dreq->ctx->cred, + }; + struct rpc_task_setup task_setup_data = { + .task = &data->task, + .rpc_client = NFS_CLIENT(dreq->inode), + .rpc_message = &msg, + .callback_ops = &nfs_commit_direct_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; data->inode = dreq->inode; - data->cred = dreq->ctx->cred; + data->cred = msg.rpc_cred; data->args.fh = NFS_FH(data->inode); data->args.offset = 0; @@ -515,18 +555,16 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->res.fattr = &data->fattr; data->res.verf = &data->verf; - rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC, - &nfs_commit_direct_ops, data); - NFS_PROTO(data->inode)->commit_setup(data, 0); + NFS_PROTO(data->inode)->commit_setup(data, &msg); - data->task.tk_priority = RPC_PRIORITY_NORMAL; - data->task.tk_cookie = (unsigned long)data->inode; /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ dreq->commit_data = NULL; dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); - rpc_execute(&data->task); + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task(task); } static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) @@ -641,6 +679,16 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, struct inode *inode = ctx->path.dentry->d_inode; unsigned long user_addr = (unsigned long)iov->iov_base; size_t count = iov->iov_len; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_cred = ctx->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, + .callback_ops = &nfs_write_direct_ops, + .flags = RPC_TASK_ASYNC, + }; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int pgbase; int result; @@ -683,25 +731,27 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->req = (struct nfs_page *) dreq; data->inode = inode; - data->cred = ctx->cred; + data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; data->args.count = bytes; + data->args.stable = sync; data->res.fattr = &data->fattr; data->res.count = bytes; data->res.verf = &data->verf; - rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, - &nfs_write_direct_ops, data); - NFS_PROTO(inode)->write_setup(data, sync); - - data->task.tk_priority = RPC_PRIORITY_NORMAL; - data->task.tk_cookie = (unsigned long) inode; + task_setup_data.task = &data->task; + task_setup_data.callback_data = data; + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + NFS_PROTO(inode)->write_setup(data, &msg); - rpc_execute(&data->task); + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task(task); dprintk("NFS: %5u initiated direct write call " "(req %s/%Ld, %zu bytes @ offset %Lu)\n", @@ -767,12 +817,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, size_t count) { ssize_t result = 0; - sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; - struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; size_t wsize = NFS_SERVER(inode)->wsize; - int sync = 0; + int sync = NFS_UNSTABLE; dreq = nfs_direct_req_alloc(); if (!dreq) @@ -780,18 +828,16 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, nfs_alloc_commit_data(dreq); if (dreq->commit_data == NULL || count < wsize) - sync = FLUSH_STABLE; + sync = NFS_FILE_SYNC; dreq->inode = inode; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - rpc_clnt_sigmask(clnt, &oldset); result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); if (!result) result = nfs_direct_wait(dreq); - rpc_clnt_sigunmask(clnt, &oldset); nfs_direct_req_release(dreq); return result; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index b3bb89f7d5d2..ef57a5ae5904 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -349,7 +349,9 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, unlock_page(page); page_cache_release(page); - return status < 0 ? status : copied; + if (status < 0) + return status; + return copied; } static void nfs_invalidate_page(struct page *page, unsigned long offset) @@ -392,35 +394,27 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) struct file *filp = vma->vm_file; unsigned pagelen; int ret = -EINVAL; - void *fsdata; struct address_space *mapping; - loff_t offset; lock_page(page); mapping = page->mapping; - if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) { - unlock_page(page); - return -EINVAL; - } + if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) + goto out_unlock; + + ret = 0; pagelen = nfs_page_length(page); - offset = (loff_t)page->index << PAGE_CACHE_SHIFT; - unlock_page(page); + if (pagelen == 0) + goto out_unlock; - /* - * we can use mapping after releasing the page lock, because: - * we hold mmap_sem on the fault path, which should pin the vma - * which should pin the file, which pins the dentry which should - * hold a reference on inode. - */ + ret = nfs_flush_incompatible(filp, page); + if (ret != 0) + goto out_unlock; - if (pagelen) { - struct page *page2 = NULL; - ret = nfs_write_begin(filp, mapping, offset, pagelen, - 0, &page2, &fsdata); - if (!ret) - ret = nfs_write_end(filp, mapping, offset, pagelen, - pagelen, page2, fsdata); - } + ret = nfs_updatepage(filp, page, 0, pagelen); + if (ret == 0) + ret = pagelen; +out_unlock: + unlock_page(page); return ret; } diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index d11eb055265c..8ae5dba2d4e5 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -72,39 +72,39 @@ module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, &nfs_idmap_cache_timeout, 0644); struct idmap_hashent { - unsigned long ih_expires; - __u32 ih_id; - int ih_namelen; - char ih_name[IDMAP_NAMESZ]; + unsigned long ih_expires; + __u32 ih_id; + size_t ih_namelen; + char ih_name[IDMAP_NAMESZ]; }; struct idmap_hashtable { - __u8 h_type; - struct idmap_hashent h_entries[IDMAP_HASH_SZ]; + __u8 h_type; + struct idmap_hashent h_entries[IDMAP_HASH_SZ]; }; struct idmap { - struct dentry *idmap_dentry; - wait_queue_head_t idmap_wq; - struct idmap_msg idmap_im; - struct mutex idmap_lock; /* Serializes upcalls */ - struct mutex idmap_im_lock; /* Protects the hashtable */ - struct idmap_hashtable idmap_user_hash; - struct idmap_hashtable idmap_group_hash; + struct dentry *idmap_dentry; + wait_queue_head_t idmap_wq; + struct idmap_msg idmap_im; + struct mutex idmap_lock; /* Serializes upcalls */ + struct mutex idmap_im_lock; /* Protects the hashtable */ + struct idmap_hashtable idmap_user_hash; + struct idmap_hashtable idmap_group_hash; }; -static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, - char __user *, size_t); -static ssize_t idmap_pipe_downcall(struct file *, const char __user *, - size_t); -static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); +static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, + char __user *, size_t); +static ssize_t idmap_pipe_downcall(struct file *, const char __user *, + size_t); +static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static unsigned int fnvhash32(const void *, size_t); static struct rpc_pipe_ops idmap_upcall_ops = { - .upcall = idmap_pipe_upcall, - .downcall = idmap_pipe_downcall, - .destroy_msg = idmap_pipe_destroy_msg, + .upcall = idmap_pipe_upcall, + .downcall = idmap_pipe_downcall, + .destroy_msg = idmap_pipe_destroy_msg, }; int @@ -115,19 +115,20 @@ nfs_idmap_new(struct nfs_client *clp) BUG_ON(clp->cl_idmap != NULL); - if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) - return -ENOMEM; + idmap = kzalloc(sizeof(*idmap), GFP_KERNEL); + if (idmap == NULL) + return -ENOMEM; - idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", - idmap, &idmap_upcall_ops, 0); - if (IS_ERR(idmap->idmap_dentry)) { + idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", + idmap, &idmap_upcall_ops, 0); + if (IS_ERR(idmap->idmap_dentry)) { error = PTR_ERR(idmap->idmap_dentry); kfree(idmap); return error; } - mutex_init(&idmap->idmap_lock); - mutex_init(&idmap->idmap_im_lock); + mutex_init(&idmap->idmap_lock); + mutex_init(&idmap->idmap_im_lock); init_waitqueue_head(&idmap->idmap_wq); idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; @@ -192,7 +193,7 @@ idmap_lookup_id(struct idmap_hashtable *h, __u32 id) * pretty trivial. */ static inline struct idmap_hashent * -idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len) +idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len) { return idmap_name_hash(h, name, len); } @@ -285,7 +286,7 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, memset(im, 0, sizeof(*im)); mutex_unlock(&idmap->idmap_im_lock); mutex_unlock(&idmap->idmap_lock); - return (ret); + return ret; } /* @@ -354,42 +355,40 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, /* RPC pipefs upcall/downcall routines */ static ssize_t idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, - char __user *dst, size_t buflen) + char __user *dst, size_t buflen) { - char *data = (char *)msg->data + msg->copied; - ssize_t mlen = msg->len - msg->copied; - ssize_t left; - - if (mlen > buflen) - mlen = buflen; - - left = copy_to_user(dst, data, mlen); - if (left < 0) { - msg->errno = left; - return left; + char *data = (char *)msg->data + msg->copied; + size_t mlen = min(msg->len, buflen); + unsigned long left; + + left = copy_to_user(dst, data, mlen); + if (left == mlen) { + msg->errno = -EFAULT; + return -EFAULT; } + mlen -= left; msg->copied += mlen; msg->errno = 0; - return mlen; + return mlen; } static ssize_t idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { - struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); + struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); struct idmap *idmap = (struct idmap *)rpci->private; struct idmap_msg im_in, *im = &idmap->idmap_im; struct idmap_hashtable *h; struct idmap_hashent *he = NULL; - int namelen_in; + size_t namelen_in; int ret; - if (mlen != sizeof(im_in)) - return (-ENOSPC); + if (mlen != sizeof(im_in)) + return -ENOSPC; - if (copy_from_user(&im_in, src, mlen) != 0) - return (-EFAULT); + if (copy_from_user(&im_in, src, mlen) != 0) + return -EFAULT; mutex_lock(&idmap->idmap_im_lock); @@ -487,7 +486,7 @@ static unsigned int fnvhash32(const void *buf, size_t buflen) hash ^= (unsigned int)*p; } - return (hash); + return hash; } int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index db5d96dc6107..3f332e54e760 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -192,7 +192,7 @@ void nfs_invalidate_atime(struct inode *inode) */ static void nfs_invalidate_inode(struct inode *inode) { - set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); + set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); nfs_zap_caches_locked(inode); } @@ -229,7 +229,7 @@ nfs_init_locked(struct inode *inode, void *opaque) struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; struct nfs_fattr *fattr = desc->fattr; - NFS_FILEID(inode) = fattr->fileid; + set_nfs_fileid(inode, fattr->fileid); nfs_copy_fh(NFS_FH(inode), desc->fh); return 0; } @@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) - set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); + set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); /* Deal with crossing mountpoints */ if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) @@ -461,9 +461,18 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; - /* Flush out writes to the server in order to update c/mtime */ - if (S_ISREG(inode->i_mode)) + /* + * Flush out writes to the server in order to update c/mtime. + * + * Hold the i_mutex to suspend application writes temporarily; + * this prevents long-running writing applications from blocking + * nfs_wb_nocommit. + */ + if (S_ISREG(inode->i_mode)) { + mutex_lock(&inode->i_mutex); nfs_wb_nocommit(inode); + mutex_unlock(&inode->i_mutex); + } /* * We may force a getattr if the user cares about atime. @@ -659,7 +668,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (status == -ESTALE) { nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) - set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); + set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); } goto out; } @@ -814,8 +823,9 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; } - if (inode->i_size == fattr->pre_size && nfsi->npages == 0) - inode->i_size = fattr->size; + if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) && + nfsi->npages == 0) + inode->i_size = nfs_size_to_loff_t(fattr->size); } } @@ -1019,7 +1029,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: mtime change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute = now; + if (S_ISDIR(inode->i_mode)) + nfs_force_lookup_revalidate(inode); } /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) @@ -1028,7 +1039,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = now; + if (S_ISDIR(inode->i_mode)) + nfs_force_lookup_revalidate(inode); } /* Check if our cached file size is stale */ @@ -1133,7 +1145,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) void nfs4_clear_inode(struct inode *inode) { /* If we are holding a delegation, return it! */ - nfs_inode_return_delegation(inode); + nfs_inode_return_delegation_noreclaim(inode); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index f3acf48412be..0f5619611b8d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -21,7 +21,8 @@ struct nfs_clone_mount { struct nfs_fattr *fattr; char *hostname; char *mnt_path; - struct sockaddr_in *addr; + struct sockaddr *addr; + size_t addrlen; rpc_authflavor_t authflavor; }; @@ -41,19 +42,19 @@ struct nfs_parsed_mount_data { char *client_address; struct { - struct sockaddr_in address; + struct sockaddr_storage address; + size_t addrlen; char *hostname; - unsigned int program; unsigned int version; unsigned short port; int protocol; } mount_server; struct { - struct sockaddr_in address; + struct sockaddr_storage address; + size_t addrlen; char *hostname; char *export_path; - unsigned int program; int protocol; } nfs_server; }; @@ -62,7 +63,8 @@ struct nfs_parsed_mount_data { extern struct rpc_program nfs_program; extern void nfs_put_client(struct nfs_client *); -extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); +extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32); +extern struct nfs_client *nfs_find_client_next(struct nfs_client *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, struct nfs_fh *); @@ -160,6 +162,8 @@ extern struct rpc_stat nfs_rpcstat; extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); +extern void nfs_sb_active(struct nfs_server *server); +extern void nfs_sb_deactive(struct nfs_server *server); /* namespace.c */ extern char *nfs_path(const char *base, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index acfc56f9edc0..be4ce1c3a3d8 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -188,7 +188,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, { #ifdef CONFIG_NFS_V4 struct vfsmount *mnt = NULL; - switch (server->nfs_client->cl_nfsversion) { + switch (server->nfs_client->rpc_ops->version) { case 2: case 3: mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 668ab96c7b59..1f7ea675e0c5 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -262,7 +262,9 @@ static int nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) { struct kvec *iov = req->rq_rcv_buf.head; - int status, count, recvd, hdrlen; + size_t hdrlen; + u32 count, recvd; + int status; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); @@ -273,7 +275,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { dprintk("NFS: READ reply header overflowed:" - "length %d > %Zu\n", hdrlen, iov->iov_len); + "length %Zu > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { dprintk("NFS: READ header is short. iovec will be shifted.\n"); @@ -283,11 +285,11 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) recvd = req->rq_rcv_buf.len - hdrlen; if (count > recvd) { dprintk("NFS: server cheating in read reply: " - "count %d > recvd %d\n", count, recvd); + "count %u > recvd %u\n", count, recvd); count = recvd; } - dprintk("RPC: readres OK count %d\n", count); + dprintk("RPC: readres OK count %u\n", count); if (count < res->count) res->count = count; @@ -423,9 +425,10 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) struct xdr_buf *rcvbuf = &req->rq_rcv_buf; struct kvec *iov = rcvbuf->head; struct page **page; - int hdrlen, recvd; + size_t hdrlen; + unsigned int pglen, recvd; + u32 len; int status, nr; - unsigned int len, pglen; __be32 *end, *entry, *kaddr; if ((status = ntohl(*p++))) @@ -434,7 +437,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { dprintk("NFS: READDIR reply header overflowed:" - "length %d > %Zu\n", hdrlen, iov->iov_len); + "length %Zu > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); @@ -576,7 +579,8 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) { struct xdr_buf *rcvbuf = &req->rq_rcv_buf; struct kvec *iov = rcvbuf->head; - int hdrlen, len, recvd; + size_t hdrlen; + u32 len, recvd; char *kaddr; int status; @@ -584,14 +588,14 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) return -nfs_stat_to_errno(status); /* Convert length of symlink */ len = ntohl(*p++); - if (len >= rcvbuf->page_len || len <= 0) { + if (len >= rcvbuf->page_len) { dprintk("nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { dprintk("NFS: READLINK reply header overflowed:" - "length %d > %Zu\n", hdrlen, iov->iov_len); + "length %Zu > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4cdc2361a669..b353c1a05bfd 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -732,16 +732,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs3_proc_read_setup(struct nfs_read_data *data) +static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) { - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_READ], - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->cred, - }; - - rpc_call_setup(&data->task, &msg, 0); + msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -753,24 +746,9 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs3_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->cred, - }; - - data->args.stable = NFS_UNSTABLE; - if (how & FLUSH_STABLE) { - data->args.stable = NFS_FILE_SYNC; - if (NFS_I(data->inode)->ncommit) - data->args.stable = NFS_DATA_SYNC; - } - - /* Finalize the task. */ - rpc_call_setup(&data->task, &msg, 0); + msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) @@ -781,22 +759,17 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how) +static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) { - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->cred, - }; - - rpc_call_setup(&data->task, &msg, 0); + msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; } static int nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { - return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl); + struct inode *inode = filp->f_path.dentry->d_inode; + + return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } const struct nfs_rpc_ops nfs_v3_clientops = { diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 616d3267b7e7..3917e2fa4e40 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -506,9 +506,9 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res struct xdr_buf *rcvbuf = &req->rq_rcv_buf; struct kvec *iov = rcvbuf->head; struct page **page; - int hdrlen, recvd; + size_t hdrlen; + u32 len, recvd, pglen; int status, nr; - unsigned int len, pglen; __be32 *entry, *end, *kaddr; status = ntohl(*p++); @@ -527,7 +527,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { dprintk("NFS: READDIR reply header overflowed:" - "length %d > %Zu\n", hdrlen, iov->iov_len); + "length %Zu > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); @@ -549,7 +549,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res len = ntohl(*p++); /* string length */ p += XDR_QUADLEN(len) + 2; /* name + cookie */ if (len > NFS3_MAXNAMLEN) { - dprintk("NFS: giant filename in readdir (len %x)!\n", + dprintk("NFS: giant filename in readdir (len 0x%x)!\n", len); goto err_unmap; } @@ -570,7 +570,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res len = ntohl(*p++); if (len > NFS3_FHSIZE) { dprintk("NFS: giant filehandle in " - "readdir (len %x)!\n", len); + "readdir (len 0x%x)!\n", len); goto err_unmap; } p += XDR_QUADLEN(len); @@ -815,7 +815,8 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) { struct xdr_buf *rcvbuf = &req->rq_rcv_buf; struct kvec *iov = rcvbuf->head; - int hdrlen, len, recvd; + size_t hdrlen; + u32 len, recvd; char *kaddr; int status; @@ -827,7 +828,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) /* Convert length of symlink */ len = ntohl(*p++); - if (len >= rcvbuf->page_len || len <= 0) { + if (len >= rcvbuf->page_len) { dprintk("nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } @@ -835,7 +836,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { dprintk("NFS: READLINK reply header overflowed:" - "length %d > %Zu\n", hdrlen, iov->iov_len); + "length %Zu > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { dprintk("NFS: READLINK header is short. " @@ -863,7 +864,9 @@ static int nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) { struct kvec *iov = req->rq_rcv_buf.head; - int status, count, ocount, recvd, hdrlen; + size_t hdrlen; + u32 count, ocount, recvd; + int status; status = ntohl(*p++); p = xdr_decode_post_op_attr(p, res->fattr); @@ -871,7 +874,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) if (status != 0) return -nfs_stat_to_errno(status); - /* Decode reply could and EOF flag. NFSv3 is somewhat redundant + /* Decode reply count and EOF flag. NFSv3 is somewhat redundant * in that it puts the count both in the res struct and in the * opaque data count. */ count = ntohl(*p++); @@ -886,7 +889,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { dprintk("NFS: READ reply header overflowed:" - "length %d > %Zu\n", hdrlen, iov->iov_len); + "length %Zu > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { dprintk("NFS: READ header is short. iovec will be shifted.\n"); @@ -896,7 +899,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) recvd = req->rq_rcv_buf.len - hdrlen; if (count > recvd) { dprintk("NFS: server cheating in read reply: " - "count %d > recvd %d\n", count, recvd); + "count %u > recvd %u\n", count, recvd); count = recvd; res->eof = 0; } diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index dd5fef20c702..5f9ba41ed5bf 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -114,10 +114,7 @@ static inline int valid_ipaddr4(const char *buf) * nfs_follow_referral - set up mountpoint when hitting a referral on moved error * @mnt_parent - mountpoint of parent directory * @dentry - parent directory - * @fspath - fs path returned in fs_locations - * @mntpath - mount path to new server - * @hostname - hostname of new server - * @addr - host addr of new server + * @locations - array of NFSv4 server location information * */ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, @@ -131,7 +128,8 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, }; char *page = NULL, *page2 = NULL; - int loc, s, error; + unsigned int s; + int loc, error; if (locations == NULL || locations->nlocations <= 0) goto out; @@ -174,7 +172,10 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, s = 0; while (s < location->nservers) { - struct sockaddr_in addr = {}; + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons(NFS_PORT), + }; if (location->servers[s].len <= 0 || valid_ipaddr4(location->servers[s].data) < 0) { @@ -183,10 +184,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, } mountdata.hostname = location->servers[s].data; - addr.sin_addr.s_addr = in_aton(mountdata.hostname); - addr.sin_family = AF_INET; - addr.sin_port = htons(NFS_PORT); - mountdata.addr = &addr; + addr.sin_addr.s_addr = in_aton(mountdata.hostname), + mountdata.addr = (struct sockaddr *)&addr; + mountdata.addrlen = sizeof(addr); snprintf(page, PAGE_SIZE, "%s:%s", mountdata.hostname, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9e2e1c7291db..5c189bd57eb2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -210,7 +210,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) spin_lock(&dir->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; if (!cinfo->atomic || cinfo->before != nfsi->change_attr) - nfsi->cache_change_attribute = jiffies; + nfs_force_lookup_revalidate(dir); nfsi->change_attr = cinfo->after; spin_unlock(&dir->i_lock); } @@ -718,19 +718,6 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state return err; } -static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) -{ - struct nfs4_opendata *data = calldata; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], - .rpc_argp = &data->c_arg, - .rpc_resp = &data->c_res, - .rpc_cred = data->owner->so_cred, - }; - data->timestamp = jiffies; - rpc_call_setup(task, &msg, 0); -} - static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; @@ -767,7 +754,6 @@ out_free: } static const struct rpc_call_ops nfs4_open_confirm_ops = { - .rpc_call_prepare = nfs4_open_confirm_prepare, .rpc_call_done = nfs4_open_confirm_done, .rpc_release = nfs4_open_confirm_release, }; @@ -779,12 +765,26 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) { struct nfs_server *server = NFS_SERVER(data->dir->d_inode); struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], + .rpc_argp = &data->c_arg, + .rpc_resp = &data->c_res, + .rpc_cred = data->owner->so_cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_open_confirm_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; int status; kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; - task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); + data->timestamp = jiffies; + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); status = nfs4_wait_for_completion_rpc_task(task); @@ -801,13 +801,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; struct nfs4_state_owner *sp = data->owner; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN], - .rpc_argp = &data->o_arg, - .rpc_resp = &data->o_res, - .rpc_cred = sp->so_cred, - }; - + if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) return; /* @@ -832,11 +826,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->o_arg.id = sp->so_owner_id.id; data->o_arg.clientid = sp->so_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { - msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; + task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; - rpc_call_setup(task, &msg, 0); + rpc_call_start(task); return; out_no_action: task->tk_action = NULL; @@ -908,13 +902,26 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) struct nfs_openargs *o_arg = &data->o_arg; struct nfs_openres *o_res = &data->o_res; struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN], + .rpc_argp = o_arg, + .rpc_resp = o_res, + .rpc_cred = data->owner->so_cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_open_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; int status; kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; data->cancelled = 0; - task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); status = nfs4_wait_for_completion_rpc_task(task); @@ -1244,12 +1251,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) { struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], - .rpc_argp = &calldata->arg, - .rpc_resp = &calldata->res, - .rpc_cred = state->owner->so_cred, - }; int clear_rd, clear_wr, clear_rdwr; if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) @@ -1276,14 +1277,14 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) } nfs_fattr_init(calldata->res.fattr); if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) { - msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; + task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; calldata->arg.open_flags = FMODE_READ; } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) { - msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; + task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; calldata->arg.open_flags = FMODE_WRITE; } calldata->timestamp = jiffies; - rpc_call_setup(task, &msg, 0); + rpc_call_start(task); } static const struct rpc_call_ops nfs4_close_ops = { @@ -1309,6 +1310,16 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) struct nfs4_closedata *calldata; struct nfs4_state_owner *sp = state->owner; struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], + .rpc_cred = state->owner->so_cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_close_ops, + .flags = RPC_TASK_ASYNC, + }; int status = -ENOMEM; calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); @@ -1328,7 +1339,10 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) calldata->path.mnt = mntget(path->mnt); calldata->path.dentry = dget(path->dentry); - task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata); + msg.rpc_argp = &calldata->arg, + msg.rpc_resp = &calldata->res, + task_setup_data.callback_data = calldata; + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); status = 0; @@ -2414,18 +2428,10 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs4_proc_read_setup(struct nfs_read_data *data) +static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) { - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->cred, - }; - data->timestamp = jiffies; - - rpc_call_setup(&data->task, &msg, 0); + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; } static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -2443,33 +2449,15 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs4_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->cred, - }; - struct inode *inode = data->inode; - struct nfs_server *server = NFS_SERVER(inode); - int stable; - - if (how & FLUSH_STABLE) { - if (!NFS_I(inode)->ncommit) - stable = NFS_FILE_SYNC; - else - stable = NFS_DATA_SYNC; - } else - stable = NFS_UNSTABLE; - data->args.stable = stable; + struct nfs_server *server = NFS_SERVER(data->inode); + data->args.bitmask = server->attr_bitmask; data->res.server = server; - data->timestamp = jiffies; - /* Finalize the task. */ - rpc_call_setup(&data->task, &msg, 0); + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; } static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) @@ -2484,20 +2472,13 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how) +static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) { - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->cred, - }; struct nfs_server *server = NFS_SERVER(data->inode); data->args.bitmask = server->attr_bitmask; data->res.server = server; - - rpc_call_setup(&data->task, &msg, 0); + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; } /* @@ -2910,14 +2891,20 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po for(;;) { setclientid.sc_name_len = scnprintf(setclientid.sc_name, - sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", - clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr), + sizeof(setclientid.sc_name), "%s/%s %s %s %u", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_PROTO), cred->cr_ops->cr_name, clp->cl_id_uniquifier); setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, - sizeof(setclientid.sc_netid), "tcp"); + sizeof(setclientid.sc_netid), + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_NETID)); setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, - sizeof(setclientid.sc_uaddr), "%s.%d.%d", + sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); @@ -2981,25 +2968,11 @@ struct nfs4_delegreturndata { struct nfs4_delegreturnres res; struct nfs_fh fh; nfs4_stateid stateid; - struct rpc_cred *cred; unsigned long timestamp; struct nfs_fattr fattr; int rpc_status; }; -static void nfs4_delegreturn_prepare(struct rpc_task *task, void *calldata) -{ - struct nfs4_delegreturndata *data = calldata; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->cred, - }; - nfs_fattr_init(data->res.fattr); - rpc_call_setup(task, &msg, 0); -} - static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_delegreturndata *data = calldata; @@ -3010,24 +2983,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) static void nfs4_delegreturn_release(void *calldata) { - struct nfs4_delegreturndata *data = calldata; - - put_rpccred(data->cred); kfree(calldata); } static const struct rpc_call_ops nfs4_delegreturn_ops = { - .rpc_call_prepare = nfs4_delegreturn_prepare, .rpc_call_done = nfs4_delegreturn_done, .rpc_release = nfs4_delegreturn_release, }; -static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) +static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) { struct nfs4_delegreturndata *data; struct nfs_server *server = NFS_SERVER(inode); struct rpc_task *task; - int status; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], + .rpc_cred = cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_delegreturn_ops, + .flags = RPC_TASK_ASYNC, + }; + int status = 0; data = kmalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) @@ -3039,30 +3018,37 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co memcpy(&data->stateid, stateid, sizeof(data->stateid)); data->res.fattr = &data->fattr; data->res.server = server; - data->cred = get_rpccred(cred); + nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; - task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data); + task_setup_data.callback_data = data; + msg.rpc_argp = &data->args, + msg.rpc_resp = &data->res, + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); + if (!issync) + goto out; status = nfs4_wait_for_completion_rpc_task(task); - if (status == 0) { - status = data->rpc_status; - if (status == 0) - nfs_refresh_inode(inode, &data->fattr); - } + if (status != 0) + goto out; + status = data->rpc_status; + if (status != 0) + goto out; + nfs_refresh_inode(inode, &data->fattr); +out: rpc_put_task(task); return status; } -int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) +int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_proc_delegreturn(inode, cred, stateid); + err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); switch (err) { case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: @@ -3230,12 +3216,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) static void nfs4_locku_prepare(struct rpc_task *task, void *data) { struct nfs4_unlockdata *calldata = data; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], - .rpc_argp = &calldata->arg, - .rpc_resp = &calldata->res, - .rpc_cred = calldata->lsp->ls_state->owner->so_cred, - }; if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; @@ -3245,7 +3225,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) return; } calldata->timestamp = jiffies; - rpc_call_setup(task, &msg, 0); + rpc_call_start(task); } static const struct rpc_call_ops nfs4_locku_ops = { @@ -3260,6 +3240,16 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, struct nfs_seqid *seqid) { struct nfs4_unlockdata *data; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], + .rpc_cred = ctx->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(lsp->ls_state->inode), + .rpc_message = &msg, + .callback_ops = &nfs4_locku_ops, + .flags = RPC_TASK_ASYNC, + }; /* Ensure this is an unlock - when canceling a lock, the * canceled lock is passed in, and it won't be an unlock. @@ -3272,7 +3262,10 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } - return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); + msg.rpc_argp = &data->arg, + msg.rpc_resp = &data->res, + task_setup_data.callback_data = data; + return rpc_run_task(&task_setup_data); } static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) @@ -3331,15 +3324,12 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; - if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { - p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid); - if (p->arg.open_seqid == NULL) - goto out_free; - - } + p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid); + if (p->arg.open_seqid == NULL) + goto out_free; p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); if (p->arg.lock_seqid == NULL) - goto out_free; + goto out_free_seqid; p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_id.id; @@ -3348,9 +3338,9 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->ctx = get_nfs_open_context(ctx); memcpy(&p->fl, fl, sizeof(p->fl)); return p; +out_free_seqid: + nfs_free_seqid(p->arg.open_seqid); out_free: - if (p->arg.open_seqid != NULL) - nfs_free_seqid(p->arg.open_seqid); kfree(p); return NULL; } @@ -3359,31 +3349,20 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; struct nfs4_state *state = data->lsp->ls_state; - struct nfs4_state_owner *sp = state->owner; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], - .rpc_argp = &data->arg, - .rpc_resp = &data->res, - .rpc_cred = sp->so_cred, - }; dprintk("%s: begin!\n", __FUNCTION__); + if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) + return; /* Do we need to do an open_to_lock_owner? */ if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) return; data->arg.open_stateid = &state->stateid; data->arg.new_lock_owner = 1; - /* Retest in case we raced... */ - if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) - goto do_rpc; - } - if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) - return; - data->arg.new_lock_owner = 0; -do_rpc: + } else + data->arg.new_lock_owner = 0; data->timestamp = jiffies; - rpc_call_setup(task, &msg, 0); + rpc_call_start(task); dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status); } @@ -3419,6 +3398,7 @@ static void nfs4_lock_release(void *calldata) struct nfs4_lockdata *data = calldata; dprintk("%s: begin!\n", __FUNCTION__); + nfs_free_seqid(data->arg.open_seqid); if (data->cancelled != 0) { struct rpc_task *task; task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, @@ -3428,8 +3408,6 @@ static void nfs4_lock_release(void *calldata) dprintk("%s: cancelling lock!\n", __FUNCTION__); } else nfs_free_seqid(data->arg.lock_seqid); - if (data->arg.open_seqid != NULL) - nfs_free_seqid(data->arg.open_seqid); nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); kfree(data); @@ -3446,6 +3424,16 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f { struct nfs4_lockdata *data; struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], + .rpc_cred = state->owner->so_cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(state->inode), + .rpc_message = &msg, + .callback_ops = &nfs4_lock_ops, + .flags = RPC_TASK_ASYNC, + }; int ret; dprintk("%s: begin!\n", __FUNCTION__); @@ -3457,8 +3445,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.block = 1; if (reclaim != 0) data->arg.reclaim = 1; - task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC, - &nfs4_lock_ops, data); + msg.rpc_argp = &data->arg, + msg.rpc_resp = &data->res, + task_setup_data.callback_data = data; + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); ret = nfs4_wait_for_completion_rpc_task(task); @@ -3631,10 +3621,6 @@ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) return -EOPNOTSUPP; - if (!S_ISREG(inode->i_mode) && - (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) - return -EPERM; - return nfs4_proc_set_acl(inode, buf, buflen); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5a39c6f78acf..f9c7432471dc 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -644,27 +644,26 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) { - struct rpc_sequence *sequence = counter->sequence; struct nfs_seqid *new; new = kmalloc(sizeof(*new), GFP_KERNEL); if (new != NULL) { new->sequence = counter; - spin_lock(&sequence->lock); - list_add_tail(&new->list, &sequence->list); - spin_unlock(&sequence->lock); + INIT_LIST_HEAD(&new->list); } return new; } void nfs_free_seqid(struct nfs_seqid *seqid) { - struct rpc_sequence *sequence = seqid->sequence->sequence; + if (!list_empty(&seqid->list)) { + struct rpc_sequence *sequence = seqid->sequence->sequence; - spin_lock(&sequence->lock); - list_del(&seqid->list); - spin_unlock(&sequence->lock); - rpc_wake_up(&sequence->wait); + spin_lock(&sequence->lock); + list_del(&seqid->list); + spin_unlock(&sequence->lock); + rpc_wake_up(&sequence->wait); + } kfree(seqid); } @@ -675,6 +674,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid) */ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { + BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid); switch (status) { case 0: break; @@ -726,15 +726,15 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) struct rpc_sequence *sequence = seqid->sequence->sequence; int status = 0; - if (sequence->list.next == &seqid->list) - goto out; spin_lock(&sequence->lock); - if (sequence->list.next != &seqid->list) { - rpc_sleep_on(&sequence->wait, task, NULL, NULL); - status = -EAGAIN; - } + if (list_empty(&seqid->list)) + list_add_tail(&seqid->list, &sequence->list); + if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) + goto unlock; + rpc_sleep_on(&sequence->wait, task, NULL, NULL); + status = -EAGAIN; +unlock: spin_unlock(&sequence->lock); -out: return status; } @@ -758,8 +758,9 @@ static void nfs4_recover_state(struct nfs_client *clp) __module_get(THIS_MODULE); atomic_inc(&clp->cl_count); - task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim", - NIPQUAD(clp->cl_addr.sin_addr)); + task = kthread_run(reclaimer, clp, "%s-reclaim", + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR)); if (!IS_ERR(task)) return; nfs4_clear_recover_bit(clp); @@ -970,8 +971,8 @@ out: module_put_and_exit(0); return 0; out_error: - printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", - NIPQUAD(clp->cl_addr.sin_addr), -status); + printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %s" + " with error %d\n", clp->cl_hostname, -status); set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 51dd3804866f..db1ed9c46ede 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -116,10 +116,12 @@ static int nfs4_stat_to_errno(int); #define decode_renew_maxsz (op_decode_hdr_maxsz) #define encode_setclientid_maxsz \ (op_encode_hdr_maxsz + \ - 4 /*server->ip_addr*/ + \ - 1 /*Netid*/ + \ - 6 /*uaddr*/ + \ - 6 + (NFS4_VERIFIER_SIZE >> 2)) + XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ + XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ + 1 /* sc_prog */ + \ + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ + 1) /* sc_cb_ident */ #define decode_setclientid_maxsz \ (op_decode_hdr_maxsz + \ 2 + \ @@ -2515,14 +2517,12 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) { - int n; + u32 n; __be32 *p; int status = 0; READ_BUF(4); READ32(n); - if (n < 0) - goto out_eio; if (n == 0) goto root_path; dprintk("path "); @@ -2579,13 +2579,11 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st goto out_eio; res->nlocations = 0; while (res->nlocations < n) { - int m; + u32 m; struct nfs4_fs_location *loc = &res->locations[res->nlocations]; READ_BUF(4); READ32(m); - if (m <= 0) - goto out_eio; loc->nservers = 0; dprintk("%s: servers ", __FUNCTION__); @@ -2598,8 +2596,12 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS) loc->nservers++; else { - int i; - dprintk("%s: using first %d of %d servers returned for location %d\n", __FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations); + unsigned int i; + dprintk("%s: using first %u of %u servers " + "returned for location %u\n", + __FUNCTION__, + NFS4_FS_LOCATION_MAXSERVERS, + m, res->nlocations); for (i = loc->nservers; i < m; i++) { unsigned int len; char *data; @@ -3476,10 +3478,11 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n struct xdr_buf *rcvbuf = &req->rq_rcv_buf; struct page *page = *rcvbuf->pages; struct kvec *iov = rcvbuf->head; - unsigned int nr, pglen = rcvbuf->page_len; + size_t hdrlen; + u32 recvd, pglen = rcvbuf->page_len; __be32 *end, *entry, *p, *kaddr; - uint32_t len, attrlen, xlen; - int hdrlen, recvd, status; + unsigned int nr; + int status; status = decode_op_hdr(xdr, OP_READDIR); if (status) @@ -3503,6 +3506,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n end = p + ((pglen + readdir->pgbase) >> 2); entry = p; for (nr = 0; *p++; nr++) { + u32 len, attrlen, xlen; if (end - p < 3) goto short_pkt; dprintk("cookie = %Lu, ", *((unsigned long long *)p)); @@ -3551,7 +3555,8 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) { struct xdr_buf *rcvbuf = &req->rq_rcv_buf; struct kvec *iov = rcvbuf->head; - int hdrlen, len, recvd; + size_t hdrlen; + u32 len, recvd; __be32 *p; char *kaddr; int status; @@ -3646,7 +3651,8 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) return -EIO; if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { - int hdrlen, recvd; + size_t hdrlen; + u32 recvd; /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 345bb9b4765b..3b3dbb94393d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -111,13 +111,14 @@ void nfs_unlock_request(struct nfs_page *req) * nfs_set_page_tag_locked - Tag a request as locked * @req: */ -static int nfs_set_page_tag_locked(struct nfs_page *req) +int nfs_set_page_tag_locked(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); - if (!nfs_lock_request(req)) + if (!nfs_lock_request_dontget(req)) return 0; - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + if (req->wb_page != NULL) + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 1; } @@ -132,9 +133,10 @@ void nfs_clear_page_tag_locked(struct nfs_page *req) if (req->wb_page != NULL) { spin_lock(&inode->i_lock); radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + nfs_unlock_request(req); spin_unlock(&inode->i_lock); - } - nfs_unlock_request(req); + } else + nfs_unlock_request(req); } /** @@ -421,6 +423,7 @@ int nfs_scan_list(struct nfs_inode *nfsi, goto out; idx_start = req->wb_index + 1; if (nfs_set_page_tag_locked(req)) { + kref_get(&req->wb_kref); nfs_list_remove_request(req); radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, tag); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4f80d88e9fee..5ccf7faee19c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -565,16 +565,9 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs_proc_read_setup(struct nfs_read_data *data) +static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) { - struct rpc_message msg = { - .rpc_proc = &nfs_procedures[NFSPROC_READ], - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->cred, - }; - - rpc_call_setup(&data->task, &msg, 0); + msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -584,24 +577,15 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { - struct rpc_message msg = { - .rpc_proc = &nfs_procedures[NFSPROC_WRITE], - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->cred, - }; - /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ data->args.stable = NFS_FILE_SYNC; - - /* Finalize the task. */ - rpc_call_setup(&data->task, &msg, 0); + msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } static void -nfs_proc_commit_setup(struct nfs_write_data *data, int how) +nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) { BUG(); } @@ -609,7 +593,9 @@ nfs_proc_commit_setup(struct nfs_write_data *data, int how) static int nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { - return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl); + struct inode *inode = filp->f_path.dentry->d_inode; + + return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4587a86adaac..8fd6dfbe1bc3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -160,12 +160,26 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset) { - struct inode *inode; - int flags; + struct inode *inode = req->wb_context->path.dentry->d_inode; + int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = req->wb_context->cred, + }; + struct rpc_task_setup task_setup_data = { + .task = &data->task, + .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, + .callback_ops = call_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC | swap_flags, + }; data->req = req; - data->inode = inode = req->wb_context->path.dentry->d_inode; - data->cred = req->wb_context->cred; + data->inode = inode; + data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -180,11 +194,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, nfs_fattr_init(&data->fattr); /* Set up the initial task struct. */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); - NFS_PROTO(inode)->read_setup(data); - - data->task.tk_cookie = (unsigned long)inode; + NFS_PROTO(inode)->read_setup(data, &msg); dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", data->task.tk_pid, @@ -192,6 +202,10 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, (long long)NFS_FILEID(inode), count, (unsigned long long)data->args.offset); + + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task(task); } static void @@ -208,19 +222,6 @@ nfs_async_read_error(struct list_head *head) } /* - * Start an async read operation - */ -static void nfs_execute_read(struct nfs_read_data *data) -{ - struct rpc_clnt *clnt = NFS_CLIENT(data->inode); - sigset_t oldset; - - rpc_clnt_sigmask(clnt, &oldset); - rpc_execute(&data->task); - rpc_clnt_sigunmask(clnt, &oldset); -} - -/* * Generate multiple requests to fill a single page. * * We optimize to reduce the number of read operations on the wire. If we @@ -274,7 +275,6 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne rsize, offset); offset += rsize; nbytes -= rsize; - nfs_execute_read(data); } while (nbytes != 0); return 0; @@ -312,8 +312,6 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned req = nfs_list_entry(data->pages.next); nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); - - nfs_execute_read(data); return 0; out_bad: nfs_async_read_error(head); @@ -338,7 +336,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); if (task->tk_status == -ESTALE) { - set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); + set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); nfs_mark_for_revalidate(data->inode); } return 0; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0b0c72a072ff..22c49c02897d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -45,6 +45,8 @@ #include <linux/nfs_idmap.h> #include <linux/vfs.h> #include <linux/inet.h> +#include <linux/in6.h> +#include <net/ipv6.h> #include <linux/nfs_xdr.h> #include <linux/magic.h> #include <linux/parser.h> @@ -83,11 +85,11 @@ enum { Opt_actimeo, Opt_namelen, Opt_mountport, - Opt_mountprog, Opt_mountvers, - Opt_nfsprog, Opt_nfsvers, + Opt_mountvers, + Opt_nfsvers, /* Mount options that take string arguments */ - Opt_sec, Opt_proto, Opt_mountproto, + Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, Opt_addr, Opt_mountaddr, Opt_clientaddr, /* Mount options that are ignored */ @@ -137,9 +139,7 @@ static match_table_t nfs_mount_option_tokens = { { Opt_userspace, "retry=%u" }, { Opt_namelen, "namlen=%u" }, { Opt_mountport, "mountport=%u" }, - { Opt_mountprog, "mountprog=%u" }, { Opt_mountvers, "mountvers=%u" }, - { Opt_nfsprog, "nfsprog=%u" }, { Opt_nfsvers, "nfsvers=%u" }, { Opt_nfsvers, "vers=%u" }, @@ -148,7 +148,7 @@ static match_table_t nfs_mount_option_tokens = { { Opt_mountproto, "mountproto=%s" }, { Opt_addr, "addr=%s" }, { Opt_clientaddr, "clientaddr=%s" }, - { Opt_userspace, "mounthost=%s" }, + { Opt_mounthost, "mounthost=%s" }, { Opt_mountaddr, "mountaddr=%s" }, { Opt_err, NULL } @@ -202,6 +202,7 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs_kill_super(struct super_block *); +static void nfs_put_super(struct super_block *); static struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, @@ -223,6 +224,7 @@ static const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, + .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, @@ -325,6 +327,28 @@ void __exit unregister_nfs_fs(void) unregister_filesystem(&nfs_fs_type); } +void nfs_sb_active(struct nfs_server *server) +{ + atomic_inc(&server->active); +} + +void nfs_sb_deactive(struct nfs_server *server) +{ + if (atomic_dec_and_test(&server->active)) + wake_up(&server->active_wq); +} + +static void nfs_put_super(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + /* + * Make sure there are no outstanding ops to this server. + * If so, wait for them to finish before allowing the + * unmount to continue. + */ + wait_event(server->active_wq, atomic_read(&server->active) == 0); +} + /* * Deliver file system statistics to userspace */ @@ -455,8 +479,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, } seq_printf(m, ",proto=%s", rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); - seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); - seq_printf(m, ",retrans=%u", clp->retrans_count); + seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ); + seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); } @@ -469,8 +493,9 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) nfs_show_mount_options(m, nfss, 0); - seq_printf(m, ",addr="NIPQUAD_FMT, - NIPQUAD(nfss->nfs_client->cl_addr.sin_addr)); + seq_printf(m, ",addr=%s", + rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient, + RPC_DISPLAY_ADDR)); return 0; } @@ -507,7 +532,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",namelen=%d", nfss->namelen); #ifdef CONFIG_NFS_V4 - if (nfss->nfs_client->cl_nfsversion == 4) { + if (nfss->nfs_client->rpc_ops->version == 4) { seq_printf(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); @@ -575,16 +600,40 @@ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) } /* - * Sanity-check a server address provided by the mount command + * Set the port number in an address. Be agnostic about the address family. + */ +static void nfs_set_port(struct sockaddr *sap, unsigned short port) +{ + switch (sap->sa_family) { + case AF_INET: { + struct sockaddr_in *ap = (struct sockaddr_in *)sap; + ap->sin_port = htons(port); + break; + } + case AF_INET6: { + struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; + ap->sin6_port = htons(port); + break; + } + } +} + +/* + * Sanity-check a server address provided by the mount command. + * + * Address family must be initialized, and address must not be + * the ANY address for that family. */ static int nfs_verify_server_address(struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: { - struct sockaddr_in *sa = (struct sockaddr_in *) addr; - if (sa->sin_addr.s_addr != INADDR_ANY) - return 1; - break; + struct sockaddr_in *sa = (struct sockaddr_in *)addr; + return sa->sin_addr.s_addr != INADDR_ANY; + } + case AF_INET6: { + struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; + return !ipv6_addr_any(sa); } } @@ -592,6 +641,40 @@ static int nfs_verify_server_address(struct sockaddr *addr) } /* + * Parse string addresses passed in via a mount option, + * and construct a sockaddr based on the result. + * + * If address parsing fails, set the sockaddr's address + * family to AF_UNSPEC to force nfs_verify_server_address() + * to punt the mount. + */ +static void nfs_parse_server_address(char *value, + struct sockaddr *sap, + size_t *len) +{ + if (strchr(value, ':')) { + struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; + u8 *addr = (u8 *)&ap->sin6_addr.in6_u; + + ap->sin6_family = AF_INET6; + *len = sizeof(*ap); + if (in6_pton(value, -1, addr, '\0', NULL)) + return; + } else { + struct sockaddr_in *ap = (struct sockaddr_in *)sap; + u8 *addr = (u8 *)&ap->sin_addr.s_addr; + + ap->sin_family = AF_INET; + *len = sizeof(*ap); + if (in4_pton(value, -1, addr, '\0', NULL)) + return; + } + + sap->sa_family = AF_UNSPEC; + *len = 0; +} + +/* * Error-check and convert a string of mount options from user space into * a data structure */ @@ -599,6 +682,7 @@ static int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) { char *p, *string; + unsigned short port = 0; if (!raw) { dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); @@ -701,7 +785,7 @@ static int nfs_parse_mount_options(char *raw, return 0; if (option < 0 || option > 65535) return 0; - mnt->nfs_server.address.sin_port = htons(option); + port = option; break; case Opt_rsize: if (match_int(args, &mnt->rsize)) @@ -763,13 +847,6 @@ static int nfs_parse_mount_options(char *raw, return 0; mnt->mount_server.port = option; break; - case Opt_mountprog: - if (match_int(args, &option)) - return 0; - if (option < 0) - return 0; - mnt->mount_server.program = option; - break; case Opt_mountvers: if (match_int(args, &option)) return 0; @@ -777,13 +854,6 @@ static int nfs_parse_mount_options(char *raw, return 0; mnt->mount_server.version = option; break; - case Opt_nfsprog: - if (match_int(args, &option)) - return 0; - if (option < 0) - return 0; - mnt->nfs_server.program = option; - break; case Opt_nfsvers: if (match_int(args, &option)) return 0; @@ -927,24 +997,32 @@ static int nfs_parse_mount_options(char *raw, string = match_strdup(args); if (string == NULL) goto out_nomem; - mnt->nfs_server.address.sin_family = AF_INET; - mnt->nfs_server.address.sin_addr.s_addr = - in_aton(string); + nfs_parse_server_address(string, (struct sockaddr *) + &mnt->nfs_server.address, + &mnt->nfs_server.addrlen); kfree(string); break; case Opt_clientaddr: string = match_strdup(args); if (string == NULL) goto out_nomem; + kfree(mnt->client_address); mnt->client_address = string; break; + case Opt_mounthost: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + kfree(mnt->mount_server.hostname); + mnt->mount_server.hostname = string; + break; case Opt_mountaddr: string = match_strdup(args); if (string == NULL) goto out_nomem; - mnt->mount_server.address.sin_family = AF_INET; - mnt->mount_server.address.sin_addr.s_addr = - in_aton(string); + nfs_parse_server_address(string, (struct sockaddr *) + &mnt->mount_server.address, + &mnt->mount_server.addrlen); kfree(string); break; @@ -957,6 +1035,8 @@ static int nfs_parse_mount_options(char *raw, } } + nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port); + return 1; out_nomem: @@ -987,7 +1067,8 @@ out_unknown: static int nfs_try_mount(struct nfs_parsed_mount_data *args, struct nfs_fh *root_fh) { - struct sockaddr_in sin; + struct sockaddr *sap = (struct sockaddr *)&args->mount_server.address; + char *hostname; int status; if (args->mount_server.version == 0) { @@ -997,25 +1078,32 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, args->mount_server.version = NFS_MNT_VERSION; } + if (args->mount_server.hostname) + hostname = args->mount_server.hostname; + else + hostname = args->nfs_server.hostname; + /* * Construct the mount server's address. */ - if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY) - sin = args->mount_server.address; - else - sin = args->nfs_server.address; + if (args->mount_server.address.ss_family == AF_UNSPEC) { + memcpy(sap, &args->nfs_server.address, + args->nfs_server.addrlen); + args->mount_server.addrlen = args->nfs_server.addrlen; + } + /* * autobind will be used if mount_server.port == 0 */ - sin.sin_port = htons(args->mount_server.port); + nfs_set_port(sap, args->mount_server.port); /* * Now ask the mount server to map our export path * to a file handle. */ - status = nfs_mount((struct sockaddr *) &sin, - sizeof(sin), - args->nfs_server.hostname, + status = nfs_mount(sap, + args->mount_server.addrlen, + hostname, args->nfs_server.export_path, args->mount_server.version, args->mount_server.protocol, @@ -1023,8 +1111,8 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, if (status == 0) return 0; - dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT - ", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status); + dfprintk(MOUNT, "NFS: unable to mount server %s, error %d", + hostname, status); return status; } @@ -1043,9 +1131,6 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, * * + breaking back: trying proto=udp after proto=tcp, v2 after v3, * mountproto=tcp after mountproto=udp, and so on - * - * XXX: as far as I can tell, changing the NFS program number is not - * supported in the NFS client. */ static int nfs_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, @@ -1069,9 +1154,7 @@ static int nfs_validate_mount_data(void *options, args->acdirmin = 30; args->acdirmax = 60; args->mount_server.protocol = XPRT_TRANSPORT_UDP; - args->mount_server.program = NFS_MNT_PROGRAM; args->nfs_server.protocol = XPRT_TRANSPORT_TCP; - args->nfs_server.program = NFS_PROGRAM; switch (data->version) { case 1: @@ -1102,9 +1185,6 @@ static int nfs_validate_mount_data(void *options, memset(mntfh->data + mntfh->size, 0, sizeof(mntfh->data) - mntfh->size); - if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) - goto out_no_address; - /* * Translate to nfs_parsed_mount_data, which nfs_fill_super * can deal with. @@ -1119,7 +1199,14 @@ static int nfs_validate_mount_data(void *options, args->acregmax = data->acregmax; args->acdirmin = data->acdirmin; args->acdirmax = data->acdirmax; - args->nfs_server.address = data->addr; + + memcpy(&args->nfs_server.address, &data->addr, + sizeof(data->addr)); + args->nfs_server.addrlen = sizeof(data->addr); + if (!nfs_verify_server_address((struct sockaddr *) + &args->nfs_server.address)) + goto out_no_address; + if (!(data->flags & NFS_MOUNT_TCP)) args->nfs_server.protocol = XPRT_TRANSPORT_UDP; /* N.B. caller will free nfs_server.hostname in all cases */ @@ -1322,15 +1409,50 @@ static int nfs_set_super(struct super_block *s, void *data) return ret; } +static int nfs_compare_super_address(struct nfs_server *server1, + struct nfs_server *server2) +{ + struct sockaddr *sap1, *sap2; + + sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr; + sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr; + + if (sap1->sa_family != sap2->sa_family) + return 0; + + switch (sap1->sa_family) { + case AF_INET: { + struct sockaddr_in *sin1 = (struct sockaddr_in *)sap1; + struct sockaddr_in *sin2 = (struct sockaddr_in *)sap2; + if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr) + return 0; + if (sin1->sin_port != sin2->sin_port) + return 0; + break; + } + case AF_INET6: { + struct sockaddr_in6 *sin1 = (struct sockaddr_in6 *)sap1; + struct sockaddr_in6 *sin2 = (struct sockaddr_in6 *)sap2; + if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) + return 0; + if (sin1->sin6_port != sin2->sin6_port) + return 0; + break; + } + default: + return 0; + } + + return 1; +} + static int nfs_compare_super(struct super_block *sb, void *data) { struct nfs_sb_mountdata *sb_mntdata = data; struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb); int mntflags = sb_mntdata->mntflags; - if (memcmp(&old->nfs_client->cl_addr, - &server->nfs_client->cl_addr, - sizeof(old->nfs_client->cl_addr)) != 0) + if (!nfs_compare_super_address(old, server)) return 0; /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ if (old->flags & NFS_MOUNT_UNSHARED) @@ -1400,6 +1522,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, out: kfree(data.nfs_server.hostname); + kfree(data.mount_server.hostname); return error; out_err_nosb: @@ -1528,12 +1651,35 @@ static void nfs4_fill_super(struct super_block *sb) } /* + * If the user didn't specify a port, set the port number to + * the NFS version 4 default port. + */ +static void nfs4_default_port(struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: { + struct sockaddr_in *ap = (struct sockaddr_in *)sap; + if (ap->sin_port == 0) + ap->sin_port = htons(NFS_PORT); + break; + } + case AF_INET6: { + struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; + if (ap->sin6_port == 0) + ap->sin6_port = htons(NFS_PORT); + break; + } + } +} + +/* * Validate NFSv4 mount options */ static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name) { + struct sockaddr_in *ap; struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; char *c; @@ -1554,18 +1700,21 @@ static int nfs4_validate_mount_data(void *options, switch (data->version) { case 1: - if (data->host_addrlen != sizeof(args->nfs_server.address)) + ap = (struct sockaddr_in *)&args->nfs_server.address; + if (data->host_addrlen > sizeof(args->nfs_server.address)) goto out_no_address; - if (copy_from_user(&args->nfs_server.address, - data->host_addr, - sizeof(args->nfs_server.address))) + if (data->host_addrlen == 0) + goto out_no_address; + args->nfs_server.addrlen = data->host_addrlen; + if (copy_from_user(ap, data->host_addr, data->host_addrlen)) return -EFAULT; - if (args->nfs_server.address.sin_port == 0) - args->nfs_server.address.sin_port = htons(NFS_PORT); if (!nfs_verify_server_address((struct sockaddr *) &args->nfs_server.address)) goto out_no_address; + nfs4_default_port((struct sockaddr *) + &args->nfs_server.address); + switch (data->auth_flavourlen) { case 0: args->auth_flavors[0] = RPC_AUTH_UNIX; @@ -1623,6 +1772,9 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) return -EINVAL; + nfs4_default_port((struct sockaddr *) + &args->nfs_server.address); + switch (args->auth_flavor_len) { case 0: args->auth_flavors[0] = RPC_AUTH_UNIX; @@ -1643,21 +1795,16 @@ static int nfs4_validate_mount_data(void *options, len = c - dev_name; if (len > NFS4_MAXNAMLEN) return -ENAMETOOLONG; - args->nfs_server.hostname = kzalloc(len, GFP_KERNEL); - if (args->nfs_server.hostname == NULL) - return -ENOMEM; - strncpy(args->nfs_server.hostname, dev_name, len - 1); + /* N.B. caller will free nfs_server.hostname in all cases */ + args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); c++; /* step over the ':' */ len = strlen(c); if (len > NFS4_MAXPATHLEN) return -ENAMETOOLONG; - args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL); - if (args->nfs_server.export_path == NULL) - return -ENOMEM; - strncpy(args->nfs_server.export_path, c, len); + args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL); - dprintk("MNTPATH: %s\n", args->nfs_server.export_path); + dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path); if (args->client_address == NULL) goto out_no_client_address; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 233ad38161f9..757415363422 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -14,6 +14,8 @@ #include <linux/sched.h> #include <linux/wait.h> +#include "internal.h" + struct nfs_unlinkdata { struct hlist_node list; struct nfs_removeargs args; @@ -69,24 +71,6 @@ static void nfs_dec_sillycount(struct inode *dir) } /** - * nfs_async_unlink_init - Initialize the RPC info - * task: rpc_task of the sillydelete - */ -static void nfs_async_unlink_init(struct rpc_task *task, void *calldata) -{ - struct nfs_unlinkdata *data = calldata; - struct inode *dir = data->dir; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->cred, - }; - - NFS_PROTO(dir)->unlink_setup(&msg, dir); - rpc_call_setup(task, &msg, 0); -} - -/** * nfs_async_unlink_done - Sillydelete post-processing * @task: rpc_task of the sillydelete * @@ -113,32 +97,45 @@ static void nfs_async_unlink_release(void *calldata) struct nfs_unlinkdata *data = calldata; nfs_dec_sillycount(data->dir); + nfs_sb_deactive(NFS_SERVER(data->dir)); nfs_free_unlinkdata(data); } static const struct rpc_call_ops nfs_unlink_ops = { - .rpc_call_prepare = nfs_async_unlink_init, .rpc_call_done = nfs_async_unlink_done, .rpc_release = nfs_async_unlink_release, }; static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) { + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_message = &msg, + .callback_ops = &nfs_unlink_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; struct rpc_task *task; struct dentry *alias; alias = d_lookup(parent, &data->args.name); if (alias != NULL) { int ret = 0; + /* * Hey, we raced with lookup... See if we need to transfer * the sillyrename information to the aliased dentry. */ nfs_free_dname(data); spin_lock(&alias->d_lock); - if (!(alias->d_flags & DCACHE_NFSFS_RENAMED)) { + if (alias->d_inode != NULL && + !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { alias->d_fsdata = data; - alias->d_flags ^= DCACHE_NFSFS_RENAMED; + alias->d_flags |= DCACHE_NFSFS_RENAMED; ret = 1; } spin_unlock(&alias->d_lock); @@ -151,10 +148,14 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n nfs_dec_sillycount(dir); return 0; } + nfs_sb_active(NFS_SERVER(dir)); data->args.fh = NFS_FH(dir); nfs_fattr_init(&data->res.dir_attr); - task = rpc_run_task(NFS_CLIENT(dir), RPC_TASK_ASYNC, &nfs_unlink_ops, data); + NFS_PROTO(dir)->unlink_setup(&msg, dir); + + task_setup_data.rpc_client = NFS_CLIENT(dir); + task = rpc_run_task(&task_setup_data); if (!IS_ERR(task)) rpc_put_task(task); return 1; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 51cc1bd6a116..5ac5b27b639a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -196,7 +196,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, } /* Update file length */ nfs_grow_file(page, offset, count); - nfs_unlock_request(req); + nfs_clear_page_tag_locked(req); return 0; } @@ -252,7 +252,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct page *page) { struct inode *inode = page->mapping->host; - struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; int ret; @@ -263,10 +262,10 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, spin_unlock(&inode->i_lock); return 0; } - if (nfs_lock_request_dontget(req)) + if (nfs_set_page_tag_locked(req)) break; /* Note: If we hold the page lock, as is the case in nfs_writepage, - * then the call to nfs_lock_request_dontget() will always + * then the call to nfs_set_page_tag_locked() will always * succeed provided that someone hasn't already marked the * request as dirty (in which case we don't care). */ @@ -280,7 +279,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { /* This request is marked for commit */ spin_unlock(&inode->i_lock); - nfs_unlock_request(req); + nfs_clear_page_tag_locked(req); nfs_pageio_complete(pgio); return 0; } @@ -288,8 +287,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, spin_unlock(&inode->i_lock); BUG(); } - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, - NFS_PAGE_TAG_LOCKED); spin_unlock(&inode->i_lock); nfs_pageio_add_request(pgio, req); return 0; @@ -381,6 +378,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; kref_get(&req->wb_kref); + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 0; } @@ -596,7 +594,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req) { - if (!nfs_lock_request_dontget(req)) { + if (!nfs_set_page_tag_locked(req)) { int error; spin_unlock(&inode->i_lock); @@ -646,7 +644,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, || req->wb_page != page || !nfs_dirty_request(req) || offset > rqend || end < req->wb_offset) { - nfs_unlock_request(req); + nfs_clear_page_tag_locked(req); return ERR_PTR(-EBUSY); } @@ -755,7 +753,7 @@ static void nfs_writepage_release(struct nfs_page *req) nfs_clear_page_tag_locked(req); } -static inline int flush_task_priority(int how) +static int flush_task_priority(int how) { switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { case FLUSH_HIGHPRI: @@ -775,15 +773,31 @@ static void nfs_write_rpcsetup(struct nfs_page *req, unsigned int count, unsigned int offset, int how) { - struct inode *inode; - int flags; + struct inode *inode = req->wb_context->path.dentry->d_inode; + int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + int priority = flush_task_priority(how); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = req->wb_context->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(inode), + .task = &data->task, + .rpc_message = &msg, + .callback_ops = call_ops, + .callback_data = data, + .flags = flags, + .priority = priority, + }; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ data->req = req; data->inode = inode = req->wb_context->path.dentry->d_inode; - data->cred = req->wb_context->cred; + data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -791,6 +805,12 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->args.pages = data->pagevec; data->args.count = count; data->args.context = req->wb_context; + data->args.stable = NFS_UNSTABLE; + if (how & FLUSH_STABLE) { + data->args.stable = NFS_DATA_SYNC; + if (!NFS_I(inode)->ncommit) + data->args.stable = NFS_FILE_SYNC; + } data->res.fattr = &data->fattr; data->res.count = count; @@ -798,12 +818,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, nfs_fattr_init(&data->fattr); /* Set up the initial task struct. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); - NFS_PROTO(inode)->write_setup(data, how); - - data->task.tk_priority = flush_task_priority(how); - data->task.tk_cookie = (unsigned long)inode; + NFS_PROTO(inode)->write_setup(data, &msg); dprintk("NFS: %5u initiated write call " "(req %s/%Ld, %u bytes @ offset %Lu)\n", @@ -812,16 +827,10 @@ static void nfs_write_rpcsetup(struct nfs_page *req, (long long)NFS_FILEID(inode), count, (unsigned long long)data->args.offset); -} - -static void nfs_execute_write(struct nfs_write_data *data) -{ - struct rpc_clnt *clnt = NFS_CLIENT(data->inode); - sigset_t oldset; - rpc_clnt_sigmask(clnt, &oldset); - rpc_execute(&data->task); - rpc_clnt_sigunmask(clnt, &oldset); + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task(task); } /* @@ -868,7 +877,6 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned wsize, offset, how); offset += wsize; nbytes -= wsize; - nfs_execute_write(data); } while (nbytes != 0); return 0; @@ -916,7 +924,6 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i /* Set up the argument struct */ nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); - nfs_execute_write(data); return 0; out_bad: while (!list_empty(head)) { @@ -932,7 +939,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { - int wsize = NFS_SERVER(inode)->wsize; + size_t wsize = NFS_SERVER(inode)->wsize; if (wsize < PAGE_CACHE_SIZE) nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); @@ -1146,19 +1153,33 @@ static void nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data, int how) { - struct nfs_page *first; - struct inode *inode; - int flags; + struct nfs_page *first = nfs_list_entry(head->next); + struct inode *inode = first->wb_context->path.dentry->d_inode; + int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + int priority = flush_task_priority(how); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = first->wb_context->cred, + }; + struct rpc_task_setup task_setup_data = { + .task = &data->task, + .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, + .callback_ops = &nfs_commit_ops, + .callback_data = data, + .flags = flags, + .priority = priority, + }; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ list_splice_init(head, &data->pages); - first = nfs_list_entry(data->pages.next); - inode = first->wb_context->path.dentry->d_inode; data->inode = inode; - data->cred = first->wb_context->cred; + data->cred = msg.rpc_cred; data->args.fh = NFS_FH(data->inode); /* Note: we always request a commit of the entire inode */ @@ -1170,14 +1191,13 @@ static void nfs_commit_rpcsetup(struct list_head *head, nfs_fattr_init(&data->fattr); /* Set up the initial task struct. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data); - NFS_PROTO(inode)->commit_setup(data, how); + NFS_PROTO(inode)->commit_setup(data, &msg); - data->task.tk_priority = flush_task_priority(how); - data->task.tk_cookie = (unsigned long)inode; - dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task(task); } /* @@ -1197,7 +1217,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) /* Set up the argument struct */ nfs_commit_rpcsetup(head, data, how); - nfs_execute_write(data); return 0; out_bad: while (!list_empty(head)) { diff --git a/fs/splice.c b/fs/splice.c index 0a0b79b01d05..1577a7391d23 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1031,7 +1031,11 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, goto out_release; } +done: pipe->nrbufs = pipe->curbuf = 0; + if (bytes > 0) + file_accessed(in); + return bytes; out_release: @@ -1047,16 +1051,11 @@ out_release: buf->ops = NULL; } } - pipe->nrbufs = pipe->curbuf = 0; - - /* - * If we transferred some data, return the number of bytes: - */ - if (bytes > 0) - return bytes; - return ret; + if (!bytes) + bytes = ret; + goto done; } EXPORT_SYMBOL(splice_direct_to_actor); |