diff options
author | Jeff Garzik <jgarzik@pobox.com> | 2005-10-30 01:56:31 -0500 |
---|---|---|
committer | Jeff Garzik <jgarzik@pobox.com> | 2005-10-30 01:56:31 -0500 |
commit | 81cfb8864c73230eb1c37753aba517db15cf4d8f (patch) | |
tree | 649ff25543834cf9983ea41b93126bea97d75475 /arch | |
parent | 0169e284f6b6b263cc7c2ed25986b96cd6fda610 (diff) | |
parent | 9f75e1eff3edb2bb07349b94c28f4f2a6c66ca43 (diff) |
Merge branch 'master'
Diffstat (limited to 'arch')
60 files changed, 390 insertions, 1544 deletions
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index c7481d59b6df..6d5251254f68 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -371,6 +371,8 @@ show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_online_node(nid) { + unsigned long flags; + pgdat_resize_lock(NODE_DATA(nid), &flags); i = node_spanned_pages(nid); while (i-- > 0) { struct page *page = nid_page_nr(nid, i); @@ -384,6 +386,7 @@ show_mem(void) else shared += page_count(page) - 1; } + pgdat_resize_unlock(NODE_DATA(nid), &flags); } printk("%ld pages of RAM\n",total); printk("%ld free pages\n",free); diff --git a/arch/alpha/mm/remap.c b/arch/alpha/mm/remap.c index 19817ad3d89b..a78356c3ead5 100644 --- a/arch/alpha/mm/remap.c +++ b/arch/alpha/mm/remap.c @@ -2,7 +2,6 @@ #include <asm/pgalloc.h> #include <asm/cacheflush.h> -/* called with the page_table_lock held */ static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, unsigned long phys_addr, unsigned long flags) @@ -31,7 +30,6 @@ remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, } while (address && (address < end)); } -/* called with the page_table_lock held */ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, unsigned long phys_addr, unsigned long flags) @@ -46,7 +44,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, if (address >= end) BUG(); do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, @@ -70,7 +68,6 @@ __alpha_remap_area_pages(unsigned long address, unsigned long phys_addr, flush_cache_all(); if (address >= end) BUG(); - spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; pmd = pmd_alloc(&init_mm, dir, address); @@ -84,7 +81,6 @@ __alpha_remap_area_pages(unsigned long address, unsigned long phys_addr, address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); return error; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index a94d75fef598..a917e3dd3666 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -139,93 +139,33 @@ struct iwmmxt_sigframe { unsigned long storage[0x98/4]; }; -static int page_present(struct mm_struct *mm, void __user *uptr, int wr) -{ - unsigned long addr = (unsigned long)uptr; - pgd_t *pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pmd_t *pmd = pmd_offset(pgd, addr); - if (pmd_present(*pmd)) { - pte_t *pte = pte_offset_map(pmd, addr); - return (pte_present(*pte) && (!wr || pte_write(*pte))); - } - } - return 0; -} - -static int copy_locked(void __user *uptr, void *kptr, size_t size, int write, - void (*copyfn)(void *, void __user *)) -{ - unsigned char v, __user *userptr = uptr; - int err = 0; - - do { - struct mm_struct *mm; - - if (write) { - __put_user_error(0, userptr, err); - __put_user_error(0, userptr + size - 1, err); - } else { - __get_user_error(v, userptr, err); - __get_user_error(v, userptr + size - 1, err); - } - - if (err) - break; - - mm = current->mm; - spin_lock(&mm->page_table_lock); - if (page_present(mm, userptr, write) && - page_present(mm, userptr + size - 1, write)) { - copyfn(kptr, uptr); - } else - err = 1; - spin_unlock(&mm->page_table_lock); - } while (err); - - return err; -} - static int preserve_iwmmxt_context(struct iwmmxt_sigframe *frame) { - int err = 0; + char kbuf[sizeof(*frame) + 8]; + struct iwmmxt_sigframe *kframe; /* the iWMMXt context must be 64 bit aligned */ - WARN_ON((unsigned long)frame & 7); - - __put_user_error(IWMMXT_MAGIC0, &frame->magic0, err); - __put_user_error(IWMMXT_MAGIC1, &frame->magic1, err); - - /* - * iwmmxt_task_copy() doesn't check user permissions. - * Let's do a dummy write on the upper boundary to ensure - * access to user mem is OK all way up. - */ - err |= copy_locked(&frame->storage, current_thread_info(), - sizeof(frame->storage), 1, iwmmxt_task_copy); - return err; + kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7); + kframe->magic0 = IWMMXT_MAGIC0; + kframe->magic1 = IWMMXT_MAGIC1; + iwmmxt_task_copy(current_thread_info(), &kframe->storage); + return __copy_to_user(frame, kframe, sizeof(*frame)); } static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame) { - unsigned long magic0, magic1; - int err = 0; + char kbuf[sizeof(*frame) + 8]; + struct iwmmxt_sigframe *kframe; - /* the iWMMXt context is 64 bit aligned */ - WARN_ON((unsigned long)frame & 7); - - /* - * Validate iWMMXt context signature. - * Also, iwmmxt_task_restore() doesn't check user permissions. - * Let's do a dummy write on the upper boundary to ensure - * access to user mem is OK all way up. - */ - __get_user_error(magic0, &frame->magic0, err); - __get_user_error(magic1, &frame->magic1, err); - if (!err && magic0 == IWMMXT_MAGIC0 && magic1 == IWMMXT_MAGIC1) - err = copy_locked(&frame->storage, current_thread_info(), - sizeof(frame->storage), 0, iwmmxt_task_restore); - return err; + /* the iWMMXt context must be 64 bit aligned */ + kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7); + if (__copy_from_user(kframe, frame, sizeof(*frame))) + return -1; + if (kframe->magic0 != IWMMXT_MAGIC0 || + kframe->magic1 != IWMMXT_MAGIC1) + return -1; + iwmmxt_task_restore(current_thread_info(), &kframe->storage); + return 0; } #endif diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index baa09601a64e..66e5a0516f23 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -483,29 +483,33 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) unsigned long addr = regs->ARM_r2; struct mm_struct *mm = current->mm; pgd_t *pgd; pmd_t *pmd; pte_t *pte; + spinlock_t *ptl; regs->ARM_cpsr &= ~PSR_C_BIT; - spin_lock(&mm->page_table_lock); + down_read(&mm->mmap_sem); pgd = pgd_offset(mm, addr); if (!pgd_present(*pgd)) goto bad_access; pmd = pmd_offset(pgd, addr); if (!pmd_present(*pmd)) goto bad_access; - pte = pte_offset_map(pmd, addr); - if (!pte_present(*pte) || !pte_write(*pte)) + pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + if (!pte_present(*pte) || !pte_write(*pte)) { + pte_unmap_unlock(pte, ptl); goto bad_access; + } val = *(unsigned long *)addr; val -= regs->ARM_r0; if (val == 0) { *(unsigned long *)addr = regs->ARM_r1; regs->ARM_cpsr |= PSR_C_BIT; } - spin_unlock(&mm->page_table_lock); + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); return val; bad_access: - spin_unlock(&mm->page_table_lock); + up_read(&mm->mmap_sem); /* simulate a write access fault */ do_DataAbort(addr, 15 + (1 << 11), regs); return -1; diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c index 82f4d5e27c54..47b0b767f080 100644 --- a/arch/arm/mm/consistent.c +++ b/arch/arm/mm/consistent.c @@ -397,8 +397,6 @@ static int __init consistent_init(void) pte_t *pte; int ret = 0; - spin_lock(&init_mm.page_table_lock); - do { pgd = pgd_offset(&init_mm, CONSISTENT_BASE); pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE); @@ -409,7 +407,7 @@ static int __init consistent_init(void) } WARN_ON(!pmd_none(*pmd)); - pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE); + pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); if (!pte) { printk(KERN_ERR "%s: no pte tables\n", __func__); ret = -ENOMEM; @@ -419,8 +417,6 @@ static int __init consistent_init(void) consistent_pte = pte; } while (0); - spin_unlock(&init_mm.page_table_lock); - return ret; } diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index be4ab3d73c91..7fc1b35a6746 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -26,6 +26,11 @@ static unsigned long shared_pte_mask = L_PTE_CACHEABLE; /* * We take the easy way out of this problem - we make the * PTE uncacheable. However, we leave the write buffer on. + * + * Note that the pte lock held when calling update_mmu_cache must also + * guard the pte (somewhere else in the same mm) that we modify here. + * Therefore those configurations which might call adjust_pte (those + * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock. */ static int adjust_pte(struct vm_area_struct *vma, unsigned long address) { @@ -127,7 +132,7 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page); * 2. If we have multiple shared mappings of the same space in * an object, we need to deal with the cache aliasing issues. * - * Note that the page_table_lock will be held. + * Note that the pte lock will be held. */ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 6fb1258df1b5..0f128c28fee4 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -75,7 +75,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, pgprot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | flags); do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, pgprot); @@ -97,7 +97,6 @@ remap_area_pages(unsigned long start, unsigned long phys_addr, phys_addr -= address; dir = pgd_offset(&init_mm, address); BUG_ON(address >= end); - spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd = pmd_alloc(&init_mm, dir, address); if (!pmd) { @@ -114,7 +113,6 @@ remap_area_pages(unsigned long start, unsigned long phys_addr, dir++; } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); flush_cache_vmap(start, end); return err; } diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c index 61bc2fa0511e..1221fdde1769 100644 --- a/arch/arm/mm/mm-armv.c +++ b/arch/arm/mm/mm-armv.c @@ -180,11 +180,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) if (!vectors_high()) { /* - * This lock is here just to satisfy pmd_alloc and pte_lock - */ - spin_lock(&mm->page_table_lock); - - /* * On ARM, first page must always be allocated since it * contains the machine vectors. */ @@ -201,23 +196,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) set_pte(new_pte, *init_pte); pte_unmap_nested(init_pte); pte_unmap(new_pte); - - spin_unlock(&mm->page_table_lock); } return new_pgd; no_pte: - spin_unlock(&mm->page_table_lock); pmd_free(new_pmd); - free_pages((unsigned long)new_pgd, 2); - return NULL; - no_pmd: - spin_unlock(&mm->page_table_lock); free_pages((unsigned long)new_pgd, 2); - return NULL; - no_pgd: return NULL; } @@ -243,6 +229,7 @@ void free_pgd_slow(pgd_t *pgd) pte = pmd_page(*pmd); pmd_clear(pmd); dec_page_state(nr_page_table_pages); + pte_lock_deinit(pte); pte_free(pte); pmd_free(pmd); free: diff --git a/arch/arm/oprofile/backtrace.c b/arch/arm/oprofile/backtrace.c index df35c452a8bf..7c22c12618cc 100644 --- a/arch/arm/oprofile/backtrace.c +++ b/arch/arm/oprofile/backtrace.c @@ -49,42 +49,22 @@ static struct frame_tail* kernel_backtrace(struct frame_tail *tail) static struct frame_tail* user_backtrace(struct frame_tail *tail) { - struct frame_tail buftail; + struct frame_tail buftail[2]; - /* hardware pte might not be valid due to dirty/accessed bit emulation - * so we use copy_from_user and benefit from exception fixups */ - if (copy_from_user(&buftail, tail, sizeof(struct frame_tail))) + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) return NULL; - oprofile_add_trace(buftail.lr); + oprofile_add_trace(buftail[0].lr); /* frame pointers should strictly progress back up the stack * (towards higher addresses) */ - if (tail >= buftail.fp) + if (tail >= buftail[0].fp) return NULL; - return buftail.fp-1; -} - -/* Compare two addresses and see if they're on the same page */ -#define CMP_ADDR_EQUAL(x,y,offset) ((((unsigned long) x) >> PAGE_SHIFT) \ - == ((((unsigned long) y) + offset) >> PAGE_SHIFT)) - -/* check that the page(s) containing the frame tail are present */ -static int pages_present(struct frame_tail *tail) -{ - struct mm_struct * mm = current->mm; - - if (!check_user_page_readable(mm, (unsigned long)tail)) - return 0; - - if (CMP_ADDR_EQUAL(tail, tail, 8)) - return 1; - - if (!check_user_page_readable(mm, ((unsigned long)tail) + 8)) - return 0; - - return 1; + return buftail[0].fp-1; } /* @@ -118,7 +98,6 @@ static int valid_kernel_stack(struct frame_tail *tail, struct pt_regs *regs) void arm_backtrace(struct pt_regs * const regs, unsigned int depth) { struct frame_tail *tail; - unsigned long last_address = 0; tail = ((struct frame_tail *) regs->ARM_fp) - 1; @@ -132,13 +111,6 @@ void arm_backtrace(struct pt_regs * const regs, unsigned int depth) return; } - while (depth-- && tail && !((unsigned long) tail & 3)) { - if ((!CMP_ADDR_EQUAL(last_address, tail, 0) - || !CMP_ADDR_EQUAL(last_address, tail, 8)) - && !pages_present(tail)) - return; - last_address = (unsigned long) tail; + while (depth-- && tail && !((unsigned long) tail & 3)) tail = user_backtrace(tail); - } } - diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c index 8e8a2bb2487d..34def6397c3c 100644 --- a/arch/arm26/mm/memc.c +++ b/arch/arm26/mm/memc.c @@ -79,12 +79,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) goto no_pgd; /* - * This lock is here just to satisfy pmd_alloc and pte_lock - * FIXME: I bet we could avoid taking it pretty much altogether - */ - spin_lock(&mm->page_table_lock); - - /* * On ARM, first page must always be allocated since it contains * the machine vectors. */ @@ -92,7 +86,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) if (!new_pmd) goto no_pmd; - new_pte = pte_alloc_kernel(mm, new_pmd, 0); + new_pte = pte_alloc_map(mm, new_pmd, 0); if (!new_pte) goto no_pte; @@ -101,6 +95,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) init_pte = pte_offset(init_pmd, 0); set_pte(new_pte, *init_pte); + pte_unmap(new_pte); /* * the page table entries are zeroed @@ -112,23 +107,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR, (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t)); - spin_unlock(&mm->page_table_lock); - /* update MEMC tables */ cpu_memc_update_all(new_pgd); return new_pgd; no_pte: - spin_unlock(&mm->page_table_lock); pmd_free(new_pmd); - free_pgd_slow(new_pgd); - return NULL; - no_pmd: - spin_unlock(&mm->page_table_lock); free_pgd_slow(new_pgd); - return NULL; - no_pgd: return NULL; } diff --git a/arch/cris/arch-v32/mm/tlb.c b/arch/cris/arch-v32/mm/tlb.c index 8233406798d3..b08a28bb58ab 100644 --- a/arch/cris/arch-v32/mm/tlb.c +++ b/arch/cris/arch-v32/mm/tlb.c @@ -175,6 +175,8 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) return 0; } +static DEFINE_SPINLOCK(mmu_context_lock); + /* Called in schedule() just before actually doing the switch_to. */ void switch_mm(struct mm_struct *prev, struct mm_struct *next, @@ -183,10 +185,10 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, int cpu = smp_processor_id(); /* Make sure there is a MMU context. */ - spin_lock(&next->page_table_lock); + spin_lock(&mmu_context_lock); get_mmu_context(next); cpu_set(cpu, next->cpu_vm_mask); - spin_unlock(&next->page_table_lock); + spin_unlock(&mmu_context_lock); /* * Remember the pgd for the fault handlers. Keep a seperate copy of it diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c index ebba11e270fa..a92ac9877582 100644 --- a/arch/cris/mm/ioremap.c +++ b/arch/cris/mm/ioremap.c @@ -52,7 +52,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo if (address >= end) BUG(); do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, prot); @@ -74,7 +74,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, flush_cache_all(); if (address >= end) BUG(); - spin_lock(&init_mm.page_table_lock); do { pud_t *pud; pmd_t *pmd; @@ -94,7 +93,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return error; } diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c index cfc4f97490c6..342823aad758 100644 --- a/arch/frv/mm/dma-alloc.c +++ b/arch/frv/mm/dma-alloc.c @@ -55,21 +55,18 @@ static int map_page(unsigned long va, unsigned long pa, pgprot_t prot) pte_t *pte; int err = -ENOMEM; - spin_lock(&init_mm.page_table_lock); - /* Use upper 10 bits of VA to index the first level map */ pge = pgd_offset_k(va); pue = pud_offset(pge, va); pme = pmd_offset(pue, va); /* Use middle 10 bits of VA to index the second-level map */ - pte = pte_alloc_kernel(&init_mm, pme, va); + pte = pte_alloc_kernel(pme, va); if (pte != 0) { err = 0; set_pte(pte, mk_pte_phys(pa & PAGE_MASK, prot)); } - spin_unlock(&init_mm.page_table_lock); return err; } diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index 4eaec0f3525b..2c67dfe5a6b3 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -87,14 +87,14 @@ static inline void pgd_list_add(pgd_t *pgd) if (pgd_list) pgd_list->private = (unsigned long) &page->index; pgd_list = page; - page->private = (unsigned long) &pgd_list; + set_page_private(page, (unsigned long)&pgd_list); } static inline void pgd_list_del(pgd_t *pgd) { struct page *next, **pprev, *page = virt_to_page(pgd); next = (struct page *) page->index; - pprev = (struct page **) page->private; + pprev = (struct page **)page_private(page); *pprev = next; if (next) next->private = (unsigned long) pprev; diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index 16b485009622..fc1993564f98 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -134,17 +134,16 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) return ret; } -static void mark_screen_rdonly(struct task_struct * tsk) +static void mark_screen_rdonly(struct mm_struct *mm) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, *mapped; + pte_t *pte; + spinlock_t *ptl; int i; - preempt_disable(); - spin_lock(&tsk->mm->page_table_lock); - pgd = pgd_offset(tsk->mm, 0xA0000); + pgd = pgd_offset(mm, 0xA0000); if (pgd_none_or_clear_bad(pgd)) goto out; pud = pud_offset(pgd, 0xA0000); @@ -153,16 +152,14 @@ static void mark_screen_rdonly(struct task_struct * tsk) pmd = pmd_offset(pud, 0xA0000); if (pmd_none_or_clear_bad(pmd)) goto out; - pte = mapped = pte_offset_map(pmd, 0xA0000); + pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); for (i = 0; i < 32; i++) { if (pte_present(*pte)) set_pte(pte, pte_wrprotect(*pte)); pte++; } - pte_unmap(mapped); + pte_unmap_unlock(pte, ptl); out: - spin_unlock(&tsk->mm->page_table_lock); - preempt_enable(); flush_tlb(); } @@ -306,7 +303,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) - mark_screen_rdonly(tsk); + mark_screen_rdonly(tsk->mm); __asm__ __volatile__( "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t" "movl %0,%%esp\n\t" diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 244d8ec66be2..c4af9638dbfa 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -98,7 +98,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, extern unsigned long find_max_low_pfn(void); extern void find_max_pfn(void); -extern void one_highpage_init(struct page *, int, int); +extern void add_one_highpage_init(struct page *, int, int); extern struct e820map e820; extern unsigned long init_pg_tables_end; @@ -427,7 +427,7 @@ void __init set_highmem_pages_init(int bad_ppro) if (!pfn_valid(node_pfn)) continue; page = pfn_to_page(node_pfn); - one_highpage_init(page, node_pfn, bad_ppro); + add_one_highpage_init(page, node_pfn, bad_ppro); } } totalram_pages += totalhigh_pages; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 2ebaf75f732e..542d9298da5e 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/proc_fs.h> #include <linux/efi.h> +#include <linux/memory_hotplug.h> #include <asm/processor.h> #include <asm/system.h> @@ -266,17 +267,46 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) +void __devinit free_new_highpage(struct page *page) +{ + set_page_count(page, 1); + __free_page(page); + totalhigh_pages++; +} + +void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) { if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { ClearPageReserved(page); - set_page_count(page, 1); - __free_page(page); - totalhigh_pages++; + free_new_highpage(page); } else SetPageReserved(page); } +static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) +{ + free_new_highpage(page); + totalram_pages++; +#ifdef CONFIG_FLATMEM + max_mapnr = max(pfn, max_mapnr); +#endif + num_physpages++; + return 0; +} + +/* + * Not currently handling the NUMA case. + * Assuming single node and all memory that + * has been added dynamically that would be + * onlined here is in HIGHMEM + */ +void online_page(struct page *page) +{ + ClearPageReserved(page); + add_one_highpage_hotplug(page, page_to_pfn(page)); +} + + #ifdef CONFIG_NUMA extern void set_highmem_pages_init(int); #else @@ -284,7 +314,7 @@ static void __init set_highmem_pages_init(int bad_ppro) { int pfn; for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) - one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); totalram_pages += totalhigh_pages; } #endif /* CONFIG_FLATMEM */ @@ -615,6 +645,28 @@ void __init mem_init(void) #endif } +/* + * this is for the non-NUMA, single node SMP system case. + * Specifically, in the case of x86, we will always add + * memory to the highmem for now. + */ +#ifndef CONFIG_NEED_MULTIPLE_NODES +int add_memory(u64 start, u64 size) +{ + struct pglist_data *pgdata = &contig_page_data; + struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + return __add_pages(zone, start_pfn, nr_pages); +} + +int remove_memory(u64 start, u64 size) +{ + return -EINVAL; +} +#endif + kmem_cache_t *pgd_cache; kmem_cache_t *pmd_cache; diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index f379b8d67558..5d09de8d1c6b 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -28,7 +28,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long pfn; pfn = phys_addr >> PAGE_SHIFT; - pte = pte_alloc_kernel(&init_mm, pmd, addr); + pte = pte_alloc_kernel(pmd, addr); if (!pte) return -ENOMEM; do { @@ -87,14 +87,12 @@ static int ioremap_page_range(unsigned long addr, flush_cache_all(); phys_addr -= addr; pgd = pgd_offset_k(addr); - spin_lock(&init_mm.page_table_lock); do { next = pgd_addr_end(addr, end); err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags); if (err) break; } while (pgd++, addr = next, addr != end); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return err; } diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index dcdce2c6c532..9db3242103be 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -31,11 +31,13 @@ void show_mem(void) pg_data_t *pgdat; unsigned long i; struct page_state ps; + unsigned long flags; printk(KERN_INFO "Mem-info:\n"); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); total++; @@ -48,6 +50,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page) - 1; } + pgdat_resize_unlock(pgdat, &flags); } printk(KERN_INFO "%d pages of RAM\n", total); printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); @@ -188,19 +191,19 @@ static inline void pgd_list_add(pgd_t *pgd) struct page *page = virt_to_page(pgd); page->index = (unsigned long)pgd_list; if (pgd_list) - pgd_list->private = (unsigned long)&page->index; + set_page_private(pgd_list, (unsigned long)&page->index); pgd_list = page; - page->private = (unsigned long)&pgd_list; + set_page_private(page, (unsigned long)&pgd_list); } static inline void pgd_list_del(pgd_t *pgd) { struct page *next, **pprev, *page = virt_to_page(pgd); next = (struct page *)page->index; - pprev = (struct page **)page->private; + pprev = (struct page **)page_private(page); *pprev = next; if (next) - next->private = (unsigned long)pprev; + set_page_private(next, (unsigned long)pprev); } void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c index 65dfd2edb671..21654be3f73f 100644 --- a/arch/i386/oprofile/backtrace.c +++ b/arch/i386/oprofile/backtrace.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/mm.h> #include <asm/ptrace.h> +#include <asm/uaccess.h> struct frame_head { struct frame_head * ebp; @@ -21,26 +22,22 @@ struct frame_head { static struct frame_head * dump_backtrace(struct frame_head * head) { - oprofile_add_trace(head->ret); + struct frame_head bufhead[2]; - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (head >= head->ebp) + /* Also check accessibility of one struct frame_head beyond */ + if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) + return NULL; + if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) return NULL; - return head->ebp; -} - -/* check that the page(s) containing the frame head are present */ -static int pages_present(struct frame_head * head) -{ - struct mm_struct * mm = current->mm; + oprofile_add_trace(bufhead[0].ret); - /* FIXME: only necessary once per page */ - if (!check_user_page_readable(mm, (unsigned long)head)) - return 0; + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (head >= bufhead[0].ebp) + return NULL; - return check_user_page_readable(mm, (unsigned long)(head + 1)); + return bufhead[0].ebp; } /* @@ -97,15 +94,6 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) return; } -#ifdef CONFIG_SMP - if (!spin_trylock(¤t->mm->page_table_lock)) - return; -#endif - - while (depth-- && head && pages_present(head)) + while (depth-- && head) head = dump_backtrace(head); - -#ifdef CONFIG_SMP - spin_unlock(¤t->mm->page_table_lock); -#endif } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index d71731ee5b61..f7dfc107cb7b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2352,7 +2352,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon insert_vm_struct(mm, vma); mm->total_vm += size >> PAGE_SHIFT; - vm_stat_account(vma); + vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, + vma_pages(vma)); up_write(&task->mm->mmap_sem); /* diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index a3788fb84809..a88cdb7232f8 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -555,9 +555,13 @@ void show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - unsigned long present = pgdat->node_present_pages; + unsigned long present; + unsigned long flags; int shared = 0, cached = 0, reserved = 0; + printk("Node ID: %d\n", pgdat->node_id); + pgdat_resize_lock(pgdat, &flags); + present = pgdat->node_present_pages; for(i = 0; i < pgdat->node_spanned_pages; i++) { struct page *page; if (pfn_valid(pgdat->node_start_pfn + i)) @@ -571,6 +575,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page)-1; } + pgdat_resize_unlock(pgdat, &flags); total_present += present; total_reserved += reserved; total_cached += cached; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 3c32af910d60..af7eb087dca7 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -20,32 +20,6 @@ extern void die (char *, struct pt_regs *, long); /* - * This routine is analogous to expand_stack() but instead grows the - * register backing store (which grows towards higher addresses). - * Since the register backing store is access sequentially, we - * disallow growing the RBS by more than a page at a time. Note that - * the VM_GROWSUP flag can be set on any VM area but that's fine - * because the total process size is still limited by RLIMIT_STACK and - * RLIMIT_AS. - */ -static inline long -expand_backing_store (struct vm_area_struct *vma, unsigned long address) -{ - unsigned long grow; - - grow = PAGE_SIZE >> PAGE_SHIFT; - if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur - || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur)) - return -ENOMEM; - vma->vm_end += PAGE_SIZE; - vma->vm_mm->total_vm += grow; - if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; - __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow); - return 0; -} - -/* * Return TRUE if ADDRESS points at a page in the kernel's mapped segment * (inside region 5, on ia64) and that page is present. */ @@ -185,7 +159,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start) || REGION_OFFSET(address) >= RGN_MAP_LIMIT) goto bad_area; - if (expand_backing_store(vma, address)) + /* + * Since the register backing store is accessed sequentially, + * we disallow growing it by more than a page at a time. + */ + if (address > vma->vm_end + PAGE_SIZE - sizeof(long)) + goto bad_area; + if (expand_upwards(vma, address)) goto bad_area; } goto good_area; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 98246acd4991..e3215ba64ffd 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -158,7 +158,7 @@ ia64_init_addr_space (void) vma->vm_start = current->thread.rbs_bot & PAGE_MASK; vma->vm_end = vma->vm_start + PAGE_SIZE; vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7]; - vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP; + vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; down_write(¤t->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { up_write(¤t->mm->mmap_sem); @@ -275,26 +275,21 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ - spin_lock(&init_mm.page_table_lock); { pud = pud_alloc(&init_mm, pgd, address); if (!pud) goto out; - pmd = pmd_alloc(&init_mm, pud, address); if (!pmd) goto out; - pte = pte_alloc_map(&init_mm, pmd, address); + pte = pte_alloc_kernel(pmd, address); if (!pte) goto out; - if (!pte_none(*pte)) { - pte_unmap(pte); + if (!pte_none(*pte)) goto out; - } set_pte(pte, mk_pte(page, pgprot)); - pte_unmap(pte); } - out: spin_unlock(&init_mm.page_table_lock); + out: /* no need for flush_tlb */ return page; } diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index c93e0f2b5fea..c79a9b96d02b 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -158,10 +158,12 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long # ifdef CONFIG_SMP platform_global_tlb_purge(mm, start, end, nbits); # else + preempt_disable(); do { ia64_ptcl(start, (nbits<<2)); start += (1UL << nbits); } while (start < end); + preempt_enable(); # endif ia64_srlz_i(); /* srlz.i implies srlz.d */ diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index d9a40b1fe8ba..6facf15b04f3 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -48,6 +48,8 @@ void show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { + unsigned long flags; + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); total++; @@ -60,6 +62,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page) - 1; } + pgdat_resize_unlock(pgdat, &flags); } printk("%d pages of RAM\n", total); printk("%d pages of HIGHMEM\n",highmem); @@ -150,10 +153,14 @@ int __init reservedpages_count(void) int reservedpages, nid, i; reservedpages = 0; - for_each_online_node(nid) + for_each_online_node(nid) { + unsigned long flags; + pgdat_resize_lock(NODE_DATA(nid), &flags); for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) if (PageReserved(nid_page_nr(nid, i))) reservedpages++; + pgdat_resize_unlock(NODE_DATA(nid), &flags); + } return reservedpages; } diff --git a/arch/m32r/mm/ioremap.c b/arch/m32r/mm/ioremap.c index 70c59055c19c..a151849a605e 100644 --- a/arch/m32r/mm/ioremap.c +++ b/arch/m32r/mm/ioremap.c @@ -67,7 +67,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, if (address >= end) BUG(); do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); @@ -90,7 +90,6 @@ remap_area_pages(unsigned long address, unsigned long phys_addr, flush_cache_all(); if (address >= end) BUG(); - spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; pmd = pmd_alloc(&init_mm, dir, address); @@ -104,7 +103,6 @@ remap_area_pages(unsigned long address, unsigned long phys_addr, address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return error; } diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index ba960bbc8e6d..1dd5d18b2201 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -388,33 +388,11 @@ config AMIGA_PCMCIA Include support in the kernel for pcmcia on Amiga 1200 and Amiga 600. If you intend to use pcmcia cards say Y; otherwise say N. -config STRAM_SWAP - bool "Support for ST-RAM as swap space" - depends on ATARI && BROKEN - ---help--- - Some Atari 68k machines (including the 520STF and 1020STE) divide - their addressable memory into ST and TT sections. The TT section - (up to 512MB) is the main memory; the ST section (up to 4MB) is - accessible to the built-in graphics board, runs slower, and is - present mainly for backward compatibility with older machines. - - This enables support for using (parts of) ST-RAM as swap space, - instead of as normal system memory. This can first enhance system - performance if you have lots of alternate RAM (compared to the size - of ST-RAM), because executable code always will reside in faster - memory. ST-RAM will remain as ultra-fast swap space. On the other - hand, it allows much improved dynamic allocations of ST-RAM buffers - for device driver modules (e.g. floppy, ACSI, SLM printer, DMA - sound). The probability that such allocations at module load time - fail is drastically reduced. - config STRAM_PROC bool "ST-RAM statistics in /proc" depends on ATARI help - Say Y here to report ST-RAM usage statistics in /proc/stram. See - the help for CONFIG_STRAM_SWAP for discussion of ST-RAM and its - uses. + Say Y here to report ST-RAM usage statistics in /proc/stram. config HEARTBEAT bool "Use power LED as a heartbeat" if AMIGA || APOLLO || ATARI || MAC ||Q40 diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c index 5a3c106b40c8..22e0481a5f7b 100644 --- a/arch/m68k/atari/stram.c +++ b/arch/m68k/atari/stram.c @@ -15,11 +15,9 @@ #include <linux/kdev_t.h> #include <linux/major.h> #include <linux/init.h> -#include <linux/swap.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/pagemap.h> -#include <linux/shm.h> #include <linux/bootmem.h> #include <linux/mount.h> #include <linux/blkdev.h> @@ -33,8 +31,6 @@ #include <asm/io.h> #include <asm/semaphore.h> -#include <linux/swapops.h> - #undef DEBUG #ifdef DEBUG @@ -49,8 +45,7 @@ #include <linux/proc_fs.h> #endif -/* Pre-swapping comments: - * +/* * ++roman: * * New version of ST-Ram buffer allocation. Instead of using the @@ -75,76 +70,6 @@ * */ -/* - * New Nov 1997: Use ST-RAM as swap space! - * - * In the past, there were often problems with modules that require ST-RAM - * buffers. Such drivers have to use __get_dma_pages(), which unfortunately - * often isn't very successful in allocating more than 1 page :-( [1] The net - * result was that most of the time you couldn't insmod such modules (ataflop, - * ACSI, SCSI on Falcon, Atari internal framebuffer, not to speak of acsi_slm, - * which needs a 1 MB buffer... :-). - * - * To overcome this limitation, ST-RAM can now be turned into a very - * high-speed swap space. If a request for an ST-RAM buffer comes, the kernel - * now tries to unswap some pages on that swap device to make some free (and - * contiguous) space. This works much better in comparison to - * __get_dma_pages(), since used swap pages can be selectively freed by either - * moving them to somewhere else in swap space, or by reading them back into - * system memory. Ok, there operation of unswapping isn't really cheap (for - * each page, one has to go through the page tables of all processes), but it - * doesn't happen that often (only when allocation ST-RAM, i.e. when loading a - * module that needs ST-RAM). But it at least makes it possible to load such - * modules! - * - * It could also be that overall system performance increases a bit due to - * ST-RAM swapping, since slow ST-RAM isn't used anymore for holding data or - * executing code in. It's then just a (very fast, compared to disk) back - * storage for not-so-often needed data. (But this effect must be compared - * with the loss of total memory...) Don't know if the effect is already - * visible on a TT, where the speed difference between ST- and TT-RAM isn't - * that dramatic, but it should on machines where TT-RAM is really much faster - * (e.g. Afterburner). - * - * [1]: __get_free_pages() does a fine job if you only want one page, but if - * you want more (contiguous) pages, it can give you such a block only if - * there's already a free one. The algorithm can't try to free buffers or swap - * out something in order to make more free space, since all that page-freeing - * mechanisms work "target-less", i.e. they just free something, but not in a - * specific place. I.e., __get_free_pages() can't do anything to free - * *adjacent* pages :-( This situation becomes even worse for DMA memory, - * since the freeing algorithms are also blind to DMA capability of pages. - */ - -/* 1998-10-20: ++andreas - unswap_by_move disabled because it does not handle swapped shm pages. -*/ - -/* 2000-05-01: ++andreas - Integrated with bootmem. Remove all traces of unswap_by_move. -*/ - -#ifdef CONFIG_STRAM_SWAP -#define ALIGN_IF_SWAP(x) PAGE_ALIGN(x) -#else -#define ALIGN_IF_SWAP(x) (x) -#endif - -/* get index of swap page at address 'addr' */ -#define SWAP_NR(addr) (((addr) - swap_start) >> PAGE_SHIFT) - -/* get address of swap page #'nr' */ -#define SWAP_ADDR(nr) (swap_start + ((nr) << PAGE_SHIFT)) - -/* get number of pages for 'n' bytes (already page-aligned) */ -#define N_PAGES(n) ((n) >> PAGE_SHIFT) - -/* The following two numbers define the maximum fraction of ST-RAM in total - * memory, below that the kernel would automatically use ST-RAM as swap - * space. This decision can be overridden with stram_swap= */ -#define MAX_STRAM_FRACTION_NOM 1 -#define MAX_STRAM_FRACTION_DENOM 3 - /* Start and end (virtual) of ST-RAM */ static void *stram_start, *stram_end; @@ -164,10 +89,9 @@ typedef struct stram_block { } BLOCK; /* values for flags field */ -#define BLOCK_FREE 0x01 /* free structure in the BLOCKs pool */ +#define BLOCK_FREE 0x01 /* free structure in the BLOCKs pool */ #define BLOCK_KMALLOCED 0x02 /* structure allocated by kmalloc() */ -#define BLOCK_GFP 0x08 /* block allocated with __get_dma_pages() */ -#define BLOCK_INSWAP 0x10 /* block allocated in swap space */ +#define BLOCK_GFP 0x08 /* block allocated with __get_dma_pages() */ /* list of allocated blocks */ static BLOCK *alloc_list; @@ -179,60 +103,8 @@ static BLOCK *alloc_list; #define N_STATIC_BLOCKS 20 static BLOCK static_blocks[N_STATIC_BLOCKS]; -#ifdef CONFIG_STRAM_SWAP -/* max. number of bytes to use for swapping - * 0 = no ST-RAM swapping - * -1 = do swapping (to whole ST-RAM) if it's less than MAX_STRAM_FRACTION of - * total memory - */ -static int max_swap_size = -1; - -/* start and end of swapping area */ -static void *swap_start, *swap_end; - -/* The ST-RAM's swap info structure */ -static struct swap_info_struct *stram_swap_info; - -/* The ST-RAM's swap type */ -static int stram_swap_type; - -/* Semaphore for get_stram_region. */ -static DECLARE_MUTEX(stram_swap_sem); - -/* major and minor device number of the ST-RAM device; for the major, we use - * the same as Amiga z2ram, which is really similar and impossible on Atari, - * and for the minor a relatively odd number to avoid the user creating and - * using that device. */ -#define STRAM_MAJOR Z2RAM_MAJOR -#define STRAM_MINOR 13 - -/* Some impossible pointer value */ -#define MAGIC_FILE_P (struct file *)0xffffdead - -#ifdef DO_PROC -static unsigned stat_swap_read; -static unsigned stat_swap_write; -static unsigned stat_swap_force; -#endif /* DO_PROC */ - -#endif /* CONFIG_STRAM_SWAP */ - /***************************** Prototypes *****************************/ -#ifdef CONFIG_STRAM_SWAP -static int swap_init(void *start_mem, void *swap_data); -static void *get_stram_region( unsigned long n_pages ); -static void free_stram_region( unsigned long offset, unsigned long n_pages - ); -static int in_some_region(void *addr); -static unsigned long find_free_region( unsigned long n_pages, unsigned long - *total_free, unsigned long - *region_free ); -static void do_stram_request(request_queue_t *); -static int stram_open( struct inode *inode, struct file *filp ); -static int stram_release( struct inode *inode, struct file *filp ); -static void reserve_region(void *start, void *end); -#endif static BLOCK *add_region( void *addr, unsigned long size ); static BLOCK *find_region( void *addr ); static int remove_region( BLOCK *block ); @@ -279,84 +151,11 @@ void __init atari_stram_init(void) */ void __init atari_stram_reserve_pages(void *start_mem) { -#ifdef CONFIG_STRAM_SWAP - /* if max_swap_size is negative (i.e. no stram_swap= option given), - * determine at run time whether to use ST-RAM swapping */ - if (max_swap_size < 0) - /* Use swapping if ST-RAM doesn't make up more than MAX_STRAM_FRACTION - * of total memory. In that case, the max. size is set to 16 MB, - * because ST-RAM can never be bigger than that. - * Also, never use swapping on a Hades, there's no separate ST-RAM in - * that machine. */ - max_swap_size = - (!MACH_IS_HADES && - (N_PAGES(stram_end-stram_start)*MAX_STRAM_FRACTION_DENOM <= - ((unsigned long)high_memory>>PAGE_SHIFT)*MAX_STRAM_FRACTION_NOM)) ? 16*1024*1024 : 0; - DPRINTK( "atari_stram_reserve_pages: max_swap_size = %d\n", max_swap_size ); -#endif - /* always reserve first page of ST-RAM, the first 2 kB are * supervisor-only! */ if (!kernel_in_stram) reserve_bootmem (0, PAGE_SIZE); -#ifdef CONFIG_STRAM_SWAP - { - void *swap_data; - - start_mem = (void *) PAGE_ALIGN ((unsigned long) start_mem); - /* determine first page to use as swap: if the kernel is - in TT-RAM, this is the first page of (usable) ST-RAM; - otherwise just use the end of kernel data (= start_mem) */ - swap_start = !kernel_in_stram ? stram_start + PAGE_SIZE : start_mem; - /* decrement by one page, rest of kernel assumes that first swap page - * is always reserved and maybe doesn't handle swp_entry == 0 - * correctly */ - swap_start -= PAGE_SIZE; - swap_end = stram_end; - if (swap_end-swap_start > max_swap_size) - swap_end = swap_start + max_swap_size; - DPRINTK( "atari_stram_reserve_pages: swapping enabled; " - "swap=%p-%p\n", swap_start, swap_end); - - /* reserve some amount of memory for maintainance of - * swapping itself: one page for each 2048 (PAGE_SIZE/2) - * swap pages. (2 bytes for each page) */ - swap_data = start_mem; - start_mem += ((SWAP_NR(swap_end) + PAGE_SIZE/2 - 1) - >> (PAGE_SHIFT-1)) << PAGE_SHIFT; - /* correct swap_start if necessary */ - if (swap_start + PAGE_SIZE == swap_data) - swap_start = start_mem - PAGE_SIZE; - - if (!swap_init( start_mem, swap_data )) { - printk( KERN_ERR "ST-RAM swap space initialization failed\n" ); - max_swap_size = 0; - return; - } - /* reserve region for swapping meta-data */ - reserve_region(swap_data, start_mem); - /* reserve swapping area itself */ - reserve_region(swap_start + PAGE_SIZE, swap_end); - - /* - * If the whole ST-RAM is used for swapping, there are no allocatable - * dma pages left. But unfortunately, some shared parts of the kernel - * (particularly the SCSI mid-level) call __get_dma_pages() - * unconditionally :-( These calls then fail, and scsi.c even doesn't - * check for NULL return values and just crashes. The quick fix for - * this (instead of doing much clean up work in the SCSI code) is to - * pretend all pages are DMA-able by setting mach_max_dma_address to - * ULONG_MAX. This doesn't change any functionality so far, since - * get_dma_pages() shouldn't be used on Atari anyway anymore (better - * use atari_stram_alloc()), and the Atari SCSI drivers don't need DMA - * memory. But unfortunately there's now no kind of warning (even not - * a NULL return value) if you use get_dma_pages() nevertheless :-( - * You just will get non-DMA-able memory... - */ - mach_max_dma_address = 0xffffffff; - } -#endif } void atari_stram_mem_init_hook (void) @@ -367,7 +166,6 @@ void atari_stram_mem_init_hook (void) /* * This is main public interface: somehow allocate a ST-RAM block - * There are three strategies: * * - If we're before mem_init(), we have to make a static allocation. The * region is taken in the kernel data area (if the kernel is in ST-RAM) or @@ -375,14 +173,9 @@ void atari_stram_mem_init_hook (void) * rsvd_stram_* region. The ST-RAM is somewhere in the middle of kernel * address space in the latter case. * - * - If mem_init() already has been called and ST-RAM swapping is enabled, - * try to get the memory from the (pseudo) swap-space, either free already - * or by moving some other pages out of the swap. - * - * - If mem_init() already has been called, and ST-RAM swapping is not - * enabled, the only possibility is to try with __get_dma_pages(). This has - * the disadvantage that it's very hard to get more than 1 page, and it is - * likely to fail :-( + * - If mem_init() already has been called, try with __get_dma_pages(). + * This has the disadvantage that it's very hard to get more than 1 page, + * and it is likely to fail :-( * */ void *atari_stram_alloc(long size, const char *owner) @@ -393,27 +186,13 @@ void *atari_stram_alloc(long size, const char *owner) DPRINTK("atari_stram_alloc(size=%08lx,owner=%s)\n", size, owner); - size = ALIGN_IF_SWAP(size); - DPRINTK( "atari_stram_alloc: rounded size = %08lx\n", size ); -#ifdef CONFIG_STRAM_SWAP - if (max_swap_size) { - /* If swapping is active: make some free space in the swap - "device". */ - DPRINTK( "atari_stram_alloc: after mem_init, swapping ok, " - "calling get_region\n" ); - addr = get_stram_region( N_PAGES(size) ); - flags = BLOCK_INSWAP; - } - else -#endif if (!mem_init_done) return alloc_bootmem_low(size); else { - /* After mem_init() and no swapping: can only resort to - * __get_dma_pages() */ + /* After mem_init(): can only resort to __get_dma_pages() */ addr = (void *)__get_dma_pages(GFP_KERNEL, get_order(size)); flags = BLOCK_GFP; - DPRINTK( "atari_stram_alloc: after mem_init, swapping off, " + DPRINTK( "atari_stram_alloc: after mem_init, " "get_pages=%p\n", addr ); } @@ -422,12 +201,7 @@ void *atari_stram_alloc(long size, const char *owner) /* out of memory for BLOCK structure :-( */ DPRINTK( "atari_stram_alloc: out of mem for BLOCK -- " "freeing again\n" ); -#ifdef CONFIG_STRAM_SWAP - if (flags == BLOCK_INSWAP) - free_stram_region( SWAP_NR(addr), N_PAGES(size) ); - else -#endif - free_pages((unsigned long)addr, get_order(size)); + free_pages((unsigned long)addr, get_order(size)); return( NULL ); } block->owner = owner; @@ -451,25 +225,12 @@ void atari_stram_free( void *addr ) DPRINTK( "atari_stram_free: found block (%p): size=%08lx, owner=%s, " "flags=%02x\n", block, block->size, block->owner, block->flags ); -#ifdef CONFIG_STRAM_SWAP - if (!max_swap_size) { -#endif - if (block->flags & BLOCK_GFP) { - DPRINTK("atari_stram_free: is kmalloced, order_size=%d\n", - get_order(block->size)); - free_pages((unsigned long)addr, get_order(block->size)); - } - else - goto fail; -#ifdef CONFIG_STRAM_SWAP - } - else if (block->flags & BLOCK_INSWAP) { - DPRINTK( "atari_stram_free: is swap-alloced\n" ); - free_stram_region( SWAP_NR(block->start), N_PAGES(block->size) ); - } - else + if (!(block->flags & BLOCK_GFP)) goto fail; -#endif + + DPRINTK("atari_stram_free: is kmalloced, order_size=%d\n", + get_order(block->size)); + free_pages((unsigned long)addr, get_order(block->size)); remove_region( block ); return; @@ -478,612 +239,6 @@ void atari_stram_free( void *addr ) "(called from %p)\n", addr, __builtin_return_address(0) ); } - -#ifdef CONFIG_STRAM_SWAP - - -/* ------------------------------------------------------------------------ */ -/* Main Swapping Functions */ -/* ------------------------------------------------------------------------ */ - - -/* - * Initialize ST-RAM swap device - * (lots copied and modified from sys_swapon() in mm/swapfile.c) - */ -static int __init swap_init(void *start_mem, void *swap_data) -{ - static struct dentry fake_dentry; - static struct vfsmount fake_vfsmnt; - struct swap_info_struct *p; - struct inode swap_inode; - unsigned int type; - void *addr; - int i, j, k, prev; - - DPRINTK("swap_init(start_mem=%p, swap_data=%p)\n", - start_mem, swap_data); - - /* need at least one page for swapping to (and this also isn't very - * much... :-) */ - if (swap_end - swap_start < 2*PAGE_SIZE) { - printk( KERN_WARNING "stram_swap_init: swap space too small\n" ); - return( 0 ); - } - - /* find free slot in swap_info */ - for( p = swap_info, type = 0; type < nr_swapfiles; type++, p++ ) - if (!(p->flags & SWP_USED)) - break; - if (type >= MAX_SWAPFILES) { - printk( KERN_WARNING "stram_swap_init: max. number of " - "swap devices exhausted\n" ); - return( 0 ); - } - if (type >= nr_swapfiles) - nr_swapfiles = type+1; - - stram_swap_info = p; - stram_swap_type = type; - - /* fake some dir cache entries to give us some name in /dev/swaps */ - fake_dentry.d_parent = &fake_dentry; - fake_dentry.d_name.name = "stram (internal)"; - fake_dentry.d_name.len = 16; - fake_vfsmnt.mnt_parent = &fake_vfsmnt; - - p->flags = SWP_USED; - p->swap_file = &fake_dentry; - p->swap_vfsmnt = &fake_vfsmnt; - p->swap_map = swap_data; - p->cluster_nr = 0; - p->next = -1; - p->prio = 0x7ff0; /* a rather high priority, but not the higest - * to give the user a chance to override */ - - /* call stram_open() directly, avoids at least the overhead in - * constructing a dummy file structure... */ - swap_inode.i_rdev = MKDEV( STRAM_MAJOR, STRAM_MINOR ); - stram_open( &swap_inode, MAGIC_FILE_P ); - p->max = SWAP_NR(swap_end); - - /* initialize swap_map: set regions that are already allocated or belong - * to kernel data space to SWAP_MAP_BAD, otherwise to free */ - j = 0; /* # of free pages */ - k = 0; /* # of already allocated pages (from pre-mem_init stram_alloc()) */ - p->lowest_bit = 0; - p->highest_bit = 0; - for( i = 1, addr = SWAP_ADDR(1); i < p->max; - i++, addr += PAGE_SIZE ) { - if (in_some_region( addr )) { - p->swap_map[i] = SWAP_MAP_BAD; - ++k; - } - else if (kernel_in_stram && addr < start_mem ) { - p->swap_map[i] = SWAP_MAP_BAD; - } - else { - p->swap_map[i] = 0; - ++j; - if (!p->lowest_bit) p->lowest_bit = i; - p->highest_bit = i; - } - } - /* first page always reserved (and doesn't really belong to swap space) */ - p->swap_map[0] = SWAP_MAP_BAD; - - /* now swapping to this device ok */ - p->pages = j + k; - swap_list_lock(); - nr_swap_pages += j; - p->flags = SWP_WRITEOK; - - /* insert swap space into swap_list */ - prev = -1; - for (i = swap_list.head; i >= 0; i = swap_info[i].next) { - if (p->prio >= swap_info[i].prio) { - break; - } - prev = i; - } - p->next = i; - if (prev < 0) { - swap_list.head = swap_list.next = p - swap_info; - } else { - swap_info[prev].next = p - swap_info; - } - swap_list_unlock(); - - printk( KERN_INFO "Using %dk (%d pages) of ST-RAM as swap space.\n", - p->pages << 2, p->pages ); - return( 1 ); -} - - -/* - * The swap entry has been read in advance, and we return 1 to indicate - * that the page has been used or is no longer needed. - * - * Always set the resulting pte to be nowrite (the same as COW pages - * after one process has exited). We don't know just how many PTEs will - * share this swap entry, so be cautious and let do_wp_page work out - * what to do if a write is requested later. - */ -static inline void unswap_pte(struct vm_area_struct * vma, unsigned long - address, pte_t *dir, swp_entry_t entry, - struct page *page) -{ - pte_t pte = *dir; - - if (pte_none(pte)) - return; - if (pte_present(pte)) { - /* If this entry is swap-cached, then page must already - hold the right address for any copies in physical - memory */ - if (pte_page(pte) != page) - return; - /* We will be removing the swap cache in a moment, so... */ - set_pte(dir, pte_mkdirty(pte)); - return; - } - if (pte_val(pte) != entry.val) - return; - - DPRINTK("unswap_pte: replacing entry %08lx by new page %p", - entry.val, page); - set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); - swap_free(entry); - get_page(page); - inc_mm_counter(vma->vm_mm, rss); -} - -static inline void unswap_pmd(struct vm_area_struct * vma, pmd_t *dir, - unsigned long address, unsigned long size, - unsigned long offset, swp_entry_t entry, - struct page *page) -{ - pte_t * pte; - unsigned long end; - - if (pmd_none(*dir)) - return; - if (pmd_bad(*dir)) { - pmd_ERROR(*dir); - pmd_clear(dir); - return; - } - pte = pte_offset_kernel(dir, address); - offset += address & PMD_MASK; - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - do { - unswap_pte(vma, offset+address-vma->vm_start, pte, entry, page); - address += PAGE_SIZE; - pte++; - } while (address < end); -} - -static inline void unswap_pgd(struct vm_area_struct * vma, pgd_t *dir, - unsigned long address, unsigned long size, - swp_entry_t entry, struct page *page) -{ - pmd_t * pmd; - unsigned long offset, end; - - if (pgd_none(*dir)) - return; - if (pgd_bad(*dir)) { - pgd_ERROR(*dir); - pgd_clear(dir); - return; - } - pmd = pmd_offset(dir, address); - offset = address & PGDIR_MASK; - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - do { - unswap_pmd(vma, pmd, address, end - address, offset, entry, - page); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address < end); -} - -static void unswap_vma(struct vm_area_struct * vma, pgd_t *pgdir, - swp_entry_t entry, struct page *page) -{ - unsigned long start = vma->vm_start, end = vma->vm_end; - - do { - unswap_pgd(vma, pgdir, start, end - start, entry, page); - start = (start + PGDIR_SIZE) & PGDIR_MASK; - pgdir++; - } while (start < end); -} - -static void unswap_process(struct mm_struct * mm, swp_entry_t entry, - struct page *page) -{ - struct vm_area_struct* vma; - - /* - * Go through process' page directory. - */ - if (!mm) - return; - for (vma = mm->mmap; vma; vma = vma->vm_next) { - pgd_t * pgd = pgd_offset(mm, vma->vm_start); - unswap_vma(vma, pgd, entry, page); - } -} - - -static int unswap_by_read(unsigned short *map, unsigned long max, - unsigned long start, unsigned long n_pages) -{ - struct task_struct *p; - struct page *page; - swp_entry_t entry; - unsigned long i; - - DPRINTK( "unswapping %lu..%lu by reading in\n", - start, start+n_pages-1 ); - - for( i = start; i < start+n_pages; ++i ) { - if (map[i] == SWAP_MAP_BAD) { - printk( KERN_ERR "get_stram_region: page %lu already " - "reserved??\n", i ); - continue; - } - - if (map[i]) { - entry = swp_entry(stram_swap_type, i); - DPRINTK("unswap: map[i=%lu]=%u nr_swap=%ld\n", - i, map[i], nr_swap_pages); - - swap_device_lock(stram_swap_info); - map[i]++; - swap_device_unlock(stram_swap_info); - /* Get a page for the entry, using the existing - swap cache page if there is one. Otherwise, - get a clean page and read the swap into it. */ - page = read_swap_cache_async(entry, NULL, 0); - if (!page) { - swap_free(entry); - return -ENOMEM; - } - read_lock(&tasklist_lock); - for_each_process(p) - unswap_process(p->mm, entry, page); - read_unlock(&tasklist_lock); - shmem_unuse(entry, page); - /* Now get rid of the extra reference to the - temporary page we've been using. */ - if (PageSwapCache(page)) - delete_from_swap_cache(page); - __free_page(page); - #ifdef DO_PROC - stat_swap_force++; - #endif - } - - DPRINTK( "unswap: map[i=%lu]=%u nr_swap=%ld\n", - i, map[i], nr_swap_pages ); - swap_list_lock(); - swap_device_lock(stram_swap_info); - map[i] = SWAP_MAP_BAD; - if (stram_swap_info->lowest_bit == i) - stram_swap_info->lowest_bit++; - if (stram_swap_info->highest_bit == i) - stram_swap_info->highest_bit--; - --nr_swap_pages; - swap_device_unlock(stram_swap_info); - swap_list_unlock(); - } - - return 0; -} - -/* - * reserve a region in ST-RAM swap space for an allocation - */ -static void *get_stram_region( unsigned long n_pages ) -{ - unsigned short *map = stram_swap_info->swap_map; - unsigned long max = stram_swap_info->max; - unsigned long start, total_free, region_free; - int err; - void *ret = NULL; - - DPRINTK( "get_stram_region(n_pages=%lu)\n", n_pages ); - - down(&stram_swap_sem); - - /* disallow writing to the swap device now */ - stram_swap_info->flags = SWP_USED; - - /* find a region of n_pages pages in the swap space including as much free - * pages as possible (and excluding any already-reserved pages). */ - if (!(start = find_free_region( n_pages, &total_free, ®ion_free ))) - goto end; - DPRINTK( "get_stram_region: region starts at %lu, has %lu free pages\n", - start, region_free ); - - err = unswap_by_read(map, max, start, n_pages); - if (err) - goto end; - - ret = SWAP_ADDR(start); - end: - /* allow using swap device again */ - stram_swap_info->flags = SWP_WRITEOK; - up(&stram_swap_sem); - DPRINTK( "get_stram_region: returning %p\n", ret ); - return( ret ); -} - - -/* - * free a reserved region in ST-RAM swap space - */ -static void free_stram_region( unsigned long offset, unsigned long n_pages ) -{ - unsigned short *map = stram_swap_info->swap_map; - - DPRINTK( "free_stram_region(offset=%lu,n_pages=%lu)\n", offset, n_pages ); - - if (offset < 1 || offset + n_pages > stram_swap_info->max) { - printk( KERN_ERR "free_stram_region: Trying to free non-ST-RAM\n" ); - return; - } - - swap_list_lock(); - swap_device_lock(stram_swap_info); - /* un-reserve the freed pages */ - for( ; n_pages > 0; ++offset, --n_pages ) { - if (map[offset] != SWAP_MAP_BAD) - printk( KERN_ERR "free_stram_region: Swap page %lu was not " - "reserved\n", offset ); - map[offset] = 0; - } - - /* update swapping meta-data */ - if (offset < stram_swap_info->lowest_bit) - stram_swap_info->lowest_bit = offset; - if (offset+n_pages-1 > stram_swap_info->highest_bit) - stram_swap_info->highest_bit = offset+n_pages-1; - if (stram_swap_info->prio > swap_info[swap_list.next].prio) - swap_list.next = swap_list.head; - nr_swap_pages += n_pages; - swap_device_unlock(stram_swap_info); - swap_list_unlock(); -} - - -/* ------------------------------------------------------------------------ */ -/* Utility Functions for Swapping */ -/* ------------------------------------------------------------------------ */ - - -/* is addr in some of the allocated regions? */ -static int in_some_region(void *addr) -{ - BLOCK *p; - - for( p = alloc_list; p; p = p->next ) { - if (p->start <= addr && addr < p->start + p->size) - return( 1 ); - } - return( 0 ); -} - - -static unsigned long find_free_region(unsigned long n_pages, - unsigned long *total_free, - unsigned long *region_free) -{ - unsigned short *map = stram_swap_info->swap_map; - unsigned long max = stram_swap_info->max; - unsigned long head, tail, max_start; - long nfree, max_free; - - /* first scan the swap space for a suitable place for the allocation */ - head = 1; - max_start = 0; - max_free = -1; - *total_free = 0; - - start_over: - /* increment tail until final window size reached, and count free pages */ - nfree = 0; - for( tail = head; tail-head < n_pages && tail < max; ++tail ) { - if (map[tail] == SWAP_MAP_BAD) { - head = tail+1; - goto start_over; - } - if (!map[tail]) { - ++nfree; - ++*total_free; - } - } - if (tail-head < n_pages) - goto out; - if (nfree > max_free) { - max_start = head; - max_free = nfree; - if (max_free >= n_pages) - /* don't need more free pages... :-) */ - goto out; - } - - /* now shift the window and look for the area where as much pages as - * possible are free */ - while( tail < max ) { - nfree -= (map[head++] == 0); - if (map[tail] == SWAP_MAP_BAD) { - head = tail+1; - goto start_over; - } - if (!map[tail]) { - ++nfree; - ++*total_free; - } - ++tail; - if (nfree > max_free) { - max_start = head; - max_free = nfree; - if (max_free >= n_pages) - /* don't need more free pages... :-) */ - goto out; - } - } - - out: - if (max_free < 0) { - printk( KERN_NOTICE "get_stram_region: ST-RAM too full or fragmented " - "-- can't allocate %lu pages\n", n_pages ); - return( 0 ); - } - - *region_free = max_free; - return( max_start ); -} - - -/* setup parameters from command line */ -void __init stram_swap_setup(char *str, int *ints) -{ - if (ints[0] >= 1) - max_swap_size = ((ints[1] < 0 ? 0 : ints[1]) * 1024) & PAGE_MASK; -} - - -/* ------------------------------------------------------------------------ */ -/* ST-RAM device */ -/* ------------------------------------------------------------------------ */ - -static int refcnt; - -static void do_stram_request(request_queue_t *q) -{ - struct request *req; - - while ((req = elv_next_request(q)) != NULL) { - void *start = swap_start + (req->sector << 9); - unsigned long len = req->current_nr_sectors << 9; - if ((start + len) > swap_end) { - printk( KERN_ERR "stram: bad access beyond end of device: " - "block=%ld, count=%d\n", - req->sector, - req->current_nr_sectors ); - end_request(req, 0); - continue; - } - - if (req->cmd == READ) { - memcpy(req->buffer, start, len); -#ifdef DO_PROC - stat_swap_read += N_PAGES(len); -#endif - } - else { - memcpy(start, req->buffer, len); -#ifdef DO_PROC - stat_swap_write += N_PAGES(len); -#endif - } - end_request(req, 1); - } -} - - -static int stram_open( struct inode *inode, struct file *filp ) -{ - if (filp != MAGIC_FILE_P) { - printk( KERN_NOTICE "Only kernel can open ST-RAM device\n" ); - return( -EPERM ); - } - if (refcnt) - return( -EBUSY ); - ++refcnt; - return( 0 ); -} - -static int stram_release( struct inode *inode, struct file *filp ) -{ - if (filp != MAGIC_FILE_P) { - printk( KERN_NOTICE "Only kernel can close ST-RAM device\n" ); - return( -EPERM ); - } - if (refcnt > 0) - --refcnt; - return( 0 ); -} - - -static struct block_device_operations stram_fops = { - .open = stram_open, - .release = stram_release, -}; - -static struct gendisk *stram_disk; -static struct request_queue *stram_queue; -static DEFINE_SPINLOCK(stram_lock); - -int __init stram_device_init(void) -{ - if (!MACH_IS_ATARI) - /* no point in initializing this, I hope */ - return -ENXIO; - - if (!max_swap_size) - /* swapping not enabled */ - return -ENXIO; - stram_disk = alloc_disk(1); - if (!stram_disk) - return -ENOMEM; - - if (register_blkdev(STRAM_MAJOR, "stram")) { - put_disk(stram_disk); - return -ENXIO; - } - - stram_queue = blk_init_queue(do_stram_request, &stram_lock); - if (!stram_queue) { - unregister_blkdev(STRAM_MAJOR, "stram"); - put_disk(stram_disk); - return -ENOMEM; - } - - stram_disk->major = STRAM_MAJOR; - stram_disk->first_minor = STRAM_MINOR; - stram_disk->fops = &stram_fops; - stram_disk->queue = stram_queue; - sprintf(stram_disk->disk_name, "stram"); - set_capacity(stram_disk, (swap_end - swap_start)/512); - add_disk(stram_disk); - return 0; -} - - - -/* ------------------------------------------------------------------------ */ -/* Misc Utility Functions */ -/* ------------------------------------------------------------------------ */ - -/* reserve a range of pages */ -static void reserve_region(void *start, void *end) -{ - reserve_bootmem (virt_to_phys(start), end - start); -} - -#endif /* CONFIG_STRAM_SWAP */ - /* ------------------------------------------------------------------------ */ /* Region Management */ @@ -1173,50 +328,9 @@ int get_stram_list( char *buf ) { int len = 0; BLOCK *p; -#ifdef CONFIG_STRAM_SWAP - int i; - unsigned short *map = stram_swap_info->swap_map; - unsigned long max = stram_swap_info->max; - unsigned free = 0, used = 0, rsvd = 0; -#endif -#ifdef CONFIG_STRAM_SWAP - if (max_swap_size) { - for( i = 1; i < max; ++i ) { - if (!map[i]) - ++free; - else if (map[i] == SWAP_MAP_BAD) - ++rsvd; - else - ++used; - } - PRINT_PROC( - "Total ST-RAM: %8u kB\n" - "Total ST-RAM swap: %8lu kB\n" - "Free swap: %8u kB\n" - "Used swap: %8u kB\n" - "Allocated swap: %8u kB\n" - "Swap Reads: %8u\n" - "Swap Writes: %8u\n" - "Swap Forced Reads: %8u\n", - (stram_end - stram_start) >> 10, - (max-1) << (PAGE_SHIFT-10), - free << (PAGE_SHIFT-10), - used << (PAGE_SHIFT-10), - rsvd << (PAGE_SHIFT-10), - stat_swap_read, - stat_swap_write, - stat_swap_force ); - } - else { -#endif - PRINT_PROC( "ST-RAM swapping disabled\n" ); - PRINT_PROC("Total ST-RAM: %8u kB\n", + PRINT_PROC("Total ST-RAM: %8u kB\n", (stram_end - stram_start) >> 10); -#ifdef CONFIG_STRAM_SWAP - } -#endif - PRINT_PROC( "Allocated regions:\n" ); for( p = alloc_list; p; p = p->next ) { if (len + 50 >= PAGE_SIZE) @@ -1227,8 +341,6 @@ int get_stram_list( char *buf ) p->owner); if (p->flags & BLOCK_GFP) PRINT_PROC( "page-alloced)\n" ); - else if (p->flags & BLOCK_INSWAP) - PRINT_PROC( "in swap)\n" ); else PRINT_PROC( "??)\n" ); } diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c index 5dcb3fa35ea9..fe2383e36b06 100644 --- a/arch/m68k/mm/kmap.c +++ b/arch/m68k/mm/kmap.c @@ -201,7 +201,7 @@ void *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag) virtaddr += PTRTREESIZE; size -= PTRTREESIZE; } else { - pte_dir = pte_alloc_kernel(&init_mm, pmd_dir, virtaddr); + pte_dir = pte_alloc_kernel(pmd_dir, virtaddr); if (!pte_dir) { printk("ioremap: no mem for pte_dir\n"); return NULL; diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c index 32e55adfeb8e..117481e86305 100644 --- a/arch/m68k/sun3x/dvma.c +++ b/arch/m68k/sun3x/dvma.c @@ -116,7 +116,7 @@ inline int dvma_map_cpu(unsigned long kaddr, pte_t *pte; unsigned long end3; - if((pte = pte_alloc_kernel(&init_mm, pmd, vaddr)) == NULL) { + if((pte = pte_alloc_kernel(pmd, vaddr)) == NULL) { ret = -ENOMEM; goto out; } diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c index 99262fe64560..7ce34d4aa220 100644 --- a/arch/mips/kernel/irixelf.c +++ b/arch/mips/kernel/irixelf.c @@ -697,7 +697,6 @@ static int load_irix_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* Do this so that we can load the interpreter, if need be. We will * change some of these later. */ - set_mm_counter(current->mm, rss, 0); setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); current->mm->start_stack = bprm->p; diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 9c44ca70befa..3101d1db5592 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -55,7 +55,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, if (address >= end) BUG(); do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); @@ -77,7 +77,6 @@ static int remap_area_pages(unsigned long address, phys_t phys_addr, flush_cache_all(); if (address >= end) BUG(); - spin_lock(&init_mm.page_table_lock); do { pud_t *pud; pmd_t *pmd; @@ -96,7 +95,6 @@ static int remap_area_pages(unsigned long address, phys_t phys_addr, address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return error; } diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index e15f09eaed12..a065349aee37 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -270,7 +270,6 @@ void flush_dcache_page(struct page *page) unsigned long offset; unsigned long addr; pgoff_t pgoff; - pte_t *pte; unsigned long pfn = page_to_pfn(page); @@ -301,21 +300,16 @@ void flush_dcache_page(struct page *page) * taking a page fault if the pte doesn't exist. * This is just for speed. If the page translation * isn't there, there's no point exciting the - * nadtlb handler into a nullification frenzy */ - - - if(!(pte = translation_exists(mpnt, addr))) - continue; - - /* make sure we really have this page: the private + * nadtlb handler into a nullification frenzy. + * + * Make sure we really have this page: the private * mappings may cover this area but have COW'd this - * particular page */ - if(pte_pfn(*pte) != pfn) - continue; - - __flush_cache_page(mpnt, addr); - - break; + * particular page. + */ + if (translation_exists(mpnt, addr, pfn)) { + __flush_cache_page(mpnt, addr); + break; + } } flush_dcache_mmap_unlock(mapping); } diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index ae6213d71670..f94a02ef3d95 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -114,7 +114,7 @@ static inline int map_pmd_uncached(pmd_t * pmd, unsigned long vaddr, if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, vaddr); + pte_t * pte = pte_alloc_kernel(pmd, vaddr); if (!pte) return -ENOMEM; if (map_pte_uncached(pte, orig_vaddr, end - vaddr, paddr_ptr)) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 2886ad70db48..29b998e430e6 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -505,7 +505,9 @@ void show_mem(void) for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { struct page *p; + unsigned long flags; + pgdat_resize_lock(NODE_DATA(i), &flags); p = nid_page_nr(i, j) - node_start_pfn(i); total++; @@ -517,6 +519,7 @@ void show_mem(void) free++; else shared += page_count(p) - 1; + pgdat_resize_unlock(NODE_DATA(i), &flags); } } #endif diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index f2df502cdae3..5c7a1b3b9326 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -52,7 +52,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo if (address >= end) BUG(); do { - pte_t * pte = pte_alloc_kernel(NULL, pmd, address); + pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); @@ -75,10 +75,9 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, flush_cache_all(); if (address >= end) BUG(); - spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(dir, address); + pmd = pmd_alloc(&init_mm, dir, address); error = -ENOMEM; if (!pmd) break; @@ -89,7 +88,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return error; } diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c index 0f710d2baec6..685fd0defe23 100644 --- a/arch/ppc/kernel/dma-mapping.c +++ b/arch/ppc/kernel/dma-mapping.c @@ -335,8 +335,6 @@ static int __init dma_alloc_init(void) pte_t *pte; int ret = 0; - spin_lock(&init_mm.page_table_lock); - do { pgd = pgd_offset(&init_mm, CONSISTENT_BASE); pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE); @@ -347,7 +345,7 @@ static int __init dma_alloc_init(void) } WARN_ON(!pmd_none(*pmd)); - pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE); + pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); if (!pte) { printk(KERN_ERR "%s: no pte tables\n", __func__); ret = -ENOMEM; @@ -357,8 +355,6 @@ static int __init dma_alloc_init(void) consistent_pte = pte; } while (0); - spin_unlock(&init_mm.page_table_lock); - return ret; } diff --git a/arch/ppc/mm/4xx_mmu.c b/arch/ppc/mm/4xx_mmu.c index b7bcbc232f39..4d006aa1a0d1 100644 --- a/arch/ppc/mm/4xx_mmu.c +++ b/arch/ppc/mm/4xx_mmu.c @@ -110,13 +110,11 @@ unsigned long __init mmu_mapin_ram(void) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; - spin_lock(&init_mm.page_table_lock); pmdp = pmd_offset(pgd_offset_k(v), v); pmd_val(*pmdp++) = val; pmd_val(*pmdp++) = val; pmd_val(*pmdp++) = val; pmd_val(*pmdp++) = val; - spin_unlock(&init_mm.page_table_lock); v += LARGE_PAGE_SIZE_16M; p += LARGE_PAGE_SIZE_16M; @@ -127,10 +125,8 @@ unsigned long __init mmu_mapin_ram(void) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; - spin_lock(&init_mm.page_table_lock); pmdp = pmd_offset(pgd_offset_k(v), v); pmd_val(*pmdp) = val; - spin_unlock(&init_mm.page_table_lock); v += LARGE_PAGE_SIZE_4M; p += LARGE_PAGE_SIZE_4M; diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c index 43505b1fc5d8..6ea9185fd120 100644 --- a/arch/ppc/mm/pgtable.c +++ b/arch/ppc/mm/pgtable.c @@ -280,18 +280,16 @@ map_page(unsigned long va, phys_addr_t pa, int flags) pte_t *pg; int err = -ENOMEM; - spin_lock(&init_mm.page_table_lock); /* Use upper 10 bits of VA to index the first level map */ pd = pmd_offset(pgd_offset_k(va), va); /* Use middle 10 bits of VA to index the second-level map */ - pg = pte_alloc_kernel(&init_mm, pd, va); + pg = pte_alloc_kernel(pd, va); if (pg != 0) { err = 0; set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); if (mem_init_done) flush_HPTE(0, va, pmd_val(*pd)); } - spin_unlock(&init_mm.page_table_lock); return err; } diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c index efa985f05aca..4aacf521e3e4 100644 --- a/arch/ppc64/kernel/vdso.c +++ b/arch/ppc64/kernel/vdso.c @@ -176,13 +176,13 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma, return NOPAGE_SIGBUS; /* - * Last page is systemcfg, special handling here, no get_page() a - * this is a reserved page + * Last page is systemcfg. */ if ((vma->vm_end - address) <= PAGE_SIZE) - return virt_to_page(systemcfg); + pg = virt_to_page(systemcfg); + else + pg = virt_to_page(vbase + offset); - pg = virt_to_page(vbase + offset); get_page(pg); DBG(" ->page count: %d\n", page_count(pg)); @@ -259,7 +259,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) * gettimeofday will be totally dead. It's fine to use that for setting * breakpoints in the vDSO code pages though */ - vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_RESERVED; vma->vm_flags |= mm->def_flags; vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; vma->vm_ops = &vdso_vmops; @@ -603,6 +603,8 @@ void __init vdso_init(void) ClearPageReserved(pg); get_page(pg); } + + get_page(virt_to_page(systemcfg)); } int in_gate_area_no_task(unsigned long addr) diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c index c65b87b92756..f4ca29cf5364 100644 --- a/arch/ppc64/mm/imalloc.c +++ b/arch/ppc64/mm/imalloc.c @@ -300,12 +300,7 @@ void im_free(void * addr) for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { if (tmp->addr == addr) { *p = tmp->next; - - /* XXX: do we need the lock? */ - spin_lock(&init_mm.page_table_lock); unmap_vm_area(tmp); - spin_unlock(&init_mm.page_table_lock); - kfree(tmp); up(&imlist_sem); return; diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index be64b157afce..e2bd7776622f 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -104,6 +104,8 @@ void show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { + unsigned long flags; + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; i++) { page = pgdat_page_nr(pgdat, i); total++; @@ -114,6 +116,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page) - 1; } + pgdat_resize_unlock(pgdat, &flags); } printk("%ld pages of RAM\n", total); printk("%ld reserved pages\n", reserved); @@ -155,7 +158,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) unsigned long vsid; if (mem_init_done) { - spin_lock(&init_mm.page_table_lock); pgdp = pgd_offset_k(ea); pudp = pud_alloc(&init_mm, pgdp, ea); if (!pudp) @@ -163,12 +165,11 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) pmdp = pmd_alloc(&init_mm, pudp, ea); if (!pmdp) return -ENOMEM; - ptep = pte_alloc_kernel(&init_mm, pmdp, ea); + ptep = pte_alloc_kernel(pmdp, ea); if (!ptep) return -ENOMEM; set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); - spin_unlock(&init_mm.page_table_lock); } else { unsigned long va, vpn, hash, hpteg; @@ -649,11 +650,14 @@ void __init mem_init(void) #endif for_each_pgdat(pgdat) { + unsigned long flags; + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; i++) { page = pgdat_page_nr(pgdat, i); if (PageReserved(page)) reservedpages++; } + pgdat_resize_unlock(pgdat, &flags); } codesize = (unsigned long)&_etext - (unsigned long)&_stext; @@ -867,3 +871,80 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, return vma_prot; } EXPORT_SYMBOL(phys_mem_access_prot); + +#ifdef CONFIG_MEMORY_HOTPLUG + +void online_page(struct page *page) +{ + ClearPageReserved(page); + free_cold_page(page); + totalram_pages++; + num_physpages++; +} + +/* + * This works only for the non-NUMA case. Later, we'll need a lookup + * to convert from real physical addresses to nid, that doesn't use + * pfn_to_nid(). + */ +int __devinit add_memory(u64 start, u64 size) +{ + struct pglist_data *pgdata = NODE_DATA(0); + struct zone *zone; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + /* this should work for most non-highmem platforms */ + zone = pgdata->node_zones; + + return __add_pages(zone, start_pfn, nr_pages); + + return 0; +} + +/* + * First pass at this code will check to determine if the remove + * request is within the RMO. Do not allow removal within the RMO. + */ +int __devinit remove_memory(u64 start, u64 size) +{ + struct zone *zone; + unsigned long start_pfn, end_pfn, nr_pages; + + start_pfn = start >> PAGE_SHIFT; + nr_pages = size >> PAGE_SHIFT; + end_pfn = start_pfn + nr_pages; + + printk("%s(): Attempting to remove memoy in range " + "%lx to %lx\n", __func__, start, start+size); + /* + * check for range within RMO + */ + zone = page_zone(pfn_to_page(start_pfn)); + + printk("%s(): memory will be removed from " + "the %s zone\n", __func__, zone->name); + + /* + * not handling removing memory ranges that + * overlap multiple zones yet + */ + if (end_pfn > (zone->zone_start_pfn + zone->spanned_pages)) + goto overlap; + + /* make sure it is NOT in RMO */ + if ((start < lmb.rmo_size) || ((start+size) < lmb.rmo_size)) { + printk("%s(): range to be removed must NOT be in RMO!\n", + __func__); + goto in_rmo; + } + + return __remove_pages(zone, start_pfn, nr_pages); + +overlap: + printk("%s(): memory range to be removed overlaps " + "multiple zones!!!\n", __func__); +in_rmo: + return -1; +} +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/mm/ioremap.c b/arch/s390/mm/ioremap.c index c6c39d868bc8..0f6e9ecbefe2 100644 --- a/arch/s390/mm/ioremap.c +++ b/arch/s390/mm/ioremap.c @@ -58,7 +58,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo if (address >= end) BUG(); do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); @@ -80,7 +80,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, flush_cache_all(); if (address >= end) BUG(); - spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; pmd = pmd_alloc(&init_mm, dir, address); @@ -94,7 +93,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return 0; } diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 7abba2161da6..775f86cd3fe8 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -194,10 +194,13 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess, unsigned long address) { unsigned long addrmax = P4SEG; - pgd_t *dir; + pgd_t *pgd; pmd_t *pmd; pte_t *pte; pte_t entry; + struct mm_struct *mm; + spinlock_t *ptl; + int ret = 1; #ifdef CONFIG_SH_KGDB if (kgdb_nofault && kgdb_bus_err_hook) @@ -208,28 +211,28 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess, addrmax = P4SEG_STORE_QUE + 0x04000000; #endif - if (address >= P3SEG && address < addrmax) - dir = pgd_offset_k(address); - else if (address >= TASK_SIZE) + if (address >= P3SEG && address < addrmax) { + pgd = pgd_offset_k(address); + mm = NULL; + } else if (address >= TASK_SIZE) return 1; - else if (!current->mm) + else if (!(mm = current->mm)) return 1; else - dir = pgd_offset(current->mm, address); + pgd = pgd_offset(mm, address); - pmd = pmd_offset(dir, address); - if (pmd_none(*pmd)) - return 1; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); + pmd = pmd_offset(pgd, address); + if (pmd_none_or_clear_bad(pmd)) return 1; - } - pte = pte_offset_kernel(pmd, address); + if (mm) + pte = pte_offset_map_lock(mm, pmd, address, &ptl); + else + pte = pte_offset_kernel(pmd, address); + entry = *pte; if (pte_none(entry) || pte_not_present(entry) || (writeaccess && !pte_write(entry))) - return 1; + goto unlock; if (writeaccess) entry = pte_mkdirty(entry); @@ -251,8 +254,11 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess, set_pte(pte, entry); update_mmu_cache(NULL, address, entry); - - return 0; + ret = 0; +unlock: + if (mm) + pte_unmap_unlock(pte, ptl); + return ret; } void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 95bb1a6c6060..6b7a7688c98e 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -54,8 +54,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } -#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) - void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index 9f490c2742f0..e794e27a72f1 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -57,7 +57,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, if (address >= end) BUG(); do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); @@ -79,7 +79,6 @@ int remap_area_pages(unsigned long address, unsigned long phys_addr, flush_cache_all(); if (address >= end) BUG(); - spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; pmd = pmd_alloc(&init_mm, dir, address); @@ -93,7 +92,6 @@ int remap_area_pages(unsigned long address, unsigned long phys_addr, address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return error; } diff --git a/arch/sh64/mm/cache.c b/arch/sh64/mm/cache.c index 3b87e25ea773..c0c1b21350d8 100644 --- a/arch/sh64/mm/cache.c +++ b/arch/sh64/mm/cache.c @@ -584,32 +584,36 @@ static void sh64_dcache_purge_phy_page(unsigned long paddr) } } -static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr) +static void sh64_dcache_purge_user_pages(struct mm_struct *mm, + unsigned long addr, unsigned long end) { pgd_t *pgd; pmd_t *pmd; pte_t *pte; pte_t entry; + spinlock_t *ptl; unsigned long paddr; - /* NOTE : all the callers of this have mm->page_table_lock held, so the - following page table traversal is safe even on SMP/pre-emptible. */ - - if (!mm) return; /* No way to find physical address of page */ - pgd = pgd_offset(mm, eaddr); - if (pgd_bad(*pgd)) return; - - pmd = pmd_offset(pgd, eaddr); - if (pmd_none(*pmd) || pmd_bad(*pmd)) return; - - pte = pte_offset_kernel(pmd, eaddr); - entry = *pte; - if (pte_none(entry) || !pte_present(entry)) return; - - paddr = pte_val(entry) & PAGE_MASK; - - sh64_dcache_purge_coloured_phy_page(paddr, eaddr); - + if (!mm) + return; /* No way to find physical address of page */ + + pgd = pgd_offset(mm, addr); + if (pgd_bad(*pgd)) + return; + + pmd = pmd_offset(pgd, addr); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + return; + + pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + do { + entry = *pte; + if (pte_none(entry) || !pte_present(entry)) + continue; + paddr = pte_val(entry) & PAGE_MASK; + sh64_dcache_purge_coloured_phy_page(paddr, addr); + } while (pte++, addr += PAGE_SIZE, addr != end); + pte_unmap_unlock(pte - 1, ptl); } /****************************************************************************/ @@ -668,7 +672,7 @@ static void sh64_dcache_purge_user_range(struct mm_struct *mm, int n_pages; n_pages = ((end - start) >> PAGE_SHIFT); - if (n_pages >= 64) { + if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) { #if 1 sh64_dcache_purge_all(); #else @@ -707,20 +711,10 @@ static void sh64_dcache_purge_user_range(struct mm_struct *mm, } #endif } else { - /* 'Small' range */ - unsigned long aligned_start; - unsigned long eaddr; - unsigned long last_page_start; - - aligned_start = start & PAGE_MASK; - /* 'end' is 1 byte beyond the end of the range */ - last_page_start = (end - 1) & PAGE_MASK; - - eaddr = aligned_start; - while (eaddr <= last_page_start) { - sh64_dcache_purge_user_page(mm, eaddr); - eaddr += PAGE_SIZE; - } + /* Small range, covered by a single page table page */ + start &= PAGE_MASK; /* should already be so */ + end = PAGE_ALIGN(end); /* should already be so */ + sh64_dcache_purge_user_pages(mm, start, end); } return; } @@ -880,9 +874,7 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, addresses from the user address space specified by mm, after writing back any dirty data. - Note(1), 'end' is 1 byte beyond the end of the range to flush. - - Note(2), this is called with mm->page_table_lock held.*/ + Note, 'end' is 1 byte beyond the end of the range to flush. */ sh64_dcache_purge_user_range(mm, start, end); sh64_icache_inv_user_page_range(mm, start, end); @@ -898,7 +890,7 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned the I-cache must be searched too in case the page in question is both writable and being executed from (e.g. stack trampolines.) - Note(1), this is called with mm->page_table_lock held. + Note, this is called with pte lock held. */ sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT); diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c index dcd9c8a8baf8..ed6a505b3ee2 100644 --- a/arch/sh64/mm/hugetlbpage.c +++ b/arch/sh64/mm/hugetlbpage.c @@ -54,41 +54,31 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } -#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) - -static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, - struct page *page, pte_t * page_table, int write_access) +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) { - unsigned long i; - pte_t entry; - - add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); - - if (write_access) - entry = pte_mkwrite(pte_mkdirty(mk_pte(page, - vma->vm_page_prot))); - else - entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); - entry = pte_mkyoung(entry); - mk_pte_huge(entry); + int i; for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - set_pte(page_table, entry); - page_table++; - + set_pte_at(mm, addr, ptep, entry); + ptep++; + addr += PAGE_SIZE; pte_val(entry) += PAGE_SIZE; } } -pte_t huge_ptep_get_and_clear(pte_t *ptep) +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { pte_t entry; + int i; entry = *ptep; for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - pte_clear(pte); - pte++; + pte_clear(mm, addr, ptep); + addr += PAGE_SIZE; + ptep++; } return entry; @@ -106,79 +96,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } -int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma) -{ - pte_t *src_pte, *dst_pte, entry; - struct page *ptepage; - unsigned long addr = vma->vm_start; - unsigned long end = vma->vm_end; - int i; - - while (addr < end) { - dst_pte = huge_pte_alloc(dst, addr); - if (!dst_pte) - goto nomem; - src_pte = huge_pte_offset(src, addr); - BUG_ON(!src_pte || pte_none(*src_pte)); - entry = *src_pte; - ptepage = pte_page(entry); - get_page(ptepage); - for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - set_pte(dst_pte, entry); - pte_val(entry) += PAGE_SIZE; - dst_pte++; - } - add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); - addr += HPAGE_SIZE; - } - return 0; - -nomem: - return -ENOMEM; -} - -int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, - struct page **pages, struct vm_area_struct **vmas, - unsigned long *position, int *length, int i) -{ - unsigned long vaddr = *position; - int remainder = *length; - - WARN_ON(!is_vm_hugetlb_page(vma)); - - while (vaddr < vma->vm_end && remainder) { - if (pages) { - pte_t *pte; - struct page *page; - - pte = huge_pte_offset(mm, vaddr); - - /* hugetlb should be locked, and hence, prefaulted */ - BUG_ON(!pte || pte_none(*pte)); - - page = pte_page(*pte); - - WARN_ON(!PageCompound(page)); - - get_page(page); - pages[i] = page; - } - - if (vmas) - vmas[i] = vma; - - vaddr += PAGE_SIZE; - --remainder; - ++i; - } - - *length = remainder; - *position = vaddr; - - return i; -} - struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { @@ -195,84 +112,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, { return NULL; } - -void unmap_hugepage_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long address; - pte_t *pte; - struct page *page; - int i; - - BUG_ON(start & (HPAGE_SIZE - 1)); - BUG_ON(end & (HPAGE_SIZE - 1)); - - for (address = start; address < end; address += HPAGE_SIZE) { - pte = huge_pte_offset(mm, address); - BUG_ON(!pte); - if (pte_none(*pte)) - continue; - page = pte_page(*pte); - put_page(page); - for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - pte_clear(mm, address+(i*PAGE_SIZE), pte); - pte++; - } - } - add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); - flush_tlb_range(vma, start, end); -} - -int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) -{ - struct mm_struct *mm = current->mm; - unsigned long addr; - int ret = 0; - - BUG_ON(vma->vm_start & ~HPAGE_MASK); - BUG_ON(vma->vm_end & ~HPAGE_MASK); - - spin_lock(&mm->page_table_lock); - for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { - unsigned long idx; - pte_t *pte = huge_pte_alloc(mm, addr); - struct page *page; - - if (!pte) { - ret = -ENOMEM; - goto out; - } - if (!pte_none(*pte)) - continue; - - idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) - + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); - page = find_get_page(mapping, idx); - if (!page) { - /* charge the fs quota first */ - if (hugetlb_get_quota(mapping)) { - ret = -ENOMEM; - goto out; - } - page = alloc_huge_page(); - if (!page) { - hugetlb_put_quota(mapping); - ret = -ENOMEM; - goto out; - } - ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); - if (! ret) { - unlock_page(page); - } else { - hugetlb_put_quota(mapping); - free_huge_page(page); - goto out; - } - } - set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); - } -out: - spin_unlock(&mm->page_table_lock); - return ret; -} diff --git a/arch/sh64/mm/ioremap.c b/arch/sh64/mm/ioremap.c index f4003da556bc..fb1866fa2c9d 100644 --- a/arch/sh64/mm/ioremap.c +++ b/arch/sh64/mm/ioremap.c @@ -79,7 +79,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo BUG(); do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); @@ -101,7 +101,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, flush_cache_all(); if (address >= end) BUG(); - spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd = pmd_alloc(&init_mm, dir, address); error = -ENOMEM; @@ -115,7 +114,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return 0; } diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c index 20ccb957fb77..9604893ffdbd 100644 --- a/arch/sparc/mm/generic.c +++ b/arch/sparc/mm/generic.c @@ -73,14 +73,16 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, int space = GET_IOSPACE(pfn); unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; + /* See comment in mm/memory.c remap_pfn_range */ + vma->vm_flags |= VM_IO | VM_RESERVED; + prot = __pgprot(pg_iobits); offset -= from; dir = pgd_offset(mm, from); flush_cache_range(vma, beg, end); - spin_lock(&mm->page_table_lock); while (from < end) { - pmd_t *pmd = pmd_alloc(current->mm, dir, from); + pmd_t *pmd = pmd_alloc(mm, dir, from); error = -ENOMEM; if (!pmd) break; @@ -90,7 +92,6 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, from = (from + PGDIR_SIZE) & PGDIR_MASK; dir++; } - spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, beg, end); return error; diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c index b2854ef221d0..edf52d06b280 100644 --- a/arch/sparc64/kernel/binfmt_aout32.c +++ b/arch/sparc64/kernel/binfmt_aout32.c @@ -241,7 +241,6 @@ static int load_aout32_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); - set_mm_counter(current->mm, rss, 0); current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c index c954d91f01d0..112c316e7cd2 100644 --- a/arch/sparc64/mm/generic.c +++ b/arch/sparc64/mm/generic.c @@ -127,14 +127,16 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, int space = GET_IOSPACE(pfn); unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; + /* See comment in mm/memory.c remap_pfn_range */ + vma->vm_flags |= VM_IO | VM_RESERVED; + prot = __pgprot(pg_iobits); offset -= from; dir = pgd_offset(mm, from); flush_cache_range(vma, beg, end); - spin_lock(&mm->page_table_lock); while (from < end) { - pud_t *pud = pud_alloc(current->mm, dir, from); + pud_t *pud = pud_alloc(mm, dir, from); error = -ENOMEM; if (!pud) break; @@ -144,8 +146,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, from = (from + PGDIR_SIZE) & PGDIR_MASK; dir++; } - flush_tlb_range(vma, beg, end); - spin_unlock(&mm->page_table_lock); + flush_tlb_range(vma, beg, end); return error; } diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c index 90ca99d0b89c..8b104be4662b 100644 --- a/arch/sparc64/mm/tlb.c +++ b/arch/sparc64/mm/tlb.c @@ -18,8 +18,7 @@ /* Heavily inspired by the ppc64 code. */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) = - { NULL, 0, 0, 0, 0, 0, { 0 }, { NULL }, }; +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) = { 0, }; void flush_tlb_pending(void) { @@ -72,7 +71,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t no_cache_flush: - if (mp->tlb_frozen) + if (mp->fullmm) return; nr = mp->tlb_nr; @@ -97,7 +96,7 @@ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long unsigned long nr = mp->tlb_nr; long s = start, e = end, vpte_base; - if (mp->tlb_frozen) + if (mp->fullmm) return; /* If start is greater than end, that is a real problem. */ diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h index 45d7da6c3b2c..8efc1e0f1b84 100644 --- a/arch/um/include/tlb.h +++ b/arch/um/include/tlb.h @@ -34,7 +34,6 @@ struct host_vm_op { } u; }; -extern void mprotect_kernel_vm(int w); extern void force_flush_all(void); extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force, diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 0d73ceeece72..34b54a3e2132 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -222,6 +222,7 @@ void *um_virt_to_phys(struct task_struct *task, unsigned long addr, pud_t *pud; pmd_t *pmd; pte_t *pte; + pte_t ptent; if(task->mm == NULL) return(ERR_PTR(-EINVAL)); @@ -238,12 +239,13 @@ void *um_virt_to_phys(struct task_struct *task, unsigned long addr, return(ERR_PTR(-EINVAL)); pte = pte_offset_kernel(pmd, addr); - if(!pte_present(*pte)) + ptent = *pte; + if(!pte_present(ptent)) return(ERR_PTR(-EINVAL)); if(pte_out != NULL) - *pte_out = *pte; - return((void *) (pte_val(*pte) & PAGE_MASK) + (addr & ~PAGE_MASK)); + *pte_out = ptent; + return((void *) (pte_val(ptent) & PAGE_MASK) + (addr & ~PAGE_MASK)); } char *current_cmd(void) diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 240143b616a2..9e5e39cea821 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -28,7 +28,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, pmd_t *pmd; pte_t *pte; - spin_lock(&mm->page_table_lock); pgd = pgd_offset(mm, proc); pud = pud_alloc(mm, pgd, proc); if (!pud) @@ -63,7 +62,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); *pte = pte_mkexec(*pte); *pte = pte_wrprotect(*pte); - spin_unlock(&mm->page_table_lock); return(0); out_pmd: @@ -71,7 +69,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, out_pte: pmd_free(pmd); out: - spin_unlock(&mm->page_table_lock); return(-ENOMEM); } @@ -147,6 +144,7 @@ void destroy_context_skas(struct mm_struct *mm) if(!proc_mm || !ptrace_faultinfo){ free_page(mmu->id.stack); + pte_lock_deinit(virt_to_page(mmu->last_page_table)); pte_free_kernel((pte_t *) mmu->last_page_table); dec_page_state(nr_page_table_pages); #ifdef CONFIG_3_LEVEL_PGTABLES diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c index f1d85dbb45b9..ae6217c86135 100644 --- a/arch/um/kernel/tt/tlb.c +++ b/arch/um/kernel/tt/tlb.c @@ -74,42 +74,6 @@ void flush_tlb_kernel_range_tt(unsigned long start, unsigned long end) atomic_inc(&vmchange_seq); } -static void protect_vm_page(unsigned long addr, int w, int must_succeed) -{ - int err; - - err = protect_memory(addr, PAGE_SIZE, 1, w, 1, must_succeed); - if(err == 0) return; - else if((err == -EFAULT) || (err == -ENOMEM)){ - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - protect_vm_page(addr, w, 1); - } - else panic("protect_vm_page : protect failed, errno = %d\n", err); -} - -void mprotect_kernel_vm(int w) -{ - struct mm_struct *mm; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long addr; - - mm = &init_mm; - for(addr = start_vm; addr < end_vm;){ - pgd = pgd_offset(mm, addr); - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); - if(pmd_present(*pmd)){ - pte = pte_offset_kernel(pmd, addr); - if(pte_present(*pte)) protect_vm_page(addr, w, 0); - addr += PAGE_SIZE; - } - else addr += PMD_SIZE; - } -} - void flush_tlb_kernel_vm_tt(void) { flush_tlb_kernel_range(start_vm, end_vm); diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c index 3e6780fa0186..93c60f4aa47a 100644 --- a/arch/x86_64/ia32/ia32_aout.c +++ b/arch/x86_64/ia32/ia32_aout.c @@ -314,7 +314,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->mm->free_area_cache = TASK_UNMAPPED_BASE; current->mm->cached_hole_size = 0; - set_mm_counter(current->mm, rss, 0); current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index 6972df480d2b..ecf7acb5db9b 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -60,7 +60,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo if (address >= end) BUG(); do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); @@ -105,7 +105,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, flush_cache_all(); if (address >= end) BUG(); - spin_lock(&init_mm.page_table_lock); do { pud_t *pud; pud = pud_alloc(&init_mm, pgd, address); @@ -119,7 +118,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, address = (address + PGDIR_SIZE) & PGDIR_MASK; pgd++; } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return error; } |