diff options
Diffstat (limited to 'arch/s390')
57 files changed, 1415 insertions, 1404 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8a4cae78f03c..8b7892bf6d8b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -116,6 +116,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_GENERIC_HARDIRQS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -445,6 +446,16 @@ config PCI_NR_FUNCTIONS This allows you to specify the maximum number of PCI functions which this kernel will support. +config PCI_NR_MSI + int "Maximum number of MSI interrupts (64-32768)" + range 64 32768 + default "256" + help + This defines the number of virtual interrupts the kernel will + provide for MSI interrupts. If you configure your system to have + too few drivers will fail to allocate MSI interrupts for all + PCI devices. + source "drivers/pci/Kconfig" source "drivers/pci/pcie/Kconfig" source "drivers/pci/hotplug/Kconfig" diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index bb5dd496614f..17ab8b7b53cc 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -105,7 +105,7 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) int hypfs_dbfs_init(void) { dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); - return PTR_RET(dbfs_dir); + return PTR_ERR_OR_ZERO(dbfs_dir); } void hypfs_dbfs_exit(void) diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 4066cee0c2d2..4bbb5957ed1b 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -9,6 +9,8 @@ #ifndef _ASM_S390_AIRQ_H #define _ASM_S390_AIRQ_H +#include <linux/bit_spinlock.h> + struct airq_struct { struct hlist_node list; /* Handler queueing. */ void (*handler)(struct airq_struct *); /* Thin-interrupt handler */ @@ -23,4 +25,69 @@ struct airq_struct { int register_adapter_interrupt(struct airq_struct *airq); void unregister_adapter_interrupt(struct airq_struct *airq); +/* Adapter interrupt bit vector */ +struct airq_iv { + unsigned long *vector; /* Adapter interrupt bit vector */ + unsigned long *avail; /* Allocation bit mask for the bit vector */ + unsigned long *bitlock; /* Lock bit mask for the bit vector */ + unsigned long *ptr; /* Pointer associated with each bit */ + unsigned int *data; /* 32 bit value associated with each bit */ + unsigned long bits; /* Number of bits in the vector */ + unsigned long end; /* Number of highest allocated bit + 1 */ + spinlock_t lock; /* Lock to protect alloc & free */ +}; + +#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */ +#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */ +#define AIRQ_IV_PTR 4 /* Allocate the ptr array */ +#define AIRQ_IV_DATA 8 /* Allocate the data array */ + +struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags); +void airq_iv_release(struct airq_iv *iv); +unsigned long airq_iv_alloc_bit(struct airq_iv *iv); +void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit); +unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start, + unsigned long end); + +static inline unsigned long airq_iv_end(struct airq_iv *iv) +{ + return iv->end; +} + +static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_lock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_unlock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit, + unsigned int data) +{ + iv->data[bit] = data; +} + +static inline unsigned int airq_iv_get_data(struct airq_iv *iv, + unsigned long bit) +{ + return iv->data[bit]; +} + +static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit, + unsigned long ptr) +{ + iv->ptr[bit] = ptr; +} + +static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv, + unsigned long bit) +{ + return iv->ptr[bit]; +} + #endif /* _ASM_S390_AIRQ_H */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 7d4676758733..10135a38673c 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -216,7 +216,7 @@ static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " oc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc"); } static inline void @@ -244,7 +244,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " nc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc"); } static inline void @@ -271,7 +271,7 @@ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " xc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc"); } static inline void @@ -301,7 +301,7 @@ test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " oc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } @@ -320,7 +320,7 @@ test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " nc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } @@ -339,7 +339,7 @@ test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " xc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index ffb898961c8d..d42625053c37 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -296,6 +296,7 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, return 0; } +void channel_subsystem_reinit(void); extern void css_schedule_reprobe(void); extern void reipl_ccw_dev(struct ccw_dev_id *id); diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index d2ff41370c0c..f65bd3634519 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -13,9 +13,6 @@ #include <asm/div64.h> -#define __ARCH_HAS_VTIME_ACCOUNT -#define __ARCH_HAS_VTIME_TASK_SWITCH - /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ typedef unsigned long long __nocast cputime_t; diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 0c82ba86e997..a908d2941c5d 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -20,4 +20,9 @@ #define HARDIRQ_BITS 8 +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); +} + #endif /* __ASM_HARDIRQ_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index bd90359d6d22..11eae5f55b70 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -17,6 +17,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +pte_t huge_ptep_get(pte_t *ptep); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); /* * If the arch doesn't supply something else, assume that hugepage @@ -38,147 +41,75 @@ static inline int prepare_hugepage_range(struct file *file, int arch_prepare_hugepage(struct page *page); void arch_release_hugepage(struct page *page); -static inline pte_t huge_pte_wrprotect(pte_t pte) +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - pte_val(pte) |= _PAGE_RO; - return pte; + pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; } -static inline int huge_pte_none(pte_t pte) +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { - return (pte_val(pte) & _SEGMENT_ENTRY_INV) && - !(pte_val(pte) & _SEGMENT_ENTRY_RO); + huge_ptep_get_and_clear(vma->vm_mm, address, ptep); } -static inline pte_t huge_ptep_get(pte_t *ptep) +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) { - pte_t pte = *ptep; - unsigned long mask; - - if (!MACHINE_HAS_HPAGE) { - ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN); - if (ptep) { - mask = pte_val(pte) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte = pte_mkhuge(*ptep); - pte_val(pte) |= mask; - } + int changed = !pte_same(huge_ptep_get(ptep), pte); + if (changed) { + huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } - return pte; + return changed; } -static inline void __pmd_csp(pmd_t *pmdp) +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - register unsigned long reg2 asm("2") = pmd_val(*pmdp); - register unsigned long reg3 asm("3") = pmd_val(*pmdp) | - _SEGMENT_ENTRY_INV; - register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; - - asm volatile( - " csp %1,%3" - : "=m" (*pmdp) - : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); + pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); + set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } -static inline void huge_ptep_invalidate(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - pmd_t *pmdp = (pmd_t *) ptep; - - if (MACHINE_HAS_IDTE) - __pmd_idte(address, pmdp); - else - __pmd_csp(pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; -} - -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = huge_ptep_get(ptep); - - huge_ptep_invalidate(mm, addr, ptep); - return pte; -} - -#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ - if (__changed) { \ - huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ - set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ - } \ - __changed; \ -}) - -#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \ -({ \ - pte_t __pte = huge_ptep_get(__ptep); \ - if (huge_pte_write(__pte)) { \ - huge_ptep_invalidate(__mm, __addr, __ptep); \ - set_huge_pte_at(__mm, __addr, __ptep, \ - huge_pte_wrprotect(__pte)); \ - } \ -}) - -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) +static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) { - huge_ptep_invalidate(vma->vm_mm, address, ptep); + return mk_pte(page, pgprot); } -static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) +static inline int huge_pte_none(pte_t pte) { - pte_t pte; - pmd_t pmd; - - pmd = mk_pmd_phys(page_to_phys(page), pgprot); - pte_val(pte) = pmd_val(pmd); - return pte; + return pte_none(pte); } static inline int huge_pte_write(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - return pmd_write(pmd); + return pte_write(pte); } static inline int huge_pte_dirty(pte_t pte) { - /* No dirty bit in the segment table entry. */ - return 0; + return pte_dirty(pte); } static inline pte_t huge_pte_mkwrite(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - pte_val(pte) = pmd_val(pmd_mkwrite(pmd)); - return pte; + return pte_mkwrite(pte); } static inline pte_t huge_pte_mkdirty(pte_t pte) { - /* No dirty bit in the segment table entry. */ - return pte; + return pte_mkdirty(pte); } -static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) +static inline pte_t huge_pte_wrprotect(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - pte_val(pte) = pmd_val(pmd_modify(pmd, newprot)); - return pte; + return pte_wrprotect(pte); } -static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) +static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) { - pmd_clear((pmd_t *) ptep); + return pte_modify(pte, newprot); } #endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h index 7e3d2586c1ff..ee96a8b697f9 100644 --- a/arch/s390/include/asm/hw_irq.h +++ b/arch/s390/include/asm/hw_irq.h @@ -4,19 +4,8 @@ #include <linux/msi.h> #include <linux/pci.h> -static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) -{ - return __irq_get_msi_desc(irq); -} - -/* Must be called with msi map lock held */ -static inline int irq_set_msi_desc(unsigned int irq, struct msi_desc *msi) -{ - if (!msi) - return -EINVAL; - - msi->irq = irq; - return 0; -} +void __init init_airq_interrupts(void); +void __init init_cio_interrupts(void); +void __init init_ext_interrupts(void); #endif diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 87c17bfb2968..1eaa3625803c 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -1,17 +1,28 @@ #ifndef _ASM_IRQ_H #define _ASM_IRQ_H +#define EXT_INTERRUPT 1 +#define IO_INTERRUPT 2 +#define THIN_INTERRUPT 3 + +#define NR_IRQS_BASE 4 + +#ifdef CONFIG_PCI_NR_MSI +# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) +#else +# define NR_IRQS NR_IRQS_BASE +#endif + +/* This number is used when no interrupt has been assigned */ +#define NO_IRQ 0 + +#ifndef __ASSEMBLY__ + #include <linux/hardirq.h> #include <linux/percpu.h> #include <linux/cache.h> #include <linux/types.h> -enum interruption_main_class { - EXTERNAL_INTERRUPT, - IO_INTERRUPT, - NR_IRQS -}; - enum interruption_class { IRQEXT_CLK, IRQEXT_EXC, @@ -72,14 +83,8 @@ void service_subclass_irq_unregister(void); void measurement_alert_subclass_register(void); void measurement_alert_subclass_unregister(void); -#ifdef CONFIG_LOCKDEP -# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq) -# define disable_irq_nosync_lockdep_irqsave(irq, flags) \ - disable_irq_nosync(irq) -# define disable_irq_lockdep(irq) disable_irq(irq) -# define enable_irq_lockdep(irq) enable_irq(irq) -# define enable_irq_lockdep_irqrestore(irq, flags) \ - enable_irq(irq) -#endif +#define irq_canonicalize(irq) (irq) + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3238d4004e84..e87ecaa2c569 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -274,6 +274,14 @@ struct kvm_arch{ int css_support; }; +#define KVM_HVA_ERR_BAD (-1UL) +#define KVM_HVA_ERR_RO_BAD (-2UL) + +static inline bool kvm_is_error_hva(unsigned long addr) +{ + return IS_ERR_VALUE(addr); +} + extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; #endif diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 6340178748bf..ff132ac64ddd 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -12,8 +12,6 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - /* Cloned contexts will be created with extended page tables. */ - unsigned int alloc_pgste:1; /* The mmu context has extended page tables. */ unsigned int has_pgste:1; } mm_context_t; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 084e7755ed9b..9f973d8de90e 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -21,24 +21,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_64BIT mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif - if (current->mm && current->mm->context.alloc_pgste) { - /* - * alloc_pgste indicates, that any NEW context will be created - * with extended page tables. The old context is unchanged. The - * page table allocation and the page table operations will - * look at has_pgste to distinguish normal and extended page - * tables. The only way to create extended page tables is to - * set alloc_pgste and then create a new context (e.g. dup_mm). - * The page table allocation is called after init_new_context - * and if has_pgste is set, it will create extended page - * tables. - */ - mm->context.has_pgste = 1; - mm->context.alloc_pgste = 1; - } else { - mm->context.has_pgste = 0; - mm->context.alloc_pgste = 0; - } + mm->context.has_pgste = 0; mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; @@ -77,8 +60,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, WARN_ON(atomic_read(&prev->context.attach_count) < 0); atomic_inc(&next->context.attach_count); /* Check for TLBs not flushed yet */ - if (next->context.flush_mm) - __tlb_flush_mm(next); + __tlb_flush_mm_lazy(next); } #define enter_lazy_tlb(mm,tsk) do { } while (0) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 5d64fb7619cc..1e51f2915b2e 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -32,16 +32,6 @@ void storage_key_init_range(unsigned long start, unsigned long end); -static inline unsigned long pfmf(unsigned long function, unsigned long address) -{ - asm volatile( - " .insn rre,0xb9af0000,%[function],%[address]" - : [address] "+a" (address) - : [function] "d" (function) - : "memory"); - return address; -} - static inline void clear_page(void *page) { register unsigned long reg1 asm ("1") = 0; @@ -150,15 +140,6 @@ static inline int page_reset_referenced(unsigned long addr) #define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ #define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ -/* - * Test and clear referenced bit in storage key. - */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -static inline int page_test_and_clear_young(unsigned long pfn) -{ - return page_reset_referenced(pfn << PAGE_SHIFT); -} - struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6e577ba0e5da..c290f13d1c47 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -6,6 +6,7 @@ /* must be set before including pci_clp.h */ #define PCI_BAR_COUNT 6 +#include <linux/pci.h> #include <asm-generic/pci.h> #include <asm-generic/pci-dma-compat.h> #include <asm/pci_clp.h> @@ -53,14 +54,9 @@ struct zpci_fmb { atomic64_t unmapped_pages; } __packed __aligned(16); -struct msi_map { - unsigned long irq; - struct msi_desc *msi; - struct hlist_node msi_chain; -}; - -#define ZPCI_NR_MSI_VECS 64 -#define ZPCI_MSI_MASK (ZPCI_NR_MSI_VECS - 1) +#define ZPCI_MSI_VEC_BITS 11 +#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS) +#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1) enum zpci_state { ZPCI_FN_STATE_RESERVED, @@ -91,8 +87,7 @@ struct zpci_dev { /* IRQ stuff */ u64 msi_addr; /* MSI address */ - struct zdev_irq_map *irq_map; - struct msi_map *msi_map[ZPCI_NR_MSI_VECS]; + struct airq_iv *aibv; /* adapter interrupt bit vector */ unsigned int aisb; /* number of the summary bit */ /* DMA stuff */ @@ -122,11 +117,6 @@ struct zpci_dev { struct dentry *debugfs_perf; }; -struct pci_hp_callback_ops { - int (*create_slot) (struct zpci_dev *zdev); - void (*remove_slot) (struct zpci_dev *zdev); -}; - static inline bool zdev_enabled(struct zpci_dev *zdev) { return (zdev->fh & (1UL << 31)) ? true : false; @@ -146,32 +136,38 @@ int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); /* CLP */ -int clp_find_pci_devices(void); +int clp_scan_pci_devices(void); +int clp_rescan_pci_devices(void); +int clp_rescan_pci_devices_simple(void); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); -/* MSI */ -struct msi_desc *__irq_get_msi_desc(unsigned int); -int zpci_msi_set_mask_bits(struct msi_desc *, u32, u32); -int zpci_setup_msi_irq(struct zpci_dev *, struct msi_desc *, unsigned int, int); -void zpci_teardown_msi_irq(struct zpci_dev *, struct msi_desc *); -int zpci_msihash_init(void); -void zpci_msihash_exit(void); - #ifdef CONFIG_PCI /* Error handling and recovery */ void zpci_event_error(void *); void zpci_event_availability(void *); +void zpci_rescan(void); #else /* CONFIG_PCI */ static inline void zpci_event_error(void *e) {} static inline void zpci_event_availability(void *e) {} +static inline void zpci_rescan(void) {} #endif /* CONFIG_PCI */ +#ifdef CONFIG_HOTPLUG_PCI_S390 +int zpci_init_slot(struct zpci_dev *); +void zpci_exit_slot(struct zpci_dev *); +#else /* CONFIG_HOTPLUG_PCI_S390 */ +static inline int zpci_init_slot(struct zpci_dev *zdev) +{ + return 0; +} +static inline void zpci_exit_slot(struct zpci_dev *zdev) {} +#endif /* CONFIG_HOTPLUG_PCI_S390 */ + /* Helpers */ struct zpci_dev *get_zdev(struct pci_dev *); struct zpci_dev *get_zdev_by_fid(u32); -bool zpci_fid_present(u32); /* sysfs */ int zpci_sysfs_add_device(struct device *); @@ -181,14 +177,6 @@ void zpci_sysfs_remove_device(struct device *); int zpci_dma_init(void); void zpci_dma_exit(void); -/* Hotplug */ -extern struct mutex zpci_list_lock; -extern struct list_head zpci_list; -extern unsigned int s390_pci_probe; - -void zpci_register_hp_ops(struct pci_hp_callback_ops *); -void zpci_deregister_hp_ops(void); - /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); int zpci_fmb_disable_device(struct zpci_dev *); diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index e6a2bdd4d705..df6eac9f0cb4 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -79,11 +79,11 @@ struct zpci_fib { } __packed; -int s390pci_mod_fc(u64 req, struct zpci_fib *fib); -int s390pci_refresh_trans(u64 fn, u64 addr, u64 range); -int s390pci_load(u64 *data, u64 req, u64 offset); -int s390pci_store(u64 data, u64 req, u64 offset); -int s390pci_store_block(const u64 *data, u64 req, u64 offset); -void set_irq_ctrl(u16 ctl, char *unused, u8 isc); +int zpci_mod_fc(u64 req, struct zpci_fib *fib); +int zpci_refresh_trans(u64 fn, u64 addr, u64 range); +int zpci_load(u64 *data, u64 req, u64 offset); +int zpci_store(u64 data, u64 req, u64 offset); +int zpci_store_block(const u64 *data, u64 req, u64 offset); +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); #endif diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index 83a9caa6ae53..d194d544d694 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \ u64 data; \ int rc; \ \ - rc = s390pci_load(&data, req, ZPCI_OFFSET(addr)); \ + rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \ if (rc) \ data = -1ULL; \ return (RETTYPE) data; \ @@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val, \ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ u64 data = (VALTYPE) val; \ \ - s390pci_store(data, req, ZPCI_OFFSET(addr)); \ + zpci_store(data, req, ZPCI_OFFSET(addr)); \ } zpci_read(8, u64) @@ -83,7 +83,7 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len val = 0; /* let FW report error */ break; } - return s390pci_store(val, req, offset); + return zpci_store(val, req, offset); } static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) @@ -91,7 +91,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) u64 data; int cc; - cc = s390pci_load(&data, req, offset); + cc = zpci_load(&data, req, offset); if (cc) goto out; @@ -115,7 +115,7 @@ out: static inline int zpci_write_block(u64 req, const u64 *data, u64 offset) { - return s390pci_store_block(data, req, offset); + return zpci_store_block(data, req, offset); } static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 75fb726de91f..9b60a36c348d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -217,63 +217,57 @@ extern unsigned long MODULES_END; /* Hardware bits in the page table entry */ #define _PAGE_CO 0x100 /* HW Change-bit override */ -#define _PAGE_RO 0x200 /* HW read-only bit */ +#define _PAGE_PROTECT 0x200 /* HW read-only bit */ #define _PAGE_INVALID 0x400 /* HW invalid bit */ +#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */ /* Software bits in the page table entry */ -#define _PAGE_SWT 0x001 /* SW pte type bit t */ -#define _PAGE_SWX 0x002 /* SW pte type bit x */ -#define _PAGE_SWC 0x004 /* SW pte changed bit */ -#define _PAGE_SWR 0x008 /* SW pte referenced bit */ -#define _PAGE_SWW 0x010 /* SW pte write bit */ -#define _PAGE_SPECIAL 0x020 /* SW associated with special page */ +#define _PAGE_PRESENT 0x001 /* SW pte present bit */ +#define _PAGE_TYPE 0x002 /* SW pte type bit */ +#define _PAGE_YOUNG 0x004 /* SW pte young bit */ +#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ +#define _PAGE_READ 0x010 /* SW pte read bit */ +#define _PAGE_WRITE 0x020 /* SW pte write bit */ +#define _PAGE_SPECIAL 0x040 /* SW associated with special page */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ - _PAGE_SWC | _PAGE_SWR) - -/* Six different types of pages. */ -#define _PAGE_TYPE_EMPTY 0x400 -#define _PAGE_TYPE_NONE 0x401 -#define _PAGE_TYPE_SWAP 0x403 -#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ -#define _PAGE_TYPE_RO 0x200 -#define _PAGE_TYPE_RW 0x000 - -/* - * Only four types for huge pages, using the invalid bit and protection bit - * of a segment table entry. - */ -#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */ -#define _HPAGE_TYPE_NONE 0x220 -#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */ -#define _HPAGE_TYPE_RW 0x000 + _PAGE_DIRTY | _PAGE_YOUNG) /* - * PTE type bits are rather complicated. handle_pte_fault uses pte_present, - * pte_none and pte_file to find out the pte type WITHOUT holding the page - * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to - * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs - * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards. - * This change is done while holding the lock, but the intermediate step - * of a previously valid pte with the hw invalid bit set can be observed by - * handle_pte_fault. That makes it necessary that all valid pte types with - * the hw invalid bit set must be distinguishable from the four pte types - * empty, none, swap and file. + * handle_pte_fault uses pte_present, pte_none and pte_file to find out the + * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit + * is used to distinguish present from not-present ptes. It is changed only + * with the page table lock held. + * + * The following table gives the different possible bit combinations for + * the pte hardware and software bits in the last 12 bits of a pte: * - * irxt ipte irxt - * _PAGE_TYPE_EMPTY 1000 -> 1000 - * _PAGE_TYPE_NONE 1001 -> 1001 - * _PAGE_TYPE_SWAP 1011 -> 1011 - * _PAGE_TYPE_FILE 11?1 -> 11?1 - * _PAGE_TYPE_RO 0100 -> 1100 - * _PAGE_TYPE_RW 0000 -> 1000 + * 842100000000 + * 000084210000 + * 000000008421 + * .IR...wrdytp + * empty .10...000000 + * swap .10...xxxx10 + * file .11...xxxxx0 + * prot-none, clean, old .11...000001 + * prot-none, clean, young .11...000101 + * prot-none, dirty, old .10...001001 + * prot-none, dirty, young .10...001101 + * read-only, clean, old .11...010001 + * read-only, clean, young .01...010101 + * read-only, dirty, old .11...011001 + * read-only, dirty, young .01...011101 + * read-write, clean, old .11...110001 + * read-write, clean, young .01...110101 + * read-write, dirty, old .10...111001 + * read-write, dirty, young .00...111101 * - * pte_none is true for bits combinations 1000, 1010, 1100, 1110 - * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 - * pte_file is true for bits combinations 1101, 1111 - * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. + * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 + * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 + * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600 + * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 */ #ifndef CONFIG_64BIT @@ -286,14 +280,25 @@ extern unsigned long MODULES_END; #define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */ #define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ -#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) + +/* + * Segment table entry encoding (I = invalid, R = read-only bit): + * ..R...I..... + * prot-none ..1...1..... + * read-only ..1...0..... + * read-write ..0...0..... + * empty ..0...1..... + */ /* Page status table bits for virtualization */ #define PGSTE_ACC_BITS 0xf0000000UL @@ -303,9 +308,7 @@ extern unsigned long MODULES_END; #define PGSTE_HC_BIT 0x00200000UL #define PGSTE_GR_BIT 0x00040000UL #define PGSTE_GC_BIT 0x00020000UL -#define PGSTE_UR_BIT 0x00008000UL -#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */ +#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */ #else /* CONFIG_64BIT */ @@ -324,8 +327,8 @@ extern unsigned long MODULES_END; /* Bits in the region table entry */ #define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ -#define _REGION_ENTRY_RO 0x200 /* region protection bit */ -#define _REGION_ENTRY_INV 0x20 /* invalid region table entry */ +#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ #define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ #define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ #define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */ @@ -333,29 +336,47 @@ extern unsigned long MODULES_END; #define _REGION_ENTRY_LENGTH 0x03 /* region third length */ #define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH) -#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV) +#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID) #define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) -#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV) +#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) -#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV) +#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ #define _REGION3_ENTRY_RO 0x200 /* page protection bit */ #define _REGION3_ENTRY_CO 0x100 /* change-recording override */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ -#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY (0) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ +#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ +#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG + +/* + * Segment table entry encoding (R = read-only, I = invalid, y = young bit): + * ..R...I...y. + * prot-none, old ..0...1...1. + * prot-none, young ..1...1...1. + * read-only, old ..1...1...0. + * read-only, young ..1...0...1. + * read-write, old ..0...1...0. + * read-write, young ..0...0...1. + * The segment table origin is used to distinguish empty (origin==0) from + * read-write, old segment table entries (origin!=0) + */ + #define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ -#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT) /* Set of bits not changed in pmd_modify */ #define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ @@ -369,9 +390,7 @@ extern unsigned long MODULES_END; #define PGSTE_HC_BIT 0x0020000000000000UL #define PGSTE_GR_BIT 0x0004000000000000UL #define PGSTE_GC_BIT 0x0002000000000000UL -#define PGSTE_UR_BIT 0x0000800000000000UL -#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ +#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ #endif /* CONFIG_64BIT */ @@ -386,14 +405,18 @@ extern unsigned long MODULES_END; /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) -#define PAGE_RO __pgprot(_PAGE_TYPE_RO) -#define PAGE_RW __pgprot(_PAGE_TYPE_RO | _PAGE_SWW) -#define PAGE_RWC __pgprot(_PAGE_TYPE_RW | _PAGE_SWW | _PAGE_SWC) - -#define PAGE_KERNEL PAGE_RWC -#define PAGE_SHARED PAGE_KERNEL -#define PAGE_COPY PAGE_RO +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) +#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \ + _PAGE_INVALID | _PAGE_PROTECT) +#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_INVALID | _PAGE_PROTECT) + +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ + _PAGE_PROTECT) /* * On s390 the page table entry has an invalid bit and a read-only bit. @@ -402,35 +425,31 @@ extern unsigned long MODULES_END; */ /*xwr*/ #define __P000 PAGE_NONE -#define __P001 PAGE_RO -#define __P010 PAGE_RO -#define __P011 PAGE_RO -#define __P100 PAGE_RO -#define __P101 PAGE_RO -#define __P110 PAGE_RO -#define __P111 PAGE_RO +#define __P001 PAGE_READ +#define __P010 PAGE_READ +#define __P011 PAGE_READ +#define __P100 PAGE_READ +#define __P101 PAGE_READ +#define __P110 PAGE_READ +#define __P111 PAGE_READ #define __S000 PAGE_NONE -#define __S001 PAGE_RO -#define __S010 PAGE_RW -#define __S011 PAGE_RW -#define __S100 PAGE_RO -#define __S101 PAGE_RO -#define __S110 PAGE_RW -#define __S111 PAGE_RW +#define __S001 PAGE_READ +#define __S010 PAGE_WRITE +#define __S011 PAGE_WRITE +#define __S100 PAGE_READ +#define __S101 PAGE_READ +#define __S110 PAGE_WRITE +#define __S111 PAGE_WRITE /* * Segment entry (large page) protection definitions. */ -#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) -#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) -#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) - -static inline int mm_exclusive(struct mm_struct *mm) -{ - return likely(mm == current->active_mm && - atomic_read(&mm->context.attach_count) <= 1); -} +#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_NONE) +#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_PROTECT) +#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID) static inline int mm_has_pgste(struct mm_struct *mm) { @@ -467,7 +486,7 @@ static inline int pgd_none(pgd_t pgd) { if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) return 0; - return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; + return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL; } static inline int pgd_bad(pgd_t pgd) @@ -478,7 +497,7 @@ static inline int pgd_bad(pgd_t pgd) * invalid for either table entry. */ unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pgd_val(pgd) & mask) != 0; } @@ -494,7 +513,7 @@ static inline int pud_none(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) return 0; - return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; + return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL; } static inline int pud_large(pud_t pud) @@ -512,7 +531,7 @@ static inline int pud_bad(pud_t pud) * invalid for either table entry. */ unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pud_val(pud) & mask) != 0; } @@ -521,30 +540,36 @@ static inline int pud_bad(pud_t pud) static inline int pmd_present(pmd_t pmd) { - unsigned long mask = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO; - return (pmd_val(pmd) & mask) == _HPAGE_TYPE_NONE || - !(pmd_val(pmd) & _SEGMENT_ENTRY_INV); + return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID; } static inline int pmd_none(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) && - !(pmd_val(pmd) & _SEGMENT_ENTRY_RO); + return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID; } static inline int pmd_large(pmd_t pmd) { #ifdef CONFIG_64BIT - return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; #else return 0; #endif } +static inline int pmd_prot_none(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) && + (pmd_val(pmd) & _SEGMENT_ENTRY_NONE); +} + static inline int pmd_bad(pmd_t pmd) { - unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV; - return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; +#ifdef CONFIG_64BIT + if (pmd_large(pmd)) + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; +#endif + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH @@ -563,31 +588,40 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0; + if (pmd_prot_none(pmd)) + return 0; + return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; } static inline int pmd_young(pmd_t pmd) { - return 0; + int young = 0; +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) + young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0; + else + young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; +#endif + return young; } -static inline int pte_none(pte_t pte) +static inline int pte_present(pte_t pte) { - return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); + /* Bit pattern: (pte & 0x001) == 0x001 */ + return (pte_val(pte) & _PAGE_PRESENT) != 0; } -static inline int pte_present(pte_t pte) +static inline int pte_none(pte_t pte) { - unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX; - return (pte_val(pte) & mask) == _PAGE_TYPE_NONE || - (!(pte_val(pte) & _PAGE_INVALID) && - !(pte_val(pte) & _PAGE_SWT)); + /* Bit pattern: pte == 0x400 */ + return pte_val(pte) == _PAGE_INVALID; } static inline int pte_file(pte_t pte) { - unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT; - return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; + /* Bit pattern: (pte & 0x601) == 0x600 */ + return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT)) + == (_PAGE_INVALID | _PAGE_PROTECT); } static inline int pte_special(pte_t pte) @@ -634,6 +668,15 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) #endif } +static inline pgste_t pgste_get(pte_t *ptep) +{ + unsigned long pgste = 0; +#ifdef CONFIG_PGSTE + pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); +#endif + return __pgste(pgste); +} + static inline void pgste_set(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE @@ -644,33 +687,28 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long address, bits; - unsigned char skey; + unsigned long address, bits, skey; if (pte_val(*ptep) & _PAGE_INVALID) return pgste; address = pte_val(*ptep) & PAGE_MASK; - skey = page_get_storage_key(address); + skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); - /* Clear page changed & referenced bit in the storage key */ - if (bits & _PAGE_CHANGED) + if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) { + /* Transfer dirty + referenced bit to host bits in pgste */ + pgste_val(pgste) |= bits << 52; page_set_storage_key(address, skey ^ bits, 0); - else if (bits) + } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) && + (bits & _PAGE_REFERENCED)) { + /* Transfer referenced bit to host bit in pgste */ + pgste_val(pgste) |= PGSTE_HR_BIT; page_reset_referenced(address); + } /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ - /* Get host changed & referenced bits from pgste */ - bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52; - /* Transfer page changed & referenced bit to kvm user bits */ - pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ - /* Clear relevant host bits in pgste. */ - pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) |= - (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; - /* Transfer referenced bit to pte */ - pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1; + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; #endif return pgste; @@ -679,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - int young; - if (pte_val(*ptep) & _PAGE_INVALID) return pgste; /* Get referenced bit from storage key */ - young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); - if (young) - pgste_val(pgste) |= PGSTE_GR_BIT; - /* Get host referenced bit from pgste */ - if (pgste_val(pgste) & PGSTE_HR_BIT) { - pgste_val(pgste) &= ~PGSTE_HR_BIT; - young = 1; - } - /* Transfer referenced bit to kvm user bits and pte */ - if (young) { - pgste_val(pgste) |= PGSTE_UR_BIT; - pte_val(*ptep) |= _PAGE_SWR; - } + if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK)) + pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT; #endif return pgste; } @@ -723,13 +748,13 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry) static inline void pgste_set_pte(pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_SWW)) { + if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_WRITE)) { /* * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. */ - pte_val(entry) |= _PAGE_SWC; - pte_val(entry) &= ~_PAGE_RO; + pte_val(entry) |= _PAGE_DIRTY; + pte_val(entry) &= ~_PAGE_PROTECT; } *ptep = entry; } @@ -841,21 +866,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, */ static inline int pte_write(pte_t pte) { - return (pte_val(pte) & _PAGE_SWW) != 0; + return (pte_val(pte) & _PAGE_WRITE) != 0; } static inline int pte_dirty(pte_t pte) { - return (pte_val(pte) & _PAGE_SWC) != 0; + return (pte_val(pte) & _PAGE_DIRTY) != 0; } static inline int pte_young(pte_t pte) { -#ifdef CONFIG_PGSTE - if (pte_val(pte) & _PAGE_SWR) - return 1; -#endif - return 0; + return (pte_val(pte) & _PAGE_YOUNG) != 0; } /* @@ -880,12 +901,12 @@ static inline void pud_clear(pud_t *pud) static inline void pmd_clear(pmd_t *pmdp) { - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID; } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; } /* @@ -896,55 +917,63 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) &= _PAGE_CHG_MASK; pte_val(pte) |= pgprot_val(newprot); - if ((pte_val(pte) & _PAGE_SWC) && (pte_val(pte) & _PAGE_SWW)) - pte_val(pte) &= ~_PAGE_RO; + /* + * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the + * invalid bit set, clear it again for readable, young pages + */ + if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ)) + pte_val(pte) &= ~_PAGE_INVALID; + /* + * newprot for PAGE_READ and PAGE_WRITE has the page protection + * bit set, clear it again for writable, dirty pages + */ + if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) &= ~_PAGE_SWW; - /* Do not clobber _PAGE_TYPE_NONE pages! */ - if (!(pte_val(pte) & _PAGE_INVALID)) - pte_val(pte) |= _PAGE_RO; + pte_val(pte) &= ~_PAGE_WRITE; + pte_val(pte) |= _PAGE_PROTECT; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= _PAGE_SWW; - if (pte_val(pte) & _PAGE_SWC) - pte_val(pte) &= ~_PAGE_RO; + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_DIRTY) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_mkclean(pte_t pte) { - pte_val(pte) &= ~_PAGE_SWC; - /* Do not clobber _PAGE_TYPE_NONE pages! */ - if (!(pte_val(pte) & _PAGE_INVALID)) - pte_val(pte) |= _PAGE_RO; + pte_val(pte) &= ~_PAGE_DIRTY; + pte_val(pte) |= _PAGE_PROTECT; return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_SWC; - if (pte_val(pte) & _PAGE_SWW) - pte_val(pte) &= ~_PAGE_RO; + pte_val(pte) |= _PAGE_DIRTY; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_mkold(pte_t pte) { -#ifdef CONFIG_PGSTE - pte_val(pte) &= ~_PAGE_SWR; -#endif + pte_val(pte) &= ~_PAGE_YOUNG; + pte_val(pte) |= _PAGE_INVALID; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { + pte_val(pte) |= _PAGE_YOUNG; + if (pte_val(pte) & _PAGE_READ) + pte_val(pte) &= ~_PAGE_INVALID; return pte; } @@ -957,7 +986,7 @@ static inline pte_t pte_mkspecial(pte_t pte) #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); + pte_val(pte) |= _PAGE_LARGE; return pte; } #endif @@ -974,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_all(ptep, pgste); - dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; + dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT); + pgste_val(pgste) &= ~PGSTE_HC_BIT; pgste_set_unlock(ptep, pgste); return dirty; } @@ -994,59 +1023,75 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_young(ptep, pgste); - young = !!(pgste_val(pgste) & PGSTE_UR_BIT); - pgste_val(pgste) &= ~PGSTE_UR_BIT; + young = !!(pgste_val(pgste) & PGSTE_HR_BIT); + pgste_val(pgste) &= ~PGSTE_HR_BIT; pgste_set_unlock(ptep, pgste); } return young; } +static inline void __ptep_ipte(unsigned long address, pte_t *ptep) +{ + if (!(pte_val(*ptep) & _PAGE_INVALID)) { +#ifndef CONFIG_64BIT + /* pto must point to the start of the segment table */ + pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); +#else + /* ipte in zarch mode can do the math */ + pte_t *pto = ptep; +#endif + asm volatile( + " ipte %2,%3" + : "=m" (*ptep) : "m" (*ptep), + "a" (pto), "a" (address)); + } +} + +static inline void ptep_flush_lazy(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + int active = (mm == current->active_mm) ? 1 : 0; + + if (atomic_read(&mm->context.attach_count) > active) + __ptep_ipte(address, ptep); + else + mm->context.flush_mm = 1; +} + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pgste_t pgste; pte_t pte; + int young; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_update_young(ptep, pgste); - pte = *ptep; - *ptep = pte_mkold(pte); - pgste_set_unlock(ptep, pgste); - return pte_young(pte); + pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); } - return 0; + + pte = *ptep; + __ptep_ipte(addr, ptep); + young = pte_young(pte); + pte = pte_mkold(pte); + + if (mm_has_pgste(vma->vm_mm)) { + pgste_set_pte(ptep, pte); + pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; + + return young; } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH static inline int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - /* No need to flush TLB - * On s390 reference bits are in storage key and never in TLB - * With virtualization we handle the reference bit, without we - * we can simply return */ return ptep_test_and_clear_young(vma, address, ptep); } -static inline void __ptep_ipte(unsigned long address, pte_t *ptep) -{ - if (!(pte_val(*ptep) & _PAGE_INVALID)) { -#ifndef CONFIG_64BIT - /* pto must point to the start of the segment table */ - pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); -#else - /* ipte in zarch mode can do the math */ - pte_t *pto = ptep; -#endif - asm volatile( - " ipte %2,%3" - : "=m" (*ptep) : "m" (*ptep), - "a" (pto), "a" (address)); - } -} - /* * This is hard to understand. ptep_get_and_clear and ptep_clear_flush * both clear the TLB for the unmapped pte. The reason is that @@ -1067,16 +1112,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pgste_t pgste; pte_t pte; - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1093,15 +1136,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, pgste_t pgste; pte_t pte; - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) |= _PAGE_INVALID; if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1117,7 +1159,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, pgste_t pgste; if (mm_has_pgste(mm)) { - pgste = *(pgste_t *)(ptep + PTRS_PER_PTE); + pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte); pgste_set_pte(ptep, pte); pgste_set_unlock(ptep, pgste); @@ -1139,7 +1181,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pte = *ptep; __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1163,18 +1205,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pgste_t pgste; pte_t pte; - if (mm_has_pgste(mm)) { + if (!full && mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - if (!full) - pgste = pgste_ipte_notify(mm, address, ptep, pgste); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; if (!full) - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; - if (mm_has_pgste(mm)) { + if (!full && mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); pgste_set_unlock(ptep, pgste); } @@ -1189,14 +1230,12 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, pte_t pte = *ptep; if (pte_write(pte)) { - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_ipte_notify(mm, address, ptep, pgste); } - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); + ptep_flush_lazy(mm, address, ptep); pte = pte_wrprotect(pte); if (mm_has_pgste(mm)) { @@ -1240,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); - return __pte; + return pte_mkyoung(__pte); } static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) @@ -1248,10 +1287,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) unsigned long physpage = page_to_phys(page); pte_t __pte = mk_pte_phys(physpage, pgprot); - if ((pte_val(__pte) & _PAGE_SWW) && PageDirty(page)) { - pte_val(__pte) |= _PAGE_SWC; - pte_val(__pte) &= ~_PAGE_RO; - } + if (pte_write(__pte) && PageDirty(page)) + __pte = pte_mkdirty(__pte); return __pte; } @@ -1313,7 +1350,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) unsigned long sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); - if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { + if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) { asm volatile( " .insn rrf,0xb98e0000,%2,%3,0,0" : "=m" (*pmdp) @@ -1324,24 +1361,68 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) } } +static inline void __pmd_csp(pmd_t *pmdp) +{ + register unsigned long reg2 asm("2") = pmd_val(*pmdp); + register unsigned long reg3 asm("3") = pmd_val(*pmdp) | + _SEGMENT_ENTRY_INVALID; + register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; + + asm volatile( + " csp %1,%3" + : "=m" (*pmdp) + : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); +} + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) { /* - * pgprot is PAGE_NONE, PAGE_RO, or PAGE_RW (see __Pxxx / __Sxxx) + * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) * Convert to segment table entry format. */ if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) return pgprot_val(SEGMENT_NONE); - if (pgprot_val(pgprot) == pgprot_val(PAGE_RO)) - return pgprot_val(SEGMENT_RO); - return pgprot_val(SEGMENT_RW); + if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) + return pgprot_val(SEGMENT_READ); + return pgprot_val(SEGMENT_WRITE); +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { + int young; + + young = pmd_young(pmd); pmd_val(pmd) &= _SEGMENT_CHG_MASK; pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (young) + pmd = pmd_mkyoung(pmd); return pmd; } @@ -1349,18 +1430,29 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { pmd_t __pmd; pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return __pmd; + return pmd_mkyoung(__pmd); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { - /* Do not clobber _HPAGE_TYPE_NONE pages! */ - if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INV)) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO; + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; return pmd; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ +static inline void pmdp_flush_lazy(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + int active = (mm == current->active_mm) ? 1 : 0; + + if ((atomic_read(&mm->context.attach_count) & 0xffff) > active) + __pmd_idte(address, pmdp); + else + mm->context.flush_mm = 1; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PGTABLE_DEPOSIT @@ -1378,7 +1470,7 @@ static inline int pmd_trans_splitting(pmd_t pmd) static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) + if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1) pmd_val(entry) |= _SEGMENT_ENTRY_CO; *pmdp = entry; } @@ -1391,7 +1483,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_RO; + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1401,50 +1495,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } -static inline pmd_t pmd_mkold(pmd_t pmd) -{ - /* No referenced bit in the segment table entry. */ - return pmd; -} - -static inline pmd_t pmd_mkyoung(pmd_t pmd) -{ - /* No referenced bit in the segment table entry. */ - return pmd; -} - #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK; - long tmp, rc; - int counter; + pmd_t pmd; - rc = 0; - if (MACHINE_HAS_RRBM) { - counter = PTRS_PER_PTE >> 6; - asm volatile( - "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */ - " ogr %1,%0\n" - " la %3,0(%4,%3)\n" - " brct %2,0b\n" - : "=&d" (tmp), "+&d" (rc), "+d" (counter), - "+a" (pmd_addr) - : "a" (64 * 4096UL) : "cc"); - rc = !!rc; - } else { - counter = PTRS_PER_PTE; - asm volatile( - "0: rrbe 0,%2\n" - " la %2,0(%3,%2)\n" - " brc 12,1f\n" - " lhi %0,1\n" - "1: brct %1,0b\n" - : "+d" (rc), "+d" (counter), "+a" (pmd_addr) - : "a" (4096UL) : "cc"); - } - return rc; + pmd = *pmdp; + __pmd_idte(address, pmdp); + *pmdp = pmd_mkold(pmd); + return pmd_young(pmd); } #define __HAVE_ARCH_PMDP_GET_AND_CLEAR @@ -1510,10 +1570,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd) * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bit 21 and bit 22 are the page invalid bit and the page protection - * bit. We set both to indicate a swapped page. - * Bit 30 and 31 are used to distinguish the different page types. For - * a swapped page these bits need to be zero. + * Bits 21, 22, 30 and 31 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 * This leaves the bits 1-19 and bits 24-29 to store type and offset. * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 * plus 24 for the offset. @@ -1527,10 +1585,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd) * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bit 53 and bit 54 are the page invalid bit and the page protection - * bit. We set both to indicate a swapped page. - * Bit 62 and 63 are used to distinguish the different page types. For - * a swapped page these bits need to be zero. + * Bits 53, 54, 62 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 * This leaves the bits 0-51 and bits 56-61 to store type and offset. * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 * plus 56 for the offset. @@ -1547,7 +1603,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) | + pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); return pte; } @@ -1570,7 +1626,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define pgoff_to_pte(__off) \ ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ - | _PAGE_TYPE_FILE }) + | _PAGE_INVALID | _PAGE_PROTECT }) #endif /* !__ASSEMBLY__ */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b0e6435b2f02..0eb37505cab1 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -43,6 +43,7 @@ extern void execve_tail(void); #ifndef CONFIG_64BIT #define TASK_SIZE (1UL << 31) +#define TASK_MAX_SIZE (1UL << 31) #define TASK_UNMAPPED_BASE (1UL << 30) #else /* CONFIG_64BIT */ @@ -51,6 +52,7 @@ extern void execve_tail(void); #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) +#define TASK_MAX_SIZE (1UL << 53) #endif /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h new file mode 100644 index 000000000000..5b3e48ef534b --- /dev/null +++ b/arch/s390/include/asm/serial.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_SERIAL_H +#define _ASM_S390_SERIAL_H + +#define BASE_BAUD 0 + +#endif /* _ASM_S390_SERIAL_H */ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 80b6f11263c4..6dbd559763c9 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -8,6 +8,7 @@ #define __ASM_SWITCH_TO_H #include <linux/thread_info.h> +#include <asm/ptrace.h> extern struct task_struct *__switch_to(void *, void *); extern void update_cr_regs(struct task_struct *task); @@ -68,12 +69,16 @@ static inline void restore_fp_regs(s390_fp_regs *fpregs) static inline void save_access_regs(unsigned int *acrs) { - asm volatile("stam 0,15,%0" : "=Q" (*acrs)); + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs)); } static inline void restore_access_regs(unsigned int *acrs) { - asm volatile("lam 0,15,%0" : : "Q" (*acrs)); + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs)); } #define switch_to(prev,next,last) do { \ diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 6d6d92b4ea11..2cb846c4b37f 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -63,13 +63,14 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb, static inline void tlb_flush_mmu(struct mmu_gather *tlb) { + __tlb_flush_mm_lazy(tlb->mm); tlb_table_flush(tlb); } static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - tlb_table_flush(tlb); + tlb_flush_mmu(tlb); } /* diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 6b32af30878c..f9fef0425fee 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -86,7 +86,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) __tlb_flush_full(mm); } -static inline void __tlb_flush_mm_cond(struct mm_struct * mm) +static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { if (mm->context.flush_mm) { __tlb_flush_mm(mm); @@ -118,13 +118,13 @@ static inline void __tlb_flush_mm_cond(struct mm_struct * mm) static inline void flush_tlb_mm(struct mm_struct *mm) { - __tlb_flush_mm_cond(mm); + __tlb_flush_mm_lazy(mm); } static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __tlb_flush_mm_cond(vma->vm_mm); + __tlb_flush_mm_lazy(vma->vm_mm); } static inline void flush_tlb_kernel_range(unsigned long start, diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h new file mode 100644 index 000000000000..af9896c53eb3 --- /dev/null +++ b/arch/s390/include/asm/vtime.h @@ -0,0 +1,7 @@ +#ifndef _S390_VTIME_H +#define _S390_VTIME_H + +#define __ARCH_HAS_VTIME_ACCOUNT +#define __ARCH_HAS_VTIME_TASK_SWITCH + +#endif /* _S390_VTIME_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index be7a408be7a1..cc30d1fb000c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -18,6 +18,7 @@ #include <asm/unistd.h> #include <asm/page.h> #include <asm/sigp.h> +#include <asm/irq.h> __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 4 @@ -435,6 +436,11 @@ io_skip: io_loop: l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs + lhi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lhi %r3,THIN_INTERRUPT +io_call: basr %r14,%r1 # call do_IRQ tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR jz io_return @@ -584,9 +590,10 @@ ext_skip: mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS TRACE_IRQS_OFF + l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs - l %r1,BASED(.Ldo_extint) - basr %r14,%r1 # call do_extint + lhi %r3,EXT_INTERRUPT + basr %r14,%r1 # call do_IRQ j io_return /* @@ -879,13 +886,13 @@ cleanup_idle: stm %r9,%r10,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) # prepare return psw - n %r8,BASED(cleanup_idle_wait) # clear wait state bit + n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits l %r9,24(%r11) # return from psw_idle br %r14 cleanup_idle_insn: .long psw_idle_lpsw + 0x80000000 cleanup_idle_wait: - .long 0xfffdffff + .long 0xfcfdffff /* * Integer constants @@ -902,7 +909,6 @@ cleanup_idle_wait: .Ldo_machine_check: .long s390_do_machine_check .Lhandle_mcck: .long s390_handle_mcck .Ldo_IRQ: .long do_IRQ -.Ldo_extint: .long do_extint .Ldo_signal: .long do_signal .Ldo_notify_resume: .long do_notify_resume .Ldo_per_trap: .long do_per_trap diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 1c039d0c24c7..2b2188b97c6a 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -19,6 +19,7 @@ #include <asm/unistd.h> #include <asm/page.h> #include <asm/sigp.h> +#include <asm/irq.h> __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 8 @@ -468,6 +469,11 @@ io_skip: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) io_loop: lgr %r2,%r11 # pass pointer to pt_regs + lghi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lghi %r3,THIN_INTERRUPT +io_call: brasl %r14,do_IRQ tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR jz io_return @@ -623,7 +629,8 @@ ext_skip: TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_extint + lghi %r3,EXT_INTERRUPT + brasl %r14,do_IRQ j io_return /* @@ -922,7 +929,7 @@ cleanup_idle: stg %r9,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) # prepare return psw - nihh %r8,0xfffd # clear wait state bit + nihh %r8,0xfcfd # clear irq & wait state bits lg %r9,48(%r11) # return from psw_idle br %r14 cleanup_idle_insn: diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 54b0995514e8..b34ba0ea96a9 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -22,6 +22,7 @@ #include <asm/cputime.h> #include <asm/lowcore.h> #include <asm/irq.h> +#include <asm/hw_irq.h> #include "entry.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); @@ -42,9 +43,10 @@ struct irq_class { * Since the external and I/O interrupt fields are already sums we would end * up with having a sum which accounts each interrupt twice. */ -static const struct irq_class irqclass_main_desc[NR_IRQS] = { - [EXTERNAL_INTERRUPT] = {.name = "EXT"}, - [IO_INTERRUPT] = {.name = "I/O"} +static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { + [EXT_INTERRUPT] = {.name = "EXT"}, + [IO_INTERRUPT] = {.name = "I/O"}, + [THIN_INTERRUPT] = {.name = "AIO"}, }; /* @@ -86,6 +88,28 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, }; +void __init init_IRQ(void) +{ + irq_reserve_irqs(0, THIN_INTERRUPT); + init_cio_interrupts(); + init_airq_interrupts(); + init_ext_interrupts(); +} + +void do_IRQ(struct pt_regs *regs, int irq) +{ + struct pt_regs *old_regs; + + old_regs = set_irq_regs(regs); + irq_enter(); + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + /* Serve timer interrupts first. */ + clock_comparator_work(); + generic_handle_irq(irq); + irq_exit(); + set_irq_regs(old_regs); +} + /* * show_interrupts is needed by /proc/interrupts. */ @@ -100,27 +124,36 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(cpu) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); + goto out; } if (irq < NR_IRQS) { + if (irq >= NR_IRQS_BASE) + goto out; seq_printf(p, "%s: ", irqclass_main_desc[irq].name); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]); + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); seq_putc(p, '\n'); - goto skip_arch_irqs; + goto out; } for (irq = 0; irq < NR_ARCH_IRQS; irq++) { seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]); + seq_printf(p, "%10u ", + per_cpu(irq_stat, cpu).irqs[irq]); if (irqclass_sub_desc[irq].desc) seq_printf(p, " %s", irqclass_sub_desc[irq].desc); seq_putc(p, '\n'); } -skip_arch_irqs: +out: put_online_cpus(); return 0; } +int arch_show_interrupts(struct seq_file *p, int prec) +{ + return 0; +} + /* * Switch to the asynchronous interrupt stack for softirq execution. */ @@ -159,14 +192,6 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } -#ifdef CONFIG_PROC_FS -void init_irq_proc(void) -{ - if (proc_mkdir("irq", NULL)) - create_prof_cpu_mask(); -} -#endif - /* * ext_int_hash[index] is the list head for all external interrupts that hash * to this index. @@ -183,14 +208,6 @@ struct ext_int_info { /* ext_int_hash_lock protects the handler lists for external interrupts */ DEFINE_SPINLOCK(ext_int_hash_lock); -static void __init init_external_interrupts(void) -{ - int idx; - - for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) - INIT_LIST_HEAD(&ext_int_hash[idx]); -} - static inline int ext_hash(u16 code) { return (code + (code >> 9)) & 0xff; @@ -234,20 +251,13 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) } EXPORT_SYMBOL(unregister_external_interrupt); -void __irq_entry do_extint(struct pt_regs *regs) +static irqreturn_t do_ext_interrupt(int irq, void *dummy) { + struct pt_regs *regs = get_irq_regs(); struct ext_code ext_code; - struct pt_regs *old_regs; struct ext_int_info *p; int index; - old_regs = set_irq_regs(regs); - irq_enter(); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) { - /* Serve timer interrupts first. */ - clock_comparator_work(); - } - kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL); ext_code = *(struct ext_code *) ®s->int_code; if (ext_code.code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; @@ -259,13 +269,25 @@ void __irq_entry do_extint(struct pt_regs *regs) p->handler(ext_code, regs->int_parm, regs->int_parm_long); rcu_read_unlock(); - irq_exit(); - set_irq_regs(old_regs); + + return IRQ_HANDLED; } -void __init init_IRQ(void) +static struct irqaction external_interrupt = { + .name = "EXT", + .handler = do_ext_interrupt, +}; + +void __init init_ext_interrupts(void) { - init_external_interrupts(); + int idx; + + for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) + INIT_LIST_HEAD(&ext_int_hash[idx]); + + irq_set_chip_and_handler(EXT_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + setup_irq(EXT_INTERRUPT, &external_interrupt); } static DEFINE_SPINLOCK(sc_irq_lock); @@ -313,69 +335,3 @@ void measurement_alert_subclass_unregister(void) spin_unlock(&ma_subclass_lock); } EXPORT_SYMBOL(measurement_alert_subclass_unregister); - -#ifdef CONFIG_SMP -void synchronize_irq(unsigned int irq) -{ - /* - * Not needed, the handler is protected by a lock and IRQs that occur - * after the handler is deleted are just NOPs. - */ -} -EXPORT_SYMBOL_GPL(synchronize_irq); -#endif - -#ifndef CONFIG_PCI - -/* Only PCI devices have dynamically-defined IRQ handlers */ - -int request_irq(unsigned int irq, irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(free_irq); - -void enable_irq(unsigned int irq) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(enable_irq); - -void disable_irq(unsigned int irq) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(disable_irq); - -#endif /* !CONFIG_PCI */ - -void disable_irq_nosync(unsigned int irq) -{ - disable_irq(irq); -} -EXPORT_SYMBOL_GPL(disable_irq_nosync); - -unsigned long probe_irq_on(void) -{ - return 0; -} -EXPORT_SYMBOL_GPL(probe_irq_on); - -int probe_irq_off(unsigned long val) -{ - return 0; -} -EXPORT_SYMBOL_GPL(probe_irq_off); - -unsigned int probe_irq_mask(unsigned long val) -{ - return val; -} -EXPORT_SYMBOL_GPL(probe_irq_mask); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 3388b2b2a07d..adbbe7f1cb0d 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -105,14 +105,31 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn) fixup |= FIXUP_RETURN_REGISTER; break; case 0xeb: - if ((insn[2] & 0xff) == 0x44 || /* bxhg */ - (insn[2] & 0xff) == 0x45) /* bxleg */ + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } break; case 0xe3: /* bctg */ if ((insn[2] & 0xff) == 0x46) fixup = FIXUP_BRANCH_NOT_TAKEN; break; + case 0xec: + switch (insn[2] & 0xff) { + case 0xe5: /* clgrb */ + case 0xe6: /* cgrb */ + case 0xf6: /* crb */ + case 0xf7: /* clrb */ + case 0xfc: /* cgib */ + case 0xfd: /* cglib */ + case 0xfe: /* cib */ + case 0xff: /* clib */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; } return fixup; } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 504175ebf8b0..c4c033819879 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -214,10 +214,7 @@ static int notrace s390_revalidate_registers(struct mci *mci) : "0", "cc"); #endif /* Revalidate clock comparator register */ - if (S390_lowcore.clock_comparator == -1) - set_clock_comparator(S390_lowcore.mcck_clock); - else - set_clock_comparator(S390_lowcore.clock_comparator); + set_clock_comparator(S390_lowcore.clock_comparator); /* Check if old PSW is valid */ if (!mci->wp) /* diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 2bc3eddae34a..c5dbb335716d 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -71,6 +71,7 @@ void arch_cpu_idle(void) } /* Halt the cpu and keep track of cpu time accounting. */ vtime_stop_cpu(); + local_irq_enable(); } void arch_cpu_idle_exit(void) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index e9fadb04e3c6..9556905bd3ce 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -60,11 +60,11 @@ void update_cr_regs(struct task_struct *task) __ctl_store(cr, 0, 2); cr_new[1] = cr[1]; - /* Set or clear transaction execution TXC/PIFO bits 8 and 9. */ + /* Set or clear transaction execution TXC bit 8. */ if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new[0] = cr[0] & ~(3UL << 54); + cr_new[0] = cr[0] & ~(1UL << 55); else - cr_new[0] = cr[0] | (3UL << 54); + cr_new[0] = cr[0] | (1UL << 55); /* Set or clear transaction execution TDC bits 62 and 63. */ cr_new[2] = cr[2] & ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { @@ -1299,7 +1299,7 @@ int regs_query_register_offset(const char *name) if (!name || *name != 'r') return -EINVAL; - if (strict_strtoul(name + 1, 10, &offset)) + if (kstrtoul(name + 1, 10, &offset)) return -EINVAL; if (offset >= NUM_GPRS) return -EINVAL; diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index c479d2f9605b..737bff38e3ee 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -10,6 +10,9 @@ #include <linux/suspend.h> #include <linux/mm.h> #include <asm/ctl_reg.h> +#include <asm/ipl.h> +#include <asm/cio.h> +#include <asm/pci.h> /* * References to section boundaries @@ -211,3 +214,11 @@ void restore_processor_state(void) __ctl_set_bit(0,28); local_mcck_enable(); } + +/* Called at the end of swsusp_arch_resume */ +void s390_early_resume(void) +{ + lgr_info_log(); + channel_subsystem_reinit(); + zpci_rescan(); +} diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index c487be4cfc81..6b09fdffbd2f 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -281,11 +281,8 @@ restore_registers: lghi %r2,0 brasl %r14,arch_set_page_states - /* Log potential guest relocation */ - brasl %r14,lgr_info_log - - /* Reinitialize the channel subsystem */ - brasl %r14,channel_subsystem_reinit + /* Call arch specific early resume code */ + brasl %r14,s390_early_resume /* Return 0 */ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 876546b9cfa1..064c3082ab33 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -92,7 +92,6 @@ void clock_comparator_work(void) struct clock_event_device *cd; S390_lowcore.clock_comparator = -1ULL; - set_clock_comparator(S390_lowcore.clock_comparator); cd = &__get_cpu_var(comparators); cd->event_handler(cd); } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d7776281cb60..05d75c413137 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -63,7 +63,7 @@ static int __init vdso_setup(char *s) else if (strncmp(s, "off", 4) == 0) vdso_enabled = 0; else { - rc = strict_strtoul(s, 0, &val); + rc = kstrtoul(s, 0, &val); vdso_enabled = rc ? 0 : !!val; } return !rc; @@ -113,11 +113,11 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore) clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE << SEGMENT_ORDER); - clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) page_table, _PAGE_INVALID, 256*sizeof(unsigned long)); *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; - *(unsigned long *) page_table = _PAGE_RO + page_frame; + *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; psal = (u32 *) (page_table + 256*sizeof(unsigned long)); aste = psal + 32; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 9b9c1b78ec67..abcfab55f99b 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -19,6 +19,7 @@ #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/vtimer.h> +#include <asm/vtime.h> #include <asm/irq.h> #include "entry.h" diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 3074475c8ae0..3a74d8af0d69 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -119,12 +119,21 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * The layout is as follows: * - gpr 2 contains the subchannel id (passed as addr) * - gpr 3 contains the virtqueue index (passed as datamatch) + * - gpr 4 contains the index on the bus (optionally) */ - ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, - vcpu->run->s.regs.gprs[2], - 8, &vcpu->run->s.regs.gprs[3]); + ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu->run->s.regs.gprs[2], + 8, &vcpu->run->s.regs.gprs[3], + vcpu->run->s.regs.gprs[4]); srcu_read_unlock(&vcpu->kvm->srcu, idx); - /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ + + /* + * Return cookie in gpr 2, but don't overwrite the register if the + * diagnose will be handled by userspace. + */ + if (ret != -EOPNOTSUPP) + vcpu->run->s.regs.gprs[2] = ret; + /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */ return ret < 0 ? ret : 0; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 302e0e52b009..99d789e8a018 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -42,9 +42,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, ({ \ __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ int __mask = sizeof(__typeof__(*(gptr))) - 1; \ - int __ret = PTR_RET((void __force *)__uptr); \ + int __ret; \ \ - if (!__ret) { \ + if (IS_ERR((void __force *)__uptr)) { \ + __ret = PTR_ERR((void __force *)__uptr); \ + } else { \ BUG_ON((unsigned long)__uptr & __mask); \ __ret = get_user(x, __uptr); \ } \ @@ -55,9 +57,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, ({ \ __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ int __mask = sizeof(__typeof__(*(gptr))) - 1; \ - int __ret = PTR_RET((void __force *)__uptr); \ + int __ret; \ \ - if (!__ret) { \ + if (IS_ERR((void __force *)__uptr)) { \ + __ret = PTR_ERR((void __force *)__uptr); \ + } else { \ BUG_ON((unsigned long)__uptr & __mask); \ __ret = put_user(x, __uptr); \ } \ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 34c1c9a90be2..776dafe918db 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,6 +28,7 @@ #include <asm/pgtable.h> #include <asm/nmi.h> #include <asm/switch_to.h> +#include <asm/facility.h> #include <asm/sclp.h> #include "kvm-s390.h" #include "gaccess.h" @@ -84,9 +85,15 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -static unsigned long long *facilities; +unsigned long *vfacilities; static struct gmap_notifier gmap_notifier; +/* test availability of vfacility */ +static inline int test_vfacility(unsigned long nr) +{ + return __test_facility(nr, (void *) vfacilities); +} + /* Section: not file related */ int kvm_arch_hardware_enable(void *garbage) { @@ -387,7 +394,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb = 6; vcpu->arch.sie_block->ecb2 = 8; vcpu->arch.sie_block->eca = 0xC1002001U; - vcpu->arch.sie_block->fac = (int) (long) facilities; + vcpu->arch.sie_block->fac = (int) (long) vfacilities; hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, (unsigned long) vcpu); @@ -1063,6 +1070,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -1129,20 +1140,20 @@ static int __init kvm_s390_init(void) * to hold the maximum amount of facilities. On the other hand, we * only set facilities that are known to work in KVM. */ - facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); - if (!facilities) { + vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); + if (!vfacilities) { kvm_exit(); return -ENOMEM; } - memcpy(facilities, S390_lowcore.stfle_fac_list, 16); - facilities[0] &= 0xff82fff3f47c0000ULL; - facilities[1] &= 0x001c000000000000ULL; + memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); + vfacilities[0] &= 0xff82fff3f47c0000UL; + vfacilities[1] &= 0x001c000000000000UL; return 0; } static void __exit kvm_s390_exit(void) { - free_page((unsigned long) facilities); + free_page((unsigned long) vfacilities); kvm_exit(); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 028ca9fd2158..dc99f1ca4267 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -24,6 +24,9 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); +/* declare vfacilities extern */ +extern unsigned long *vfacilities; + /* negativ values are error codes, positive values for internal conditions */ #define SIE_INTERCEPT_RERUNVCPU (1<<0) #define SIE_INTERCEPT_UCONTROL (1<<1) @@ -112,6 +115,13 @@ static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +/* Set the condition code in the guest program status word */ +static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) +{ + vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44); + vcpu->arch.sie_block->gpsw.mask |= cc << 44; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 4cdc54e63ebc..59200ee275e5 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -164,8 +164,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) kfree(inti); no_interrupt: /* Set condition code and we're done. */ - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, cc); return 0; } @@ -220,15 +219,13 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) * Set condition code 3 to stop the guest from issueing channel * I/O instructions. */ - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, 3); return 0; } } static int handle_stfl(struct kvm_vcpu *vcpu) { - unsigned int facility_list; int rc; vcpu->stat.instruction_stfl++; @@ -236,15 +233,13 @@ static int handle_stfl(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - /* only pass the facility bits, which we can handle */ - facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3; - rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), - &facility_list, sizeof(facility_list)); + vfacilities, 4); if (rc) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); - trace_kvm_s390_handle_stfl(vcpu, facility_list); + VCPU_EVENT(vcpu, 5, "store facility list value %x", + *(unsigned int *) vfacilities); + trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities); return 0; } @@ -387,7 +382,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); if (fc > 3) { - vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */ + kvm_s390_set_psw_cc(vcpu, 3); return 0; } @@ -397,7 +392,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) if (fc == 0) { vcpu->run->s.regs.gprs[0] = 3 << 28; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */ + kvm_s390_set_psw_cc(vcpu, 0); return 0; } @@ -431,12 +426,11 @@ static int handle_stsi(struct kvm_vcpu *vcpu) } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + kvm_s390_set_psw_cc(vcpu, 0); vcpu->run->s.regs.gprs[0] = 0; return 0; out_no_data: - /* condition code 3 */ - vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; + kvm_s390_set_psw_cc(vcpu, 3); out_exception: free_page(mem); return rc; @@ -494,12 +488,12 @@ static int handle_epsw(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); /* This basically extracts the mask half of the psw. */ - vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL; vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; if (reg2) { - vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL; vcpu->run->s.regs.gprs[reg2] |= - vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff; + vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL; } return 0; } diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index c61b9fad43cc..57c87d7d7ede 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -44,7 +44,6 @@ static void __udelay_disabled(unsigned long long usecs) do { set_clock_comparator(end); vtime_stop_cpu(); - local_irq_disable(); } while (get_tod_clock() < end); lockdep_on(); __ctl_load(cr0, 0, 0); @@ -64,7 +63,6 @@ static void __udelay_enabled(unsigned long long usecs) set_clock_comparator(end); } vtime_stop_cpu(); - local_irq_disable(); if (clock_saved) local_tick_enable(clock_saved); } while (get_tod_clock() < end); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 50ea137a2d3c..1694d738b175 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -86,28 +86,28 @@ static unsigned long follow_table(struct mm_struct *mm, switch (mm->context.asce_bits & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: table = table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x39UL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION2: table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x3aUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION3: table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x3bUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_SEGMENT: table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) return -0x10UL; if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) { - if (write && (*table & _SEGMENT_ENTRY_RO)) + if (write && (*table & _SEGMENT_ENTRY_PROTECT)) return -0x04UL; return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) + (address & ~_SEGMENT_ENTRY_ORIGIN_LARGE); @@ -117,7 +117,7 @@ static unsigned long follow_table(struct mm_struct *mm, table = table + ((address >> 12) & 0xff); if (unlikely(*table & _PAGE_INVALID)) return -0x11UL; - if (write && (*table & _PAGE_RO)) + if (write && (*table & _PAGE_PROTECT)) return -0x04UL; return (*table & PAGE_MASK) + (address & ~PAGE_MASK); } @@ -130,13 +130,13 @@ static unsigned long follow_table(struct mm_struct *mm, unsigned long *table = (unsigned long *)__pa(mm->pgd); table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) return -0x10UL; table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); table = table + ((address >> 12) & 0xff); if (unlikely(*table & _PAGE_INVALID)) return -0x11UL; - if (write && (*table & _PAGE_RO)) + if (write && (*table & _PAGE_PROTECT)) return -0x04UL; return (*table & PAGE_MASK) + (address & ~PAGE_MASK); } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 3ad65b04ac15..46d517c3c763 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -53,7 +53,7 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) seq_printf(m, "I\n"); return; } - seq_printf(m, "%s", pr & _PAGE_RO ? "RO " : "RW "); + seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW "); seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " "); seq_putc(m, '\n'); } @@ -105,12 +105,12 @@ static void note_page(struct seq_file *m, struct pg_state *st, } /* - * The actual page table walker functions. In order to keep the implementation - * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO - * flags to note_page() if a region, segment or page table entry is invalid or - * read-only. - * After all it's just a hint that the current level being walked contains an - * invalid or read-only entry. + * The actual page table walker functions. In order to keep the + * implementation of print_prot() short, we only check and pass + * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region, + * segment or page table entry is invalid or read-only. + * After all it's just a hint that the current level being walked + * contains an invalid or read-only entry. */ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t *pmd, unsigned long addr) @@ -122,14 +122,14 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { st->current_address = addr; pte = pte_offset_kernel(pmd, addr); - prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID); + prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID); note_page(m, st, prot, 4); addr += PAGE_SIZE; } } #ifdef CONFIG_64BIT -#define _PMD_PROT_MASK (_SEGMENT_ENTRY_RO | _SEGMENT_ENTRY_CO) +#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO) #else #define _PMD_PROT_MASK 0 #endif diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 1f5315d1215c..5d758db27bdc 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -24,7 +24,7 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, pte_t *ptep, pte; struct page *page; - mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; + mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL; ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); do { @@ -55,8 +55,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, struct page *head, *page, *tail; int refs; - result = write ? 0 : _SEGMENT_ENTRY_RO; - mask = result | _SEGMENT_ENTRY_INV; + result = write ? 0 : _SEGMENT_ENTRY_PROTECT; + mask = result | _SEGMENT_ENTRY_INVALID; if ((pmd_val(pmd) & mask) != result) return 0; VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 121089d57802..248445f92604 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -8,21 +8,127 @@ #include <linux/mm.h> #include <linux/hugetlb.h> +static inline pmd_t __pte_to_pmd(pte_t pte) +{ + int none, young, prot; + pmd_t pmd; + + /* + * Convert encoding pte bits pmd bits + * .IR...wrdytp ..R...I...y. + * empty .10...000000 -> ..0...1...0. + * prot-none, clean, old .11...000001 -> ..0...1...1. + * prot-none, clean, young .11...000101 -> ..1...1...1. + * prot-none, dirty, old .10...001001 -> ..0...1...1. + * prot-none, dirty, young .10...001101 -> ..1...1...1. + * read-only, clean, old .11...010001 -> ..1...1...0. + * read-only, clean, young .01...010101 -> ..1...0...1. + * read-only, dirty, old .11...011001 -> ..1...1...0. + * read-only, dirty, young .01...011101 -> ..1...0...1. + * read-write, clean, old .11...110001 -> ..0...1...0. + * read-write, clean, young .01...110101 -> ..0...0...1. + * read-write, dirty, old .10...111001 -> ..0...1...0. + * read-write, dirty, young .00...111101 -> ..0...0...1. + * Huge ptes are dirty by definition, a clean pte is made dirty + * by the conversion. + */ + if (pte_present(pte)) { + pmd_val(pmd) = pte_val(pte) & PAGE_MASK; + if (pte_val(pte) & _PAGE_INVALID) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + none = (pte_val(pte) & _PAGE_PRESENT) && + !(pte_val(pte) & _PAGE_READ) && + !(pte_val(pte) & _PAGE_WRITE); + prot = (pte_val(pte) & _PAGE_PROTECT) && + !(pte_val(pte) & _PAGE_WRITE); + young = pte_val(pte) & _PAGE_YOUNG; + if (none || young) + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + if (prot || (none && young)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else + pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; + return pmd; +} + +static inline pte_t __pmd_to_pte(pmd_t pmd) +{ + pte_t pte; + + /* + * Convert encoding pmd bits pte bits + * ..R...I...y. .IR...wrdytp + * empty ..0...1...0. -> .10...000000 + * prot-none, old ..0...1...1. -> .10...001001 + * prot-none, young ..1...1...1. -> .10...001101 + * read-only, old ..1...1...0. -> .11...011001 + * read-only, young ..1...0...1. -> .01...011101 + * read-write, old ..0...1...0. -> .10...111001 + * read-write, young ..0...0...1. -> .00...111101 + * Huge ptes are dirty by definition + */ + if (pmd_present(pmd)) { + pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY | + (pmd_val(pmd) & PAGE_MASK); + if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) + pte_val(pte) |= _PAGE_INVALID; + if (pmd_prot_none(pmd)) { + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_YOUNG; + } else { + pte_val(pte) |= _PAGE_READ; + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_WRITE; + if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) + pte_val(pte) |= _PAGE_YOUNG; + } + } else + pte_val(pte) = _PAGE_INVALID; + return pte; +} void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *pteptr, pte_t pteval) + pte_t *ptep, pte_t pte) { - pmd_t *pmdp = (pmd_t *) pteptr; - unsigned long mask; + pmd_t pmd; + pmd = __pte_to_pmd(pte); if (!MACHINE_HAS_HPAGE) { - pteptr = (pte_t *) pte_page(pteval)[1].index; - mask = pte_val(pteval) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= pte_page(pte)[1].index; + } else + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO; + *(pmd_t *) ptep = pmd; +} + +pte_t huge_ptep_get(pte_t *ptep) +{ + unsigned long origin; + pmd_t pmd; + + pmd = *(pmd_t *) ptep; + if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) { + origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= *(unsigned long *) origin; } + return __pmd_to_pte(pmd); +} - pmd_val(*pmdp) = pte_val(pteval); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pmd_t *pmdp = (pmd_t *) ptep; + pte_t pte = huge_ptep_get(ptep); + + if (MACHINE_HAS_IDTE) + __pmd_idte(addr, pmdp); + else + __pmd_csp(pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + return pte; } int arch_prepare_hugepage(struct page *page) @@ -58,7 +164,7 @@ void arch_release_hugepage(struct page *page) ptep = (pte_t *) page[1].index; if (!ptep) return; - clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) ptep, _PAGE_INVALID, PTRS_PER_PTE * sizeof(pte_t)); page_table_free(&init_mm, (unsigned long *) ptep); page[1].index = 0; diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 80adfbf75065..990397420e6b 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -118,7 +118,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) pte = pte_offset_kernel(pmd, address); if (!enable) { __ptep_ipte(address, pte); - pte_val(*pte) = _PAGE_TYPE_EMPTY; + pte_val(*pte) = _PAGE_INVALID; continue; } pte_val(*pte) = __pa(address); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index a8154a1a2c94..bf7c0dc64a76 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -161,7 +161,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) struct gmap_rmap *rmap; struct page *page; - if (*table & _SEGMENT_ENTRY_INV) + if (*table & _SEGMENT_ENTRY_INVALID) return 0; page = pfn_to_page(*table >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; @@ -172,7 +172,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) kfree(rmap); break; } - *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT; return 1; } @@ -258,7 +258,7 @@ static int gmap_alloc_table(struct gmap *gmap, return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); - if (*table & _REGION_ENTRY_INV) { + if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); @@ -292,22 +292,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) for (off = 0; off < len; off += PMD_SIZE) { /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 20) & 0x7ff); /* Clear segment table entry in guest address space. */ flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INV; + *table = _SEGMENT_ENTRY_INVALID; } out: spin_unlock(&gmap->mm->page_table_lock); @@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; - if (len == 0 || from + len > PGDIR_SIZE || + if (len == 0 || from + len > TASK_MAX_SIZE || from + len < from || to + len < to) return -EINVAL; @@ -345,17 +345,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, for (off = 0; off < len; off += PMD_SIZE) { /* Walk the gmap address space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); @@ -363,7 +363,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, /* Store 'from' address in an invalid segment table entry. */ flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); + *table = (from + off) | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); } spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); @@ -384,15 +385,15 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) unsigned long *table; table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 20) & 0x7ff); @@ -422,11 +423,11 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) return PTR_ERR(segment_ptr); /* Convert the gmap address to an mm address. */ segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INV)) { + if (!(segment & _SEGMENT_ENTRY_INVALID)) { page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); - } else if (segment & _SEGMENT_ENTRY_RO) { + } else if (segment & _SEGMENT_ENTRY_PROTECT) { vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; return vmaddr | (address & ~PMD_MASK); } @@ -517,8 +518,8 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) page = pfn_to_page(__pa(table) >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; list_for_each_entry_safe(rmap, next, &mp->mapper, list) { - *rmap->entry = - _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); list_del(&rmap->list); kfree(rmap); flush = 1; @@ -545,13 +546,13 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) /* Convert the gmap address to an mm address. */ while (1) { segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INV)) { + if (!(segment & _SEGMENT_ENTRY_INVALID)) { /* Page table is present */ page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); } - if (!(segment & _SEGMENT_ENTRY_RO)) + if (!(segment & _SEGMENT_ENTRY_PROTECT)) /* Nothing mapped in the gmap address space. */ break; rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); @@ -586,25 +587,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) while (address < to) { /* Walk the gmap address space page table */ table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } @@ -687,7 +688,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) continue; /* Set notification bit in the pgste of the pte */ entry = *ptep; - if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { pgste = pgste_get_lock(ptep); pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); @@ -731,6 +732,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) spin_unlock(&gmap_notifier_lock); } +static inline int page_table_with_pgste(struct page *page) +{ + return atomic_read(&page->_mapcount) == 0; +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { @@ -750,10 +756,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, mp->vmaddr = vmaddr & PMD_MASK; INIT_LIST_HEAD(&mp->mapper); page->index = (unsigned long) mp; - atomic_set(&page->_mapcount, 3); + atomic_set(&page->_mapcount, 0); table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, + PAGE_SIZE/2); return table; } @@ -791,26 +798,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - unsigned long address, bits; - unsigned char skey; + unsigned long address, bits, skey; address = pte_val(*ptep) & PAGE_MASK; - skey = page_get_storage_key(address); + skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set storage key ACC and FP */ - page_set_storage_key(address, - (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)), - !nq); - + page_set_storage_key(address, skey, !nq); /* Merge host changed & referenced into pgste */ pgste_val(new) |= bits << 52; - /* Transfer skey changed & referenced bit to kvm user bits */ - pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ } /* changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) - pgste_val(new) |= PGSTE_UC_BIT; + pgste_val(new) |= PGSTE_HC_BIT; pgste_set_unlock(ptep, new); pte_unmap_unlock(*ptep, ptl); @@ -821,6 +823,11 @@ EXPORT_SYMBOL(set_guest_storage_key); #else /* CONFIG_PGSTE */ +static inline int page_table_with_pgste(struct page *page) +{ + return 0; +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { @@ -878,7 +885,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) pgtable_page_ctor(page); atomic_set(&page->_mapcount, 1); table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + clear_table(table, _PAGE_INVALID, PAGE_SIZE); spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } else { @@ -897,12 +904,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned int bit, mask; - if (mm_has_pgste(mm)) { + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { gmap_disconnect_pgtable(mm, table); return page_table_free_pgste(table); } /* Free 1K/2K page table fragment of a 4K page */ - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); spin_lock_bh(&mm->context.list_lock); if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) @@ -940,14 +947,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) unsigned int bit, mask; mm = tlb->mm; - if (mm_has_pgste(mm)) { + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { gmap_disconnect_pgtable(mm, table); table = (unsigned long *) (__pa(table) | FRAG_MASK); tlb_remove_table(tlb, table); return; } bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) list_del(&page->lru); @@ -1007,7 +1014,6 @@ void tlb_table_flush(struct mmu_gather *tlb) struct mmu_table_batch **batch = &tlb->batch; if (*batch) { - __tlb_flush_mm(tlb->mm); call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } @@ -1017,11 +1023,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct mmu_table_batch **batch = &tlb->batch; + tlb->mm->context.flush_mm = 1; if (*batch == NULL) { *batch = (struct mmu_table_batch *) __get_free_page(GFP_NOWAIT | __GFP_NOWARN); if (*batch == NULL) { - __tlb_flush_mm(tlb->mm); + __tlb_flush_mm_lazy(tlb->mm); tlb_remove_table_one(table); return; } @@ -1029,40 +1036,124 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) } (*batch)->tables[(*batch)->nr++] = table; if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_table_flush(tlb); + tlb_flush_mmu(tlb); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -void thp_split_vma(struct vm_area_struct *vma) +static inline void thp_split_vma(struct vm_area_struct *vma) { unsigned long addr; - struct page *page; - for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { - page = follow_page(vma, addr, FOLL_SPLIT); - } + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) + follow_page(vma, addr, FOLL_SPLIT); } -void thp_split_mm(struct mm_struct *mm) +static inline void thp_split_mm(struct mm_struct *mm) { - struct vm_area_struct *vma = mm->mmap; + struct vm_area_struct *vma; - while (vma != NULL) { + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { thp_split_vma(vma); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; - vma = vma->vm_next; } + mm->def_flags |= VM_NOHUGEPAGE; +} +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, + struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end) +{ + unsigned long next, *table, *new; + struct page *page; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); +again: + if (pmd_none_or_clear_bad(pmd)) + continue; + table = (unsigned long *) pmd_deref(*pmd); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) + continue; + /* Allocate new page table with pgstes */ + new = page_table_alloc_pgste(mm, addr); + if (!new) { + mm->context.has_pgste = 0; + continue; + } + spin_lock(&mm->page_table_lock); + if (likely((unsigned long *) pmd_deref(*pmd) == table)) { + /* Nuke pmd entry pointing to the "short" page table */ + pmdp_flush_lazy(mm, addr, pmd); + pmd_clear(pmd); + /* Copy ptes from old table to new table */ + memcpy(new, table, PAGE_SIZE/2); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + /* Establish new table */ + pmd_populate(mm, pmd, (pte_t *) new); + /* Free old table with rcu, there might be a walker! */ + page_table_free_rcu(tlb, table); + new = NULL; + } + spin_unlock(&mm->page_table_lock); + if (new) { + page_table_free_pgste(new); + goto again; + } + } while (pmd++, addr = next, addr != end); + + return addr; +} + +static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, + struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t *pud; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + next = page_table_realloc_pmd(tlb, mm, pud, addr, next); + } while (pud++, addr = next, addr != end); + + return addr; +} + +static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t *pgd; + + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + next = page_table_realloc_pud(tlb, mm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + /* * switch on pgstes for its userspace process (for kvm) */ int s390_enable_sie(void) { struct task_struct *tsk = current; - struct mm_struct *mm, *old_mm; + struct mm_struct *mm = tsk->mm; + struct mmu_gather tlb; /* Do we have switched amode? If no, we cannot do sie */ if (s390_user_mode == HOME_SPACE_MODE) @@ -1072,57 +1163,16 @@ int s390_enable_sie(void) if (mm_has_pgste(tsk->mm)) return 0; - /* lets check if we are allowed to replace the mm */ - task_lock(tsk); - if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || -#ifdef CONFIG_AIO - !hlist_empty(&tsk->mm->ioctx_list) || -#endif - tsk->mm != tsk->active_mm) { - task_unlock(tsk); - return -EINVAL; - } - task_unlock(tsk); - - /* we copy the mm and let dup_mm create the page tables with_pgstes */ - tsk->mm->context.alloc_pgste = 1; - /* make sure that both mms have a correct rss state */ - sync_mm_rss(tsk->mm); - mm = dup_mm(tsk); - tsk->mm->context.alloc_pgste = 0; - if (!mm) - return -ENOMEM; - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE + down_write(&mm->mmap_sem); /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); - mm->def_flags |= VM_NOHUGEPAGE; -#endif - - /* Now lets check again if something happened */ - task_lock(tsk); - if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || -#ifdef CONFIG_AIO - !hlist_empty(&tsk->mm->ioctx_list) || -#endif - tsk->mm != tsk->active_mm) { - mmput(mm); - task_unlock(tsk); - return -EINVAL; - } - - /* ok, we are alone. No ptrace, no threads, etc. */ - old_mm = tsk->mm; - tsk->mm = tsk->active_mm = mm; - preempt_disable(); - update_mm(mm, tsk); - atomic_inc(&mm->context.attach_count); - atomic_dec(&old_mm->context.attach_count); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - preempt_enable(); - task_unlock(tsk); - mmput(old_mm); - return 0; + /* Reallocate the page tables with pgstes */ + mm->context.has_pgste = 1; + tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE); + page_table_realloc(&tlb, mm, 0, TASK_SIZE); + tlb_finish_mmu(&tlb, 0, TASK_SIZE); + up_write(&mm->mmap_sem); + return mm->context.has_pgste ? 0 : -ENOMEM; } EXPORT_SYMBOL_GPL(s390_enable_sie); @@ -1198,9 +1248,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) list_del(lh); } ptep = (pte_t *) pgtable; - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; ptep++; - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; return pgtable; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8b268fcc4612..bcfb70b60be6 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -69,7 +69,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address) pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); if (!pte) return NULL; - clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) pte, _PAGE_INVALID, PTRS_PER_PTE * sizeof(pte_t)); return pte; } @@ -101,7 +101,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { pud_val(*pu_dir) = __pa(address) | _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | - (ro ? _REGION_ENTRY_RO : 0); + (ro ? _REGION_ENTRY_PROTECT : 0); address += PUD_SIZE; continue; } @@ -118,7 +118,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { pmd_val(*pm_dir) = __pa(address) | _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - (ro ? _SEGMENT_ENTRY_RO : 0); + _SEGMENT_ENTRY_YOUNG | + (ro ? _SEGMENT_ENTRY_PROTECT : 0); address += PMD_SIZE; continue; } @@ -131,7 +132,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0); + pte_val(*pt_dir) = __pa(address) | + pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); address += PAGE_SIZE; } ret = 0; @@ -154,7 +156,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) pte_t *pt_dir; pte_t pte; - pte_val(pte) = _PAGE_TYPE_EMPTY; + pte_val(pte) = _PAGE_INVALID; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -255,7 +257,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) new_page =__pa(vmem_alloc_pages(0)); if (!new_page) goto out; - pte_val(*pt_dir) = __pa(new_page); + pte_val(*pt_dir) = + __pa(new_page) | pgprot_val(PAGE_KERNEL); } address += PAGE_SIZE; } diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 086a2e37935d..a9e1dc4ae442 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -2,5 +2,5 @@ # Makefile for the s390 PCI subsystem. # -obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o pci_sysfs.o \ +obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e2956ad39a4f..f17a8343e360 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -42,45 +42,26 @@ #define SIC_IRQ_MODE_SINGLE 1 #define ZPCI_NR_DMA_SPACES 1 -#define ZPCI_MSI_VEC_BITS 6 #define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS /* list of all detected zpci devices */ -LIST_HEAD(zpci_list); -EXPORT_SYMBOL_GPL(zpci_list); -DEFINE_MUTEX(zpci_list_lock); -EXPORT_SYMBOL_GPL(zpci_list_lock); +static LIST_HEAD(zpci_list); +static DEFINE_SPINLOCK(zpci_list_lock); -static struct pci_hp_callback_ops *hotplug_ops; +static void zpci_enable_irq(struct irq_data *data); +static void zpci_disable_irq(struct irq_data *data); -static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); -static DEFINE_SPINLOCK(zpci_domain_lock); - -struct callback { - irq_handler_t handler; - void *data; +static struct irq_chip zpci_irq_chip = { + .name = "zPCI", + .irq_unmask = zpci_enable_irq, + .irq_mask = zpci_disable_irq, }; -struct zdev_irq_map { - unsigned long aibv; /* AI bit vector */ - int msi_vecs; /* consecutive MSI-vectors used */ - int __unused; - struct callback cb[ZPCI_NR_MSI_VECS]; /* callback handler array */ - spinlock_t lock; /* protect callbacks against de-reg */ -}; - -struct intr_bucket { - /* amap of adapters, one bit per dev, corresponds to one irq nr */ - unsigned long *alloc; - /* AI summary bit, global page for all devices */ - unsigned long *aisb; - /* pointer to aibv and callback data in zdev */ - struct zdev_irq_map *imap[ZPCI_NR_DEVICES]; - /* protects the whole bucket struct */ - spinlock_t lock; -}; +static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); +static DEFINE_SPINLOCK(zpci_domain_lock); -static struct intr_bucket *bucket; +static struct airq_iv *zpci_aisb_iv; +static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; /* Adapter interrupt definitions */ static void zpci_irq_handler(struct airq_struct *airq); @@ -96,27 +77,8 @@ static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); struct zpci_iomap_entry *zpci_iomap_start; EXPORT_SYMBOL_GPL(zpci_iomap_start); -/* highest irq summary bit */ -static int __read_mostly aisb_max; - -static struct kmem_cache *zdev_irq_cache; static struct kmem_cache *zdev_fmb_cache; -static inline int irq_to_msi_nr(unsigned int irq) -{ - return irq & ZPCI_MSI_MASK; -} - -static inline int irq_to_dev_nr(unsigned int irq) -{ - return irq >> ZPCI_MSI_VEC_BITS; -} - -static inline struct zdev_irq_map *get_imap(unsigned int irq) -{ - return bucket->imap[irq_to_dev_nr(irq)]; -} - struct zpci_dev *get_zdev(struct pci_dev *pdev) { return (struct zpci_dev *) pdev->sysdata; @@ -126,22 +88,17 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) { struct zpci_dev *tmp, *zdev = NULL; - mutex_lock(&zpci_list_lock); + spin_lock(&zpci_list_lock); list_for_each_entry(tmp, &zpci_list, entry) { if (tmp->fid == fid) { zdev = tmp; break; } } - mutex_unlock(&zpci_list_lock); + spin_unlock(&zpci_list_lock); return zdev; } -bool zpci_fid_present(u32 fid) -{ - return (get_zdev_by_fid(fid) != NULL) ? true : false; -} - static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) { return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL; @@ -160,8 +117,7 @@ int pci_proc_domain(struct pci_bus *bus) EXPORT_SYMBOL_GPL(pci_proc_domain); /* Modify PCI: Register adapter interruptions */ -static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, - u64 aibv) +static int zpci_set_airq(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); struct zpci_fib *fib; @@ -172,14 +128,14 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, return -ENOMEM; fib->isc = PCI_ISC; - fib->noi = zdev->irq_map->msi_vecs; fib->sum = 1; /* enable summary notifications */ - fib->aibv = aibv; - fib->aibvo = 0; /* every function has its own page */ - fib->aisb = (u64) bucket->aisb + aisb / 8; - fib->aisbo = aisb & ZPCI_MSI_MASK; + fib->noi = airq_iv_end(zdev->aibv); + fib->aibv = (unsigned long) zdev->aibv->vector; + fib->aibvo = 0; /* each zdev has its own interrupt vector */ + fib->aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; + fib->aisbo = zdev->aisb & 63; - rc = s390pci_mod_fc(req, fib); + rc = zpci_mod_fc(req, fib); pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi); free_page((unsigned long) fib); @@ -209,7 +165,7 @@ static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args fib->iota = args->iota; fib->fmb_addr = args->fmb_addr; - rc = s390pci_mod_fc(req, fib); + rc = zpci_mod_fc(req, fib); free_page((unsigned long) fib); return rc; } @@ -234,7 +190,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) } /* Modify PCI: Unregister adapter interruptions */ -static int zpci_unregister_airq(struct zpci_dev *zdev) +static int zpci_clear_airq(struct zpci_dev *zdev) { struct mod_pci_args args = { 0, 0, 0, 0 }; @@ -283,7 +239,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) u64 data; int rc; - rc = s390pci_load(&data, req, offset); + rc = zpci_load(&data, req, offset); if (!rc) { data = data << ((8 - len) * 8); data = le64_to_cpu(data); @@ -301,25 +257,46 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) data = cpu_to_le64(data); data = data >> ((8 - len) * 8); - rc = s390pci_store(data, req, offset); + rc = zpci_store(data, req, offset); return rc; } -void enable_irq(unsigned int irq) +static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) +{ + int offset, pos; + u32 mask_bits; + + if (msi->msi_attrib.is_msix) { + offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + msi->masked = readl(msi->mask_base + offset); + writel(flag, msi->mask_base + offset); + } else if (msi->msi_attrib.maskbit) { + pos = (long) msi->mask_base; + pci_read_config_dword(msi->dev, pos, &mask_bits); + mask_bits &= ~(mask); + mask_bits |= flag & mask; + pci_write_config_dword(msi->dev, pos, mask_bits); + } else + return 0; + + msi->msi_attrib.maskbit = !!flag; + return 1; +} + +static void zpci_enable_irq(struct irq_data *data) { - struct msi_desc *msi = irq_get_msi_desc(irq); + struct msi_desc *msi = irq_get_msi_desc(data->irq); zpci_msi_set_mask_bits(msi, 1, 0); } -EXPORT_SYMBOL_GPL(enable_irq); -void disable_irq(unsigned int irq) +static void zpci_disable_irq(struct irq_data *data) { - struct msi_desc *msi = irq_get_msi_desc(irq); + struct msi_desc *msi = irq_get_msi_desc(data->irq); zpci_msi_set_mask_bits(msi, 1, 1); } -EXPORT_SYMBOL_GPL(disable_irq); void pcibios_fixup_bus(struct pci_bus *bus) { @@ -404,152 +381,147 @@ static struct pci_ops pci_root_ops = { .write = pci_write, }; -/* store the last handled bit to implement fair scheduling of devices */ -static DEFINE_PER_CPU(unsigned long, next_sbit); - static void zpci_irq_handler(struct airq_struct *airq) { - unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit); - int rescan = 0, max = aisb_max; - struct zdev_irq_map *imap; + unsigned long si, ai; + struct airq_iv *aibv; + int irqs_on = 0; inc_irq_stat(IRQIO_PCI); - sbit = start; - -scan: - /* find summary_bit */ - for_each_set_bit_left_cont(sbit, bucket->aisb, max) { - clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6)); - last = sbit; + for (si = 0;;) { + /* Scan adapter summary indicator bit vector */ + si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv)); + if (si == -1UL) { + if (irqs_on++) + /* End of second scan with interrupts on. */ + break; + /* First scan complete, reenable interrupts. */ + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + si = 0; + continue; + } - /* find vector bit */ - imap = bucket->imap[sbit]; - for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) { + /* Scan the adapter interrupt vector for this device. */ + aibv = zpci_aibv[si]; + for (ai = 0;;) { + ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv)); + if (ai == -1UL) + break; inc_irq_stat(IRQIO_MSI); - clear_bit(63 - mbit, &imap->aibv); - - spin_lock(&imap->lock); - if (imap->cb[mbit].handler) - imap->cb[mbit].handler(mbit, - imap->cb[mbit].data); - spin_unlock(&imap->lock); + airq_iv_lock(aibv, ai); + generic_handle_irq(airq_iv_get_data(aibv, ai)); + airq_iv_unlock(aibv, ai); } } - - if (rescan) - goto out; - - /* scan the skipped bits */ - if (start > 0) { - sbit = 0; - max = start; - start = 0; - goto scan; - } - - /* enable interrupts again */ - set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); - - /* check again to not lose initiative */ - rmb(); - max = aisb_max; - sbit = find_first_bit_left(bucket->aisb, max); - if (sbit != max) { - rescan++; - goto scan; - } -out: - /* store next device bit to scan */ - __get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last; } -/* msi_vecs - number of requested interrupts, 0 place function to error state */ -static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs) +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct zpci_dev *zdev = get_zdev(pdev); - unsigned int aisb, msi_nr; + unsigned int hwirq, irq, msi_vecs; + unsigned long aisb; struct msi_desc *msi; + struct msi_msg msg; int rc; - /* store the number of used MSI vectors */ - zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS); - - spin_lock(&bucket->lock); - aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE); - /* alloc map exhausted? */ - if (aisb == PAGE_SIZE) { - spin_unlock(&bucket->lock); - return -EIO; - } - set_bit(aisb, bucket->alloc); - spin_unlock(&bucket->lock); + pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec); + if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) + return -EINVAL; + msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX); + msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI); + /* Allocate adapter summary indicator bit */ + rc = -EIO; + aisb = airq_iv_alloc_bit(zpci_aisb_iv); + if (aisb == -1UL) + goto out; zdev->aisb = aisb; - if (aisb + 1 > aisb_max) - aisb_max = aisb + 1; - /* wire up IRQ shortcut pointer */ - bucket->imap[zdev->aisb] = zdev->irq_map; - pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map); + /* Create adapter interrupt vector */ + rc = -ENOMEM; + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); + if (!zdev->aibv) + goto out_si; - /* TODO: irq number 0 wont be found if we return less than requested MSIs. - * ignore it for now and fix in common code. - */ - msi_nr = aisb << ZPCI_MSI_VEC_BITS; + /* Wire up shortcut pointer */ + zpci_aibv[aisb] = zdev->aibv; + /* Request MSI interrupts */ + hwirq = 0; list_for_each_entry(msi, &pdev->msi_list, list) { - rc = zpci_setup_msi_irq(zdev, msi, msi_nr, - aisb << ZPCI_MSI_VEC_BITS); + rc = -EIO; + irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ + if (irq == NO_IRQ) + goto out_msi; + rc = irq_set_msi_desc(irq, msi); if (rc) - return rc; - msi_nr++; + goto out_msi; + irq_set_chip_and_handler(irq, &zpci_irq_chip, + handle_simple_irq); + msg.data = hwirq; + msg.address_lo = zdev->msi_addr & 0xffffffff; + msg.address_hi = zdev->msi_addr >> 32; + write_msi_msg(irq, &msg); + airq_iv_set_data(zdev->aibv, hwirq, irq); + hwirq++; } - rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv); - if (rc) { - clear_bit(aisb, bucket->alloc); - dev_err(&pdev->dev, "register MSI failed with: %d\n", rc); - return rc; + /* Enable adapter interrupts */ + rc = zpci_set_airq(zdev); + if (rc) + goto out_msi; + + return (msi_vecs == nvec) ? 0 : msi_vecs; + +out_msi: + list_for_each_entry(msi, &pdev->msi_list, list) { + if (hwirq-- == 0) + break; + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; } - return (zdev->irq_map->msi_vecs == msi_vecs) ? - 0 : zdev->irq_map->msi_vecs; + zpci_aibv[aisb] = NULL; + airq_iv_release(zdev->aibv); +out_si: + airq_iv_free_bit(zpci_aisb_iv, aisb); +out: + dev_err(&pdev->dev, "register MSI failed with: %d\n", rc); + return rc; } -static void zpci_teardown_msi(struct pci_dev *pdev) +void arch_teardown_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); struct msi_desc *msi; - int aisb, rc; + int rc; - rc = zpci_unregister_airq(zdev); + pr_info("%s: on pdev: %p\n", __func__, pdev); + + /* Disable adapter interrupts */ + rc = zpci_clear_airq(zdev); if (rc) { dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc); return; } - msi = list_first_entry(&pdev->msi_list, struct msi_desc, list); - aisb = irq_to_dev_nr(msi->irq); - - list_for_each_entry(msi, &pdev->msi_list, list) - zpci_teardown_msi_irq(zdev, msi); - - clear_bit(aisb, bucket->alloc); - if (aisb + 1 == aisb_max) - aisb_max--; -} - -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec); - if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) - return -EINVAL; - return zpci_setup_msi(pdev, nvec); -} + /* Release MSI interrupts */ + list_for_each_entry(msi, &pdev->msi_list, list) { + zpci_msi_set_mask_bits(msi, 1, 1); + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; + } -void arch_teardown_msi_irqs(struct pci_dev *pdev) -{ - pr_info("%s: on pdev: %p\n", __func__, pdev); - zpci_teardown_msi(pdev); + zpci_aibv[zdev->aisb] = NULL; + airq_iv_release(zdev->aibv); + airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); } static void zpci_map_resources(struct zpci_dev *zdev) @@ -564,8 +536,6 @@ static void zpci_map_resources(struct zpci_dev *zdev) continue; pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0); pdev->resource[i].end = pdev->resource[i].start + len - 1; - pr_debug("BAR%i: -> start: %Lx end: %Lx\n", - i, pdev->resource[i].start, pdev->resource[i].end); } } @@ -589,162 +559,47 @@ struct zpci_dev *zpci_alloc_device(void) /* Alloc memory for our private pci device data */ zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); - if (!zdev) - return ERR_PTR(-ENOMEM); - - /* Alloc aibv & callback space */ - zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL); - if (!zdev->irq_map) - goto error; - WARN_ON((u64) zdev->irq_map & 0xff); - return zdev; - -error: - kfree(zdev); - return ERR_PTR(-ENOMEM); + return zdev ? : ERR_PTR(-ENOMEM); } void zpci_free_device(struct zpci_dev *zdev) { - kmem_cache_free(zdev_irq_cache, zdev->irq_map); kfree(zdev); } -/* - * Too late for any s390 specific setup, since interrupts must be set up - * already which requires DMA setup too and the pci scan will access the - * config space, which only works if the function handle is enabled. - */ -int pcibios_enable_device(struct pci_dev *pdev, int mask) -{ - struct resource *res; - u16 cmd; - int i; - - pci_read_config_word(pdev, PCI_COMMAND, &cmd); - - for (i = 0; i < PCI_BAR_COUNT; i++) { - res = &pdev->resource[i]; - - if (res->flags & IORESOURCE_IO) - return -EINVAL; - - if (res->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - pci_write_config_word(pdev, PCI_COMMAND, cmd); - return 0; -} - int pcibios_add_platform_entries(struct pci_dev *pdev) { return zpci_sysfs_add_device(&pdev->dev); } -int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data) -{ - int msi_nr = irq_to_msi_nr(irq); - struct zdev_irq_map *imap; - struct msi_desc *msi; - - msi = irq_get_msi_desc(irq); - if (!msi) - return -EIO; - - imap = get_imap(irq); - spin_lock_init(&imap->lock); - - pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr); - imap->cb[msi_nr].handler = handler; - imap->cb[msi_nr].data = data; - - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - zpci_msi_set_mask_bits(msi, 1, 0); - return 0; -} - -void zpci_free_irq(unsigned int irq) -{ - struct zdev_irq_map *imap = get_imap(irq); - int msi_nr = irq_to_msi_nr(irq); - unsigned long flags; - - pr_debug("%s: for irq: %d\n", __func__, irq); - - spin_lock_irqsave(&imap->lock, flags); - imap->cb[msi_nr].handler = NULL; - imap->cb[msi_nr].data = NULL; - spin_unlock_irqrestore(&imap->lock, flags); -} - -int request_irq(unsigned int irq, irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - pr_debug("%s: irq: %d handler: %p flags: %lx dev: %s\n", - __func__, irq, handler, irqflags, devname); - - return zpci_request_irq(irq, handler, dev_id); -} -EXPORT_SYMBOL_GPL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) -{ - zpci_free_irq(irq); -} -EXPORT_SYMBOL_GPL(free_irq); - static int __init zpci_irq_init(void) { - int cpu, rc; - - bucket = kzalloc(sizeof(*bucket), GFP_KERNEL); - if (!bucket) - return -ENOMEM; - - bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!bucket->aisb) { - rc = -ENOMEM; - goto out_aisb; - } - - bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!bucket->alloc) { - rc = -ENOMEM; - goto out_alloc; - } + int rc; rc = register_adapter_interrupt(&zpci_airq); if (rc) - goto out_ai; + goto out; /* Set summary to 1 to be called every time for the ISC. */ *zpci_airq.lsi_ptr = 1; - for_each_online_cpu(cpu) - per_cpu(next_sbit, cpu) = 0; + rc = -ENOMEM; + zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); + if (!zpci_aisb_iv) + goto out_airq; - spin_lock_init(&bucket->lock); - set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; -out_ai: - free_page((unsigned long) bucket->alloc); -out_alloc: - free_page((unsigned long) bucket->aisb); -out_aisb: - kfree(bucket); +out_airq: + unregister_adapter_interrupt(&zpci_airq); +out: return rc; } static void zpci_irq_exit(void) { - free_page((unsigned long) bucket->alloc); - free_page((unsigned long) bucket->aisb); + airq_iv_release(zpci_aisb_iv); unregister_adapter_interrupt(&zpci_airq); - kfree(bucket); } static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size, @@ -801,16 +656,49 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry) int pcibios_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); + struct resource *res; + int i; + + zdev->pdev = pdev; + zpci_map_resources(zdev); + + for (i = 0; i < PCI_BAR_COUNT; i++) { + res = &pdev->resource[i]; + if (res->parent || !res->flags) + continue; + pci_claim_resource(pdev, i); + } + + return 0; +} + +int pcibios_enable_device(struct pci_dev *pdev, int mask) +{ + struct zpci_dev *zdev = get_zdev(pdev); + struct resource *res; + u16 cmd; + int i; zdev->pdev = pdev; zpci_debug_init_device(zdev); zpci_fmb_enable_device(zdev); zpci_map_resources(zdev); + pci_read_config_word(pdev, PCI_COMMAND, &cmd); + for (i = 0; i < PCI_BAR_COUNT; i++) { + res = &pdev->resource[i]; + + if (res->flags & IORESOURCE_IO) + return -EINVAL; + + if (res->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + pci_write_config_word(pdev, PCI_COMMAND, cmd); return 0; } -void pcibios_release_device(struct pci_dev *pdev) +void pcibios_disable_device(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); @@ -898,6 +786,8 @@ int zpci_enable_device(struct zpci_dev *zdev) rc = zpci_dma_init_device(zdev); if (rc) goto out_dma; + + zdev->state = ZPCI_FN_STATE_ONLINE; return 0; out_dma: @@ -926,18 +816,16 @@ int zpci_create_device(struct zpci_dev *zdev) rc = zpci_enable_device(zdev); if (rc) goto out_free; - - zdev->state = ZPCI_FN_STATE_ONLINE; } rc = zpci_scan_bus(zdev); if (rc) goto out_disable; - mutex_lock(&zpci_list_lock); + spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); - if (hotplug_ops) - hotplug_ops->create_slot(zdev); - mutex_unlock(&zpci_list_lock); + spin_unlock(&zpci_list_lock); + + zpci_init_slot(zdev); return 0; @@ -967,15 +855,10 @@ static inline int barsize(u8 size) static int zpci_mem_init(void) { - zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map), - L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL); - if (!zdev_irq_cache) - goto error_zdev; - zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), 16, 0, NULL); if (!zdev_fmb_cache) - goto error_fmb; + goto error_zdev; /* TODO: use realloc */ zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start), @@ -986,8 +869,6 @@ static int zpci_mem_init(void) error_iomap: kmem_cache_destroy(zdev_fmb_cache); -error_fmb: - kmem_cache_destroy(zdev_irq_cache); error_zdev: return -ENOMEM; } @@ -995,28 +876,10 @@ error_zdev: static void zpci_mem_exit(void) { kfree(zpci_iomap_start); - kmem_cache_destroy(zdev_irq_cache); kmem_cache_destroy(zdev_fmb_cache); } -void zpci_register_hp_ops(struct pci_hp_callback_ops *ops) -{ - mutex_lock(&zpci_list_lock); - hotplug_ops = ops; - mutex_unlock(&zpci_list_lock); -} -EXPORT_SYMBOL_GPL(zpci_register_hp_ops); - -void zpci_deregister_hp_ops(void) -{ - mutex_lock(&zpci_list_lock); - hotplug_ops = NULL; - mutex_unlock(&zpci_list_lock); -} -EXPORT_SYMBOL_GPL(zpci_deregister_hp_ops); - -unsigned int s390_pci_probe; -EXPORT_SYMBOL_GPL(s390_pci_probe); +static unsigned int s390_pci_probe; char * __init pcibios_setup(char *str) { @@ -1044,16 +907,12 @@ static int __init pci_base_init(void) rc = zpci_debug_init(); if (rc) - return rc; + goto out; rc = zpci_mem_init(); if (rc) goto out_mem; - rc = zpci_msihash_init(); - if (rc) - goto out_hash; - rc = zpci_irq_init(); if (rc) goto out_irq; @@ -1062,7 +921,7 @@ static int __init pci_base_init(void) if (rc) goto out_dma; - rc = clp_find_pci_devices(); + rc = clp_scan_pci_devices(); if (rc) goto out_find; @@ -1073,11 +932,15 @@ out_find: out_dma: zpci_irq_exit(); out_irq: - zpci_msihash_exit(); -out_hash: zpci_mem_exit(); out_mem: zpci_debug_exit(); +out: return rc; } -subsys_initcall(pci_base_init); +subsys_initcall_sync(pci_base_init); + +void zpci_rescan(void) +{ + clp_rescan_pci_devices_simple(); +} diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 2e9539625d93..475563c3d1e4 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -36,9 +36,9 @@ static inline u8 clp_instr(void *data) return cc; } -static void *clp_alloc_block(void) +static void *clp_alloc_block(gfp_t gfp_mask) { - return (void *) __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE)); + return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE)); } static void clp_free_block(void *ptr) @@ -70,7 +70,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) struct clp_req_rsp_query_pci_grp *rrb; int rc; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -113,7 +113,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) struct clp_req_rsp_query_pci *rrb; int rc; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -179,9 +179,9 @@ error: static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) { struct clp_req_rsp_set_pci *rrb; - int rc, retries = 1000; + int rc, retries = 100; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -199,7 +199,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) retries--; if (retries < 0) break; - msleep(1); + msleep(20); } } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); @@ -245,49 +245,12 @@ int clp_disable_fh(struct zpci_dev *zdev) return rc; } -static void clp_check_pcifn_entry(struct clp_fh_list_entry *entry) +static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, + void (*cb)(struct clp_fh_list_entry *entry)) { - int present, rc; - - if (!entry->vendor_id) - return; - - /* TODO: be a little bit more scalable */ - present = zpci_fid_present(entry->fid); - - if (present) - pr_debug("%s: device %x already present\n", __func__, entry->fid); - - /* skip already used functions */ - if (present && entry->config_state) - return; - - /* aev 306: function moved to stand-by state */ - if (present && !entry->config_state) { - /* - * The handle is already disabled, that means no iota/irq freeing via - * the firmware interfaces anymore. Need to free resources manually - * (DMA memory, debug, sysfs)... - */ - zpci_stop_device(get_zdev_by_fid(entry->fid)); - return; - } - - rc = clp_add_pci_device(entry->fid, entry->fh, entry->config_state); - if (rc) - pr_err("Failed to add fid: 0x%x\n", entry->fid); -} - -int clp_find_pci_devices(void) -{ - struct clp_req_rsp_list_pci *rrb; u64 resume_token = 0; int entries, i, rc; - rrb = clp_alloc_block(); - if (!rrb) - return -ENOMEM; - do { memset(rrb, 0, sizeof(*rrb)); rrb->request.hdr.len = sizeof(rrb->request); @@ -316,12 +279,101 @@ int clp_find_pci_devices(void) resume_token = rrb->response.resume_token; for (i = 0; i < entries; i++) - clp_check_pcifn_entry(&rrb->response.fh_list[i]); + cb(&rrb->response.fh_list[i]); } while (resume_token); pr_debug("Maximum number of supported PCI functions: %u\n", rrb->response.max_fn); out: + return rc; +} + +static void __clp_add(struct clp_fh_list_entry *entry) +{ + if (!entry->vendor_id) + return; + + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); +} + +static void __clp_rescan(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) { + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); + return; + } + + if (!entry->config_state) { + /* + * The handle is already disabled, that means no iota/irq freeing via + * the firmware interfaces anymore. Need to free resources manually + * (DMA memory, debug, sysfs)... + */ + zpci_stop_device(zdev); + } +} + +static void __clp_update(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) + return; + + zdev->fh = entry->fh; +} + +int clp_scan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_add); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_rescan); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices_simple(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_NOWAIT); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_update); + clp_free_block(rrb); return rc; } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index a2343c1f6e04..7e5573acb063 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -10,6 +10,7 @@ #include <linux/export.h> #include <linux/iommu-helper.h> #include <linux/dma-mapping.h> +#include <linux/vmalloc.h> #include <linux/pci.h> #include <asm/pci_dma.h> @@ -170,8 +171,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, */ goto no_refresh; - rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, - nr_pages * PAGE_SIZE); + rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, + nr_pages * PAGE_SIZE); no_refresh: spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); @@ -407,7 +408,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int zpci_dma_init_device(struct zpci_dev *zdev) { - unsigned int bitmap_order; int rc; spin_lock_init(&zdev->iommu_bitmap_lock); @@ -421,12 +421,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET; zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - bitmap_order = get_order(zdev->iommu_pages / 8); - pr_info("iommu_size: 0x%lx iommu_pages: 0x%lx bitmap_order: %i\n", - zdev->iommu_size, zdev->iommu_pages, bitmap_order); - - zdev->iommu_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, - bitmap_order); + zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); if (!zdev->iommu_bitmap) { rc = -ENOMEM; goto out_reg; @@ -451,8 +446,7 @@ void zpci_dma_exit_device(struct zpci_dev *zdev) { zpci_unregister_ioat(zdev, 0); dma_cleanup_tables(zdev); - free_pages((unsigned long) zdev->iommu_bitmap, - get_order(zdev->iommu_pages / 8)); + vfree(zdev->iommu_bitmap); zdev->iommu_bitmap = NULL; zdev->next_bit = 0; } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index ec62e3a0dc09..0aecaf954845 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -69,7 +69,7 @@ static void zpci_event_log_avail(struct zpci_ccdf_avail *ccdf) clp_add_pci_device(ccdf->fid, ccdf->fh, 0); break; case 0x0306: - clp_find_pci_devices(); + clp_rescan_pci_devices(); break; default: break; diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 22eeb9d7ffeb..85267c058af8 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -27,7 +27,7 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) return cc; } -int s390pci_mod_fc(u64 req, struct zpci_fib *fib) +int zpci_mod_fc(u64 req, struct zpci_fib *fib) { u8 cc, status; @@ -61,7 +61,7 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) return cc; } -int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) +int zpci_refresh_trans(u64 fn, u64 addr, u64 range) { u8 cc, status; @@ -78,7 +78,7 @@ int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) } /* Set Interruption Controls */ -void set_irq_ctrl(u16 ctl, char *unused, u8 isc) +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) { asm volatile ( " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" @@ -109,7 +109,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_load(u64 *data, u64 req, u64 offset) +int zpci_load(u64 *data, u64 req, u64 offset) { u8 status; int cc; @@ -125,7 +125,7 @@ int s390pci_load(u64 *data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_load); +EXPORT_SYMBOL_GPL(zpci_load); /* PCI Store */ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) @@ -147,7 +147,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_store(u64 data, u64 req, u64 offset) +int zpci_store(u64 data, u64 req, u64 offset) { u8 status; int cc; @@ -163,7 +163,7 @@ int s390pci_store(u64 data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_store); +EXPORT_SYMBOL_GPL(zpci_store); /* PCI Store Block */ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) @@ -183,7 +183,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_store_block(const u64 *data, u64 req, u64 offset) +int zpci_store_block(const u64 *data, u64 req, u64 offset) { u8 status; int cc; @@ -199,4 +199,4 @@ int s390pci_store_block(const u64 *data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_store_block); +EXPORT_SYMBOL_GPL(zpci_store_block); diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c deleted file mode 100644 index b097aed05a9b..000000000000 --- a/arch/s390/pci/pci_msi.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright IBM Corp. 2012 - * - * Author(s): - * Jan Glauber <jang@linux.vnet.ibm.com> - */ - -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt - -#include <linux/kernel.h> -#include <linux/err.h> -#include <linux/rculist.h> -#include <linux/hash.h> -#include <linux/pci.h> -#include <linux/msi.h> -#include <asm/hw_irq.h> - -/* mapping of irq numbers to msi_desc */ -static struct hlist_head *msi_hash; -static const unsigned int msi_hash_bits = 8; -#define MSI_HASH_BUCKETS (1U << msi_hash_bits) -#define msi_hashfn(nr) hash_long(nr, msi_hash_bits) - -static DEFINE_SPINLOCK(msi_map_lock); - -struct msi_desc *__irq_get_msi_desc(unsigned int irq) -{ - struct msi_map *map; - - hlist_for_each_entry_rcu(map, - &msi_hash[msi_hashfn(irq)], msi_chain) - if (map->irq == irq) - return map->msi; - return NULL; -} - -int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) -{ - if (msi->msi_attrib.is_msix) { - int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - msi->masked = readl(msi->mask_base + offset); - writel(flag, msi->mask_base + offset); - } else { - if (msi->msi_attrib.maskbit) { - int pos; - u32 mask_bits; - - pos = (long) msi->mask_base; - pci_read_config_dword(msi->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(msi->dev, pos, mask_bits); - } else { - return 0; - } - } - - msi->msi_attrib.maskbit = !!flag; - return 1; -} - -int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi, - unsigned int nr, int offset) -{ - struct msi_map *map; - struct msi_msg msg; - int rc; - - map = kmalloc(sizeof(*map), GFP_KERNEL); - if (map == NULL) - return -ENOMEM; - - map->irq = nr; - map->msi = msi; - zdev->msi_map[nr & ZPCI_MSI_MASK] = map; - INIT_HLIST_NODE(&map->msi_chain); - - pr_debug("%s hashing irq: %u to bucket nr: %llu\n", - __func__, nr, msi_hashfn(nr)); - hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]); - - spin_lock(&msi_map_lock); - rc = irq_set_msi_desc(nr, msi); - if (rc) { - spin_unlock(&msi_map_lock); - hlist_del_rcu(&map->msi_chain); - kfree(map); - zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL; - return rc; - } - spin_unlock(&msi_map_lock); - - msg.data = nr - offset; - msg.address_lo = zdev->msi_addr & 0xffffffff; - msg.address_hi = zdev->msi_addr >> 32; - write_msi_msg(nr, &msg); - return 0; -} - -void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi) -{ - int irq = msi->irq & ZPCI_MSI_MASK; - struct msi_map *map; - - msi->msg.address_lo = 0; - msi->msg.address_hi = 0; - msi->msg.data = 0; - msi->irq = 0; - zpci_msi_set_mask_bits(msi, 1, 1); - - spin_lock(&msi_map_lock); - map = zdev->msi_map[irq]; - hlist_del_rcu(&map->msi_chain); - kfree(map); - zdev->msi_map[irq] = NULL; - spin_unlock(&msi_map_lock); -} - -/* - * The msi hash table has 256 entries which is good for 4..20 - * devices (a typical device allocates 10 + CPUs MSI's). Maybe make - * the hash table size adjustable later. - */ -int __init zpci_msihash_init(void) -{ - unsigned int i; - - msi_hash = kmalloc(MSI_HASH_BUCKETS * sizeof(*msi_hash), GFP_KERNEL); - if (!msi_hash) - return -ENOMEM; - - for (i = 0; i < MSI_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&msi_hash[i]); - return 0; -} - -void __init zpci_msihash_exit(void) -{ - kfree(msi_hash); -} diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index e99a2557f186..cf8a12ff733b 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -48,11 +48,38 @@ static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL); +static void recover_callback(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); + int ret; + + pci_stop_and_remove_bus_device(pdev); + ret = zpci_disable_device(zdev); + if (ret) + return; + + ret = zpci_enable_device(zdev); + if (ret) + return; + + pci_rescan_bus(zdev->bus); +} + +static ssize_t store_recover(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc = device_schedule_callback(dev, recover_callback); + return rc ? rc : count; +} +static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover); + static struct device_attribute *zpci_dev_attrs[] = { &dev_attr_function_id, &dev_attr_function_handle, &dev_attr_pchid, &dev_attr_pfgid, + &dev_attr_recover, NULL, }; |