diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-28 09:58:46 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-28 09:58:46 -0700 |
commit | 14bc84ce0b41787acc08aefabe718479c5dde60e (patch) | |
tree | 865d39bf0c4682e11c9c7bef0712872b2a5eca55 /arch/s390/include/asm/pgtable.h | |
parent | 2decb2682f80759f631c8332f9a2a34a02150a03 (diff) | |
parent | 57127645d79d2e83e801f141f7d03f64accf28aa (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky:
"One additional new feature for 4.1, a new PRNG based on SHA-512 for
the zcrypt driver.
Two memory management related changes, the page table reallocation for
KVM is removed, and with file ptes gone the encoding of page table
entries is improved.
And three bug fixes"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
s390/zcrypt: Introduce new SHA-512 based Pseudo Random Generator.
s390/mm: change swap pte encoding and pgtable cleanup
s390/mm: correct transfer of dirty & young bits in __pmd_to_pte
s390/bpf: add dependency to z196 features
s390/3215: free memory in error path
s390/kvm: remove delayed reallocation of page tables for KVM
kexec: allocate the kexec control page with KEXEC_CONTROL_MEMORY_GFP
Diffstat (limited to 'arch/s390/include/asm/pgtable.h')
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 167 |
1 files changed, 72 insertions, 95 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 989cfae9e202..fc642399b489 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -12,12 +12,9 @@ #define _ASM_S390_PGTABLE_H /* - * The Linux memory management assumes a three-level page table setup. For - * s390 31 bit we "fold" the mid level into the top-level page table, so - * that we physically have the same two-level page table as the s390 mmu - * expects in 31 bit mode. For s390 64 bit we use three of the five levels - * the hardware provides (region first and region second tables are not - * used). + * The Linux memory management assumes a three-level page table setup. + * For s390 64 bit we use up to four of the five levels the hardware + * provides (region first tables are not used). * * The "pgd_xxx()" functions are trivial for a folded two-level * setup: the pgd is never bad, and a pmd always exists (as it's folded @@ -101,8 +98,8 @@ extern unsigned long zero_page_mask; #ifndef __ASSEMBLY__ /* - * The vmalloc and module area will always be on the topmost area of the kernel - * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules. + * The vmalloc and module area will always be on the topmost area of the + * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where * modules will reside. That makes sure that inter module branches always * happen without trampolines and in addition the placement within a 2GB frame @@ -131,38 +128,6 @@ static inline int is_module_addr(void *addr) } /* - * A 31 bit pagetable entry of S390 has following format: - * | PFRA | | OS | - * 0 0IP0 - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * I Page-Invalid Bit: Page is not available for address-translation - * P Page-Protection Bit: Store access not possible for page - * - * A 31 bit segmenttable entry of S390 has following format: - * | P-table origin | |PTL - * 0 IC - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * I Segment-Invalid Bit: Segment is not available for address-translation - * C Common-Segment Bit: Segment is not private (PoP 3-30) - * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256) - * - * The 31 bit segmenttable origin of S390 has following format: - * - * |S-table origin | | STL | - * X **GPS - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * X Space-Switch event: - * G Segment-Invalid Bit: * - * P Private-Space Bit: Segment is not private (PoP 3-30) - * S Storage-Alteration: - * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048) - * * A 64 bit pagetable entry of S390 has following format: * | PFRA |0IPC| OS | * 0000000000111111111122222222223333333333444444444455555555556666 @@ -220,7 +185,6 @@ static inline int is_module_addr(void *addr) /* Software bits in the page table entry */ #define _PAGE_PRESENT 0x001 /* SW pte present bit */ -#define _PAGE_TYPE 0x002 /* SW pte type bit */ #define _PAGE_YOUNG 0x004 /* SW pte young bit */ #define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ #define _PAGE_READ 0x010 /* SW pte read bit */ @@ -240,31 +204,34 @@ static inline int is_module_addr(void *addr) * table lock held. * * The following table gives the different possible bit combinations for - * the pte hardware and software bits in the last 12 bits of a pte: + * the pte hardware and software bits in the last 12 bits of a pte + * (. unassigned bit, x don't care, t swap type): * * 842100000000 * 000084210000 * 000000008421 - * .IR...wrdytp - * empty .10...000000 - * swap .10...xxxx10 - * file .11...xxxxx0 - * prot-none, clean, old .11...000001 - * prot-none, clean, young .11...000101 - * prot-none, dirty, old .10...001001 - * prot-none, dirty, young .10...001101 - * read-only, clean, old .11...010001 - * read-only, clean, young .01...010101 - * read-only, dirty, old .11...011001 - * read-only, dirty, young .01...011101 - * read-write, clean, old .11...110001 - * read-write, clean, young .01...110101 - * read-write, dirty, old .10...111001 - * read-write, dirty, young .00...111101 + * .IR.uswrdy.p + * empty .10.00000000 + * swap .11..ttttt.0 + * prot-none, clean, old .11.xx0000.1 + * prot-none, clean, young .11.xx0001.1 + * prot-none, dirty, old .10.xx0010.1 + * prot-none, dirty, young .10.xx0011.1 + * read-only, clean, old .11.xx0100.1 + * read-only, clean, young .01.xx0101.1 + * read-only, dirty, old .11.xx0110.1 + * read-only, dirty, young .01.xx0111.1 + * read-write, clean, old .11.xx1100.1 + * read-write, clean, young .01.xx1101.1 + * read-write, dirty, old .10.xx1110.1 + * read-write, dirty, young .00.xx1111.1 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large * - * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 - * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 - * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 + * pte_none is true for the bit pattern .10.00000000, pte == 0x400 + * pte_swap is true for the bit pattern .11..ooooo.0, (pte & 0x201) == 0x200 + * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001 */ /* Bits in the segment/region table address-space-control-element */ @@ -335,6 +302,8 @@ static inline int is_module_addr(void *addr) * read-write, dirty, young 11..0...0...11 * The segment table origin is used to distinguish empty (origin==0) from * read-write, old segment table entries (origin!=0) + * HW-bits: R read-only, I invalid + * SW-bits: y young, d dirty, r read, w write */ #define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ @@ -423,6 +392,15 @@ static inline int mm_has_pgste(struct mm_struct *mm) return 0; } +static inline int mm_alloc_pgste(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.alloc_pgste)) + return 1; +#endif + return 0; +} + /* * In the case that a guest uses storage keys * faults should no longer be backed by zero pages @@ -582,10 +560,9 @@ static inline int pte_none(pte_t pte) static inline int pte_swap(pte_t pte) { - /* Bit pattern: (pte & 0x603) == 0x402 */ - return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | - _PAGE_TYPE | _PAGE_PRESENT)) - == (_PAGE_INVALID | _PAGE_TYPE); + /* Bit pattern: (pte & 0x201) == 0x200 */ + return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT)) + == _PAGE_PROTECT; } static inline int pte_special(pte_t pte) @@ -1586,51 +1563,51 @@ static inline int has_transparent_hugepage(void) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * 31 bit swap entry format: - * A page-table entry has some bits we have to treat in a special way. - * Bits 0, 20 and bit 23 have to be zero, otherwise an specification - * exception will occur instead of a page translation exception. The - * specifiation exception has the bad habit not to store necessary - * information in the lowcore. - * Bits 21, 22, 30 and 31 are used to indicate the page type. - * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 - * This leaves the bits 1-19 and bits 24-29 to store type and offset. - * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 - * plus 24 for the offset. - * 0| offset |0110|o|type |00| - * 0 0000000001111111111 2222 2 22222 33 - * 0 1234567890123456789 0123 4 56789 01 - * * 64 bit swap entry format: * A page-table entry has some bits we have to treat in a special way. * Bits 52 and bit 55 have to be zero, otherwise an specification * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bits 53, 54, 62 and 63 are used to indicate the page type. - * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 - * This leaves the bits 0-51 and bits 56-61 to store type and offset. - * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 - * plus 56 for the offset. - * | offset |0110|o|type |00| - * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 - * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 + * Bits 54 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200 + * This leaves the bits 0-51 and bits 56-62 to store type and offset. + * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51 + * for the offset. + * | offset |01100|type |00| + * |0000000000111111111122222222223333333333444444444455|55555|55566|66| + * |0123456789012345678901234567890123456789012345678901|23456|78901|23| */ -#define __SWP_OFFSET_MASK (~0UL >> 11) +#define __SWP_OFFSET_MASK ((1UL << 52) - 1) +#define __SWP_OFFSET_SHIFT 12 +#define __SWP_TYPE_MASK ((1UL << 5) - 1) +#define __SWP_TYPE_SHIFT 2 static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; - offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | - ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); + + pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT; + pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; + pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; return pte; } -#define __swp_type(entry) (((entry).val >> 2) & 0x1f) -#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) -#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) +static inline unsigned long __swp_type(swp_entry_t entry) +{ + return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK; +} + +static inline unsigned long __swp_offset(swp_entry_t entry) +{ + return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK; +} + +static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) +{ + return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) }; +} #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) |