summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/bugs.c55
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/common.c9
-rw-r--r--arch/x86/kernel/cpu/cyrix.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c52
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c11
-rw-r--r--arch/x86/kernel/cpu/sgx/driver.c10
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.c108
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.h8
-rw-r--r--arch/x86/kernel/cpu/sgx/ioctl.c17
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c31
13 files changed, 191 insertions, 120 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ce5b27db65e1..a8dc7fe5f100 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -811,7 +811,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
* (model = 0x14) and later actually support it.
* (AMD Erratum #110, docId: 25759).
*/
- if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
+ if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM) && !cpu_has(c, X86_FEATURE_HYPERVISOR)) {
clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
if (!rdmsrl_amd_safe(0xc001100d, &value)) {
value &= ~BIT_64(32);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f84d59cd180b..c10d93d2773b 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1092,6 +1092,8 @@ do_cmd_auto:
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ mitigate_smt = true;
/*
* IBPB on entry already obviates the need for
@@ -1101,8 +1103,6 @@ do_cmd_auto:
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
- mitigate_smt = true;
-
/*
* There is no need for RSB filling: entry_ibpb() ensures
* all predictions, including the RSB, are invalidated,
@@ -1553,7 +1553,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
rrsba_disabled = true;
}
-static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
+static void __init spectre_v2_select_rsb_mitigation(enum spectre_v2_mitigation mode)
{
/*
* Similar to context switches, there are two types of RSB attacks
@@ -1577,27 +1577,30 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
*/
switch (mode) {
case SPECTRE_V2_NONE:
- return;
+ break;
- case SPECTRE_V2_EIBRS_LFENCE:
case SPECTRE_V2_EIBRS:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
- setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
+ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
}
- return;
+ break;
- case SPECTRE_V2_EIBRS_RETPOLINE:
case SPECTRE_V2_RETPOLINE:
case SPECTRE_V2_LFENCE:
case SPECTRE_V2_IBRS:
+ pr_info("Spectre v2 / SpectreRSB: Filling RSB on context switch and VMEXIT\n");
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
- pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
- return;
- }
+ break;
- pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
- dump_stack();
+ default:
+ pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation\n");
+ dump_stack();
+ break;
+ }
}
/*
@@ -1653,10 +1656,11 @@ static void __init bhi_select_mitigation(void)
return;
}
- if (spec_ctrl_bhi_dis())
+ if (!IS_ENABLED(CONFIG_X86_64))
return;
- if (!IS_ENABLED(CONFIG_X86_64))
+ /* Mitigate in hardware if supported */
+ if (spec_ctrl_bhi_dis())
return;
/* Mitigate KVM by default */
@@ -1822,10 +1826,7 @@ static void __init spectre_v2_select_mitigation(void)
*
* FIXME: Is this pointless for retbleed-affected AMD?
*/
- setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
- pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
-
- spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
+ spectre_v2_select_rsb_mitigation(mode);
/*
* Retpoline protects the kernel, but doesn't protect firmware. IBRS
@@ -2607,6 +2608,7 @@ static void __init srso_select_mitigation(void)
if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB;
/*
@@ -2616,6 +2618,13 @@ static void __init srso_select_mitigation(void)
*/
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
@@ -2624,8 +2633,8 @@ static void __init srso_select_mitigation(void)
break;
case SRSO_CMD_IBPB_ON_VMEXIT:
- if (IS_ENABLED(CONFIG_CPU_SRSO)) {
- if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+ if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+ if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
@@ -2637,9 +2646,9 @@ static void __init srso_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
- pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
goto pred_cmd;
- }
+ }
break;
default:
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index b5e36bd0425b..a33c972cecf5 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -795,7 +795,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
/* If bit 31 is set, this is an unknown format */
- for (j = 0 ; j < 3 ; j++)
+ for (j = 0 ; j < 4 ; j++)
if (regs[j] & (1 << 31))
regs[j] = 0;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dbaea8a6175b..1ebd67c95d86 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1345,9 +1345,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
- /* When virtualized, eIBRS could be hidden, assume vulnerable */
- if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
- !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ /*
+ * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with
+ * BHI_NO still need to use the BHI mitigation to prevent Intra-mode
+ * attacks. When virtualized, eIBRS could be hidden, assume vulnerable.
+ */
+ if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
(boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
setup_force_cpu_bug(X86_BUG_BHI);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 7227c15299d0..7de799ab2a04 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -152,8 +152,8 @@ static void geode_configure(void)
u8 ccr3;
local_irq_save(flags);
- /* Suspend on halt power saving and enable #SUSP pin */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
+ /* Suspend on halt power saving */
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x08);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 2b1cd4202e75..e7145ee656df 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -749,26 +749,37 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
}
#endif
-#define TLB_INST_4K 0x01
-#define TLB_INST_4M 0x02
-#define TLB_INST_2M_4M 0x03
+#define TLB_INST_4K 0x01
+#define TLB_INST_4M 0x02
+#define TLB_INST_2M_4M 0x03
-#define TLB_INST_ALL 0x05
-#define TLB_INST_1G 0x06
+#define TLB_INST_ALL 0x05
+#define TLB_INST_1G 0x06
-#define TLB_DATA_4K 0x11
-#define TLB_DATA_4M 0x12
-#define TLB_DATA_2M_4M 0x13
-#define TLB_DATA_4K_4M 0x14
+#define TLB_DATA_4K 0x11
+#define TLB_DATA_4M 0x12
+#define TLB_DATA_2M_4M 0x13
+#define TLB_DATA_4K_4M 0x14
-#define TLB_DATA_1G 0x16
+#define TLB_DATA_1G 0x16
+#define TLB_DATA_1G_2M_4M 0x17
-#define TLB_DATA0_4K 0x21
-#define TLB_DATA0_4M 0x22
-#define TLB_DATA0_2M_4M 0x23
+#define TLB_DATA0_4K 0x21
+#define TLB_DATA0_4M 0x22
+#define TLB_DATA0_2M_4M 0x23
-#define STLB_4K 0x41
-#define STLB_4K_2M 0x42
+#define STLB_4K 0x41
+#define STLB_4K_2M 0x42
+
+/*
+ * All of leaf 0x2's one-byte TLB descriptors implies the same number of
+ * entries for their respective TLB types. The 0x63 descriptor is an
+ * exception: it implies 4 dTLB entries for 1GB pages 32 dTLB entries
+ * for 2MB or 4MB pages. Encode descriptor 0x63 dTLB entry count for
+ * 2MB/4MB pages here, as its count for dTLB 1GB pages is already at the
+ * intel_tlb_table[] mapping.
+ */
+#define TLB_0x63_2M_4M_ENTRIES 32
static const struct _tlb_table intel_tlb_table[] = {
{ 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
@@ -790,7 +801,8 @@ static const struct _tlb_table intel_tlb_table[] = {
{ 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" },
{ 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" },
{ 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" },
- { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" },
+ { 0x63, TLB_DATA_1G_2M_4M, 4, " TLB_DATA 1 GByte pages, 4-way set associative"
+ " (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here)" },
{ 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" },
{ 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
{ 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" },
@@ -890,6 +902,12 @@ static void intel_tlb_lookup(const unsigned char desc)
if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
break;
+ case TLB_DATA_1G_2M_4M:
+ if (tlb_lld_2m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES)
+ tlb_lld_2m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES;
+ if (tlb_lld_4m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES)
+ tlb_lld_4m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES;
+ fallthrough;
case TLB_DATA_1G:
if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
@@ -913,7 +931,7 @@ static void intel_detect_tlb(struct cpuinfo_x86 *c)
cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
/* If bit 31 is set, this is an unknown format */
- for (j = 0 ; j < 3 ; j++)
+ for (j = 0 ; j < 4 ; j++)
if (regs[j] & (1 << 31))
regs[j] = 0;
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 6a95a52d08da..8143089c9b98 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -861,7 +861,7 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz
return ret;
}
- for_each_node(nid) {
+ for_each_node_with_cpus(nid) {
cpu = cpumask_first(cpumask_of_node(nid));
c = &cpu_data(cpu);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 19762b47fbec..b6424322f5d3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -16,7 +16,6 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kexec.h>
-#include <linux/i8253.h>
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
@@ -445,16 +444,6 @@ static void __init ms_hyperv_init_platform(void)
if (efi_enabled(EFI_BOOT))
x86_platform.get_nmi_reason = hv_get_nmi_reason;
- /*
- * Hyper-V VMs have a PIT emulation quirk such that zeroing the
- * counter register during PIT shutdown restarts the PIT. So it
- * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
- * to false tells pit_shutdown() not to zero the counter so that
- * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
- * and setting this value has no effect.
- */
- i8253_clear_counter_on_shutdown = false;
-
#if IS_ENABLED(CONFIG_HYPERV)
/*
* Setup the hook to get control post apic initialization.
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index aa9b8b868867..afccb69cd9a2 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -150,13 +150,15 @@ int __init sgx_drv_init(void)
u64 xfrm_mask;
int ret;
- if (!cpu_feature_enabled(X86_FEATURE_SGX_LC))
+ if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) {
+ pr_info("SGX disabled: SGX launch control CPU feature is not available, /dev/sgx_enclave disabled.\n");
return -ENODEV;
+ }
cpuid_count(SGX_CPUID, 0, &eax, &ebx, &ecx, &edx);
if (!(eax & 1)) {
- pr_err("SGX disabled: SGX1 instruction support not available.\n");
+ pr_info("SGX disabled: SGX1 instruction support not available, /dev/sgx_enclave disabled.\n");
return -ENODEV;
}
@@ -173,8 +175,10 @@ int __init sgx_drv_init(void)
}
ret = misc_register(&sgx_dev_enclave);
- if (ret)
+ if (ret) {
+ pr_info("SGX disabled: Unable to register the /dev/sgx_enclave driver (%d).\n", ret);
return ret;
+ }
return 0;
}
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index fa5777af8da1..2e93bafa7c47 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -232,25 +232,10 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
return epc_page;
}
-static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
- unsigned long addr,
- unsigned long vm_flags)
+static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
+ struct sgx_encl_page *entry)
{
- unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
struct sgx_epc_page *epc_page;
- struct sgx_encl_page *entry;
-
- entry = xa_load(&encl->page_array, PFN_DOWN(addr));
- if (!entry)
- return ERR_PTR(-EFAULT);
-
- /*
- * Verify that the faulted page has equal or higher build time
- * permissions than the VMA permissions (i.e. the subset of {VM_READ,
- * VM_WRITE, VM_EXECUTE} in vma->vm_flags).
- */
- if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
- return ERR_PTR(-EFAULT);
/* Entry successfully located. */
if (entry->epc_page) {
@@ -276,6 +261,40 @@ static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
return entry;
}
+static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
+ unsigned long addr,
+ unsigned long vm_flags)
+{
+ unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+ struct sgx_encl_page *entry;
+
+ entry = xa_load(&encl->page_array, PFN_DOWN(addr));
+ if (!entry)
+ return ERR_PTR(-EFAULT);
+
+ /*
+ * Verify that the page has equal or higher build time
+ * permissions than the VMA permissions (i.e. the subset of {VM_READ,
+ * VM_WRITE, VM_EXECUTE} in vma->vm_flags).
+ */
+ if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
+ return ERR_PTR(-EFAULT);
+
+ return __sgx_encl_load_page(encl, entry);
+}
+
+struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
+ unsigned long addr)
+{
+ struct sgx_encl_page *entry;
+
+ entry = xa_load(&encl->page_array, PFN_DOWN(addr));
+ if (!entry)
+ return ERR_PTR(-EFAULT);
+
+ return __sgx_encl_load_page(encl, entry);
+}
+
static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
{
unsigned long addr = (unsigned long)vmf->address;
@@ -297,7 +316,7 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
mutex_lock(&encl->lock);
- entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
+ entry = sgx_encl_load_page_in_vma(encl, addr, vma->vm_flags);
if (IS_ERR(entry)) {
mutex_unlock(&encl->lock);
@@ -445,7 +464,7 @@ static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
for ( ; ; ) {
mutex_lock(&encl->lock);
- entry = sgx_encl_load_page(encl, addr, vm_flags);
+ entry = sgx_encl_load_page_in_vma(encl, addr, vm_flags);
if (PTR_ERR(entry) != -EBUSY)
break;
@@ -702,7 +721,7 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
spin_lock(&encl->mm_lock);
list_add_rcu(&encl_mm->list, &encl->mm_list);
- /* Pairs with smp_rmb() in sgx_reclaimer_block(). */
+ /* Pairs with smp_rmb() in sgx_zap_enclave_ptes(). */
smp_wmb();
encl->mm_list_version++;
spin_unlock(&encl->mm_lock);
@@ -918,7 +937,52 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
}
/**
+ * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
+ * @encl: the enclave
+ * @addr: page aligned pointer to single page for which PTEs will be removed
+ *
+ * Multiple VMAs may have an enclave page mapped. Remove the PTE mapping
+ * @addr from each VMA. Ensure that page fault handler is ready to handle
+ * new mappings of @addr before calling this function.
+ */
+void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
+{
+ unsigned long mm_list_version;
+ struct sgx_encl_mm *encl_mm;
+ struct vm_area_struct *vma;
+ int idx, ret;
+
+ do {
+ mm_list_version = encl->mm_list_version;
+
+ /* Pairs with smp_wmb() in sgx_encl_mm_add(). */
+ smp_rmb();
+
+ idx = srcu_read_lock(&encl->srcu);
+
+ list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
+ if (!mmget_not_zero(encl_mm->mm))
+ continue;
+
+ mmap_read_lock(encl_mm->mm);
+
+ ret = sgx_encl_find(encl_mm->mm, addr, &vma);
+ if (!ret && encl == vma->vm_private_data)
+ zap_vma_ptes(vma, addr, PAGE_SIZE);
+
+ mmap_read_unlock(encl_mm->mm);
+
+ mmput_async(encl_mm->mm);
+ }
+
+ srcu_read_unlock(&encl->srcu, idx);
+ } while (unlikely(encl->mm_list_version != mm_list_version));
+}
+
+/**
* sgx_alloc_va_page() - Allocate a Version Array (VA) page
+ * @reclaim: Reclaim EPC pages directly if none available. Enclave
+ * mutex should not be held if this is set.
*
* Allocate a free EPC page and convert it to a Version Array (VA) page.
*
@@ -926,12 +990,12 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
* a VA page,
* -errno otherwise
*/
-struct sgx_epc_page *sgx_alloc_va_page(void)
+struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
{
struct sgx_epc_page *epc_page;
int ret;
- epc_page = sgx_alloc_epc_page(NULL, true);
+ epc_page = sgx_alloc_epc_page(NULL, reclaim);
if (IS_ERR(epc_page))
return ERR_CAST(epc_page);
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index 332ef3568267..b5e9602be127 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -113,11 +113,15 @@ int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
void sgx_encl_put_backing(struct sgx_backing *backing);
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
struct sgx_encl_page *page);
-
-struct sgx_epc_page *sgx_alloc_va_page(void);
+void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr);
+struct sgx_epc_page *sgx_alloc_va_page(bool reclaim);
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
bool sgx_va_page_full(struct sgx_va_page *va_page);
void sgx_encl_free_epc_page(struct sgx_epc_page *page);
+struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
+ unsigned long addr);
+struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim);
+void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page);
#endif /* _X86_ENCL_H */
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 217777c029ee..14ee6d218003 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -17,7 +17,7 @@
#include "encl.h"
#include "encls.h"
-static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
+struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim)
{
struct sgx_va_page *va_page = NULL;
void *err;
@@ -30,7 +30,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
if (!va_page)
return ERR_PTR(-ENOMEM);
- va_page->epc_page = sgx_alloc_va_page();
+ va_page->epc_page = sgx_alloc_va_page(reclaim);
if (IS_ERR(va_page->epc_page)) {
err = ERR_CAST(va_page->epc_page);
kfree(va_page);
@@ -43,7 +43,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
return va_page;
}
-static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
+void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
{
encl->page_cnt--;
@@ -64,7 +64,14 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
struct file *backing;
long ret;
- va_page = sgx_encl_grow(encl);
+ /*
+ * ECREATE would detect this too, but checking here also ensures
+ * that the 'encl_size' calculations below can never overflow.
+ */
+ if (!is_power_of_2(secs->size))
+ return -EINVAL;
+
+ va_page = sgx_encl_grow(encl, true);
if (IS_ERR(va_page))
return PTR_ERR(va_page);
else if (va_page)
@@ -306,7 +313,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
return PTR_ERR(epc_page);
}
- va_page = sgx_encl_grow(encl);
+ va_page = sgx_encl_grow(encl, true);
if (IS_ERR(va_page)) {
ret = PTR_ERR(va_page);
goto err_out_free;
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index ad453b4387a4..fd786f1bc4e8 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -122,36 +122,9 @@ static void sgx_reclaimer_block(struct sgx_epc_page *epc_page)
struct sgx_encl_page *page = epc_page->owner;
unsigned long addr = page->desc & PAGE_MASK;
struct sgx_encl *encl = page->encl;
- unsigned long mm_list_version;
- struct sgx_encl_mm *encl_mm;
- struct vm_area_struct *vma;
- int idx, ret;
-
- do {
- mm_list_version = encl->mm_list_version;
-
- /* Pairs with smp_rmb() in sgx_encl_mm_add(). */
- smp_rmb();
-
- idx = srcu_read_lock(&encl->srcu);
-
- list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
- if (!mmget_not_zero(encl_mm->mm))
- continue;
-
- mmap_read_lock(encl_mm->mm);
-
- ret = sgx_encl_find(encl_mm->mm, addr, &vma);
- if (!ret && encl == vma->vm_private_data)
- zap_vma_ptes(vma, addr, PAGE_SIZE);
-
- mmap_read_unlock(encl_mm->mm);
-
- mmput_async(encl_mm->mm);
- }
+ int ret;
- srcu_read_unlock(&encl->srcu, idx);
- } while (unlikely(encl->mm_list_version != mm_list_version));
+ sgx_zap_enclave_ptes(encl, addr);
mutex_lock(&encl->lock);