From 2bf1071a8d50928a4ae366bb3108833166c2b70c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 5 Jul 2018 18:47:00 +1000 Subject: powerpc/64s: Remove POWER9 DD1 support POWER9 DD1 was never a product. It is no longer supported by upstream firmware, and it is not effectively supported in Linux due to lack of testing. Signed-off-by: Nicholas Piggin Reviewed-by: Michael Ellerman [mpe: Remove arch_make_huge_pte() entirely] Signed-off-by: Michael Ellerman --- drivers/misc/cxl/cxl.h | 8 -------- drivers/misc/cxl/cxllib.c | 4 ---- drivers/misc/cxl/pci.c | 41 ++++++++++++++++------------------------- 3 files changed, 16 insertions(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 918d4fb742d1..505f973e13f3 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -865,14 +865,6 @@ static inline bool cxl_is_power9(void) return false; } -static inline bool cxl_is_power9_dd1(void) -{ - if ((pvr_version_is(PVR_POWER9)) && - cpu_has_feature(CPU_FTR_POWER9_DD1)) - return true; - return false; -} - ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, loff_t off, size_t count); diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c index 0bc7c31cf739..5a3f91255258 100644 --- a/drivers/misc/cxl/cxllib.c +++ b/drivers/misc/cxl/cxllib.c @@ -102,10 +102,6 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg) rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl); if (rc) return rc; - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - /* workaround for DD1 - nbwind = capiind */ - cfg->dsnctl |= ((u64)0x02 << (63-47)); - } cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION; cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 429d6de1dde7..2af0d4c47b76 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -465,23 +465,21 @@ int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg) /* nMMU_ID Defaults to: b’000001001’*/ xsl_dsnctl |= ((u64)0x09 << (63-28)); - if (!(cxl_is_power9_dd1())) { - /* - * Used to identify CAPI packets which should be sorted into - * the Non-Blocking queues by the PHB. This field should match - * the PHB PBL_NBW_CMPM register - * nbwind=0x03, bits [57:58], must include capi indicator. - * Not supported on P9 DD1. - */ - xsl_dsnctl |= (nbwind << (63-55)); + /* + * Used to identify CAPI packets which should be sorted into + * the Non-Blocking queues by the PHB. This field should match + * the PHB PBL_NBW_CMPM register + * nbwind=0x03, bits [57:58], must include capi indicator. + * Not supported on P9 DD1. + */ + xsl_dsnctl |= (nbwind << (63-55)); - /* - * Upper 16b address bits of ASB_Notify messages sent to the - * system. Need to match the PHB’s ASN Compare/Mask Register. - * Not supported on P9 DD1. - */ - xsl_dsnctl |= asnind; - } + /* + * Upper 16b address bits of ASB_Notify messages sent to the + * system. Need to match the PHB’s ASN Compare/Mask Register. + * Not supported on P9 DD1. + */ + xsl_dsnctl |= asnind; *reg = xsl_dsnctl; return 0; @@ -539,15 +537,8 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, /* Snoop machines */ cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL); - if (cxl_is_power9_dd1()) { - /* Disabling deadlock counter CAR */ - cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL); - /* Enable NORST */ - cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL); - } else { - /* Enable NORST and DD2 features */ - cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL); - } + /* Enable NORST and DD2 features */ + cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL); /* * Check if PSL has data-cache. We need to flush adapter datacache -- cgit v1.2.3 From 00a5c58d9499bd0c290b57205f43a70f2e69d3f6 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 Jul 2018 16:13:46 +1000 Subject: KVM: PPC: Make iommu_table::it_userspace big endian We are going to reuse multilevel TCE code for the userspace copy of the TCE table and since it is big endian, let's make the copy big endian too. Reviewed-by: David Gibson Signed-off-by: Alexey Kardashevskiy Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- drivers/vfio/vfio_iommu_spapr_tce.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 759a5bdd40e1..8ab124a67311 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -230,7 +230,7 @@ static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl, decrement_locked_vm(mm, cb >> PAGE_SHIFT); return -ENOMEM; } - tbl->it_userspace = uas; + tbl->it_userspace = (__be64 *) uas; return 0; } @@ -482,20 +482,20 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container, struct mm_iommu_table_group_mem_t *mem = NULL; int ret; unsigned long hpa = 0; - unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); if (!pua) return; - ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl), - &hpa, &mem); + ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua), + IOMMU_PAGE_SIZE(tbl), &hpa, &mem); if (ret) - pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", - __func__, *pua, entry, ret); + pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n", + __func__, be64_to_cpu(*pua), entry, ret); if (mem) mm_iommu_mapped_dec(mem); - *pua = 0; + *pua = cpu_to_be64(0); } static int tce_iommu_clear(struct tce_container *container, @@ -607,8 +607,7 @@ static long tce_iommu_build_v2(struct tce_container *container, for (i = 0; i < pages; ++i) { struct mm_iommu_table_group_mem_t *mem = NULL; - unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, - entry + i); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i); ret = tce_iommu_prereg_ua_to_hpa(container, tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem); @@ -642,7 +641,7 @@ static long tce_iommu_build_v2(struct tce_container *container, if (dirtmp != DMA_NONE) tce_iommu_unuse_page_v2(container, tbl, entry + i); - *pua = tce; + *pua = cpu_to_be64(tce); tce += IOMMU_PAGE_SIZE(tbl); } -- cgit v1.2.3 From 090bad39b237aad92d8e01baa033699cf0c81cbe Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 Jul 2018 16:13:47 +1000 Subject: powerpc/powernv: Add indirect levels to it_userspace We want to support sparse memory and therefore huge chunks of DMA windows do not need to be mapped. If a DMA window big enough to require 2 or more indirect levels, and a DMA window is used to map all RAM (which is a default case for 64bit window), we can actually save some memory by not allocation TCE for regions which we are not going to map anyway. The hardware tables alreary support indirect levels but we also keep host-physical-to-userspace translation array which is allocated by vmalloc() and is a flat array which might use quite some memory. This converts it_userspace from vmalloc'ed array to a multi level table. As the format becomes platform dependend, this replaces the direct access to it_usespace with a iommu_table_ops::useraddrptr hook which returns a pointer to the userspace copy of a TCE; future extension will return NULL if the level was not allocated. This should not change non-KVM handling of TCE tables and it_userspace will not be allocated for non-KVM tables. Reviewed-by: David Gibson Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- drivers/vfio/vfio_iommu_spapr_tce.c | 46 ------------------------------------- 1 file changed, 46 deletions(-) (limited to 'drivers') diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 8ab124a67311..54ae6c2be1b7 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -211,44 +211,6 @@ static long tce_iommu_register_pages(struct tce_container *container, return 0; } -static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl, - struct mm_struct *mm) -{ - unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * - tbl->it_size, PAGE_SIZE); - unsigned long *uas; - long ret; - - BUG_ON(tbl->it_userspace); - - ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT); - if (ret) - return ret; - - uas = vzalloc(cb); - if (!uas) { - decrement_locked_vm(mm, cb >> PAGE_SHIFT); - return -ENOMEM; - } - tbl->it_userspace = (__be64 *) uas; - - return 0; -} - -static void tce_iommu_userspace_view_free(struct iommu_table *tbl, - struct mm_struct *mm) -{ - unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * - tbl->it_size, PAGE_SIZE); - - if (!tbl->it_userspace) - return; - - vfree(tbl->it_userspace); - tbl->it_userspace = NULL; - decrement_locked_vm(mm, cb >> PAGE_SHIFT); -} - static bool tce_page_is_contained(struct page *page, unsigned page_shift) { /* @@ -599,12 +561,6 @@ static long tce_iommu_build_v2(struct tce_container *container, unsigned long hpa; enum dma_data_direction dirtmp; - if (!tbl->it_userspace) { - ret = tce_iommu_userspace_view_alloc(tbl, container->mm); - if (ret) - return ret; - } - for (i = 0; i < pages; ++i) { struct mm_iommu_table_group_mem_t *mem = NULL; __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i); @@ -685,7 +641,6 @@ static void tce_iommu_free_table(struct tce_container *container, { unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; - tce_iommu_userspace_view_free(tbl, container->mm); iommu_tce_table_put(tbl); decrement_locked_vm(container->mm, pages); } @@ -1200,7 +1155,6 @@ static void tce_iommu_release_ownership(struct tce_container *container, continue; tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); - tce_iommu_userspace_view_free(tbl, container->mm); if (tbl->it_map) iommu_release_ownership(tbl); -- cgit v1.2.3 From a68bd1267b7286b1687905651b404e765046de25 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 Jul 2018 16:13:49 +1000 Subject: powerpc/powernv/ioda: Allocate indirect TCE levels on demand At the moment we allocate the entire TCE table, twice (hardware part and userspace translation cache). This normally works as we normally have contigous memory and the guest will map entire RAM for 64bit DMA. However if we have sparse RAM (one example is a memory device), then we will allocate TCEs which will never be used as the guest only maps actual memory for DMA. If it is a single level TCE table, there is nothing we can really do but if it a multilevel table, we can skip allocating TCEs we know we won't need. This adds ability to allocate only first level, saving memory. This changes iommu_table::free() to avoid allocating of an extra level; iommu_table::set() will do this when needed. This adds @alloc parameter to iommu_table::exchange() to tell the callback if it can allocate an extra level; the flag is set to "false" for the realmode KVM handlers of H_PUT_TCE hcalls and the callback returns H_TOO_HARD. This still requires the entire table to be counted in mm::locked_vm. To be conservative, this only does on-demand allocation when the usespace cache table is requested which is the case of VFIO. The example math for a system replicating a powernv setup with NVLink2 in a guest: 16GB RAM mapped at 0x0 128GB GPU RAM window (16GB of actual RAM) mapped at 0x244000000000 the table to cover that all with 64K pages takes: (((0x244000000000 + 0x2000000000) >> 16)*8)>>20 = 4556MB If we allocate only necessary TCE levels, we will only need: (((0x400000000 + 0x400000000) >> 16)*8)>>20 = 4MB (plus some for indirect levels). Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- drivers/vfio/vfio_iommu_spapr_tce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 54ae6c2be1b7..11a4c194d6e3 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -631,7 +631,7 @@ static long tce_iommu_create_table(struct tce_container *container, page_shift, window_size, levels, ptbl); WARN_ON(!ret && !(*ptbl)->it_ops->free); - WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); + WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size)); return ret; } -- cgit v1.2.3