summaryrefslogtreecommitdiff
path: root/drivers/iommu
diff options
context:
space:
mode:
authorMax Krummenacher <max.krummenacher@toradex.com>2024-02-29 21:24:13 +0100
committerMax Krummenacher <max.krummenacher@toradex.com>2024-02-29 21:24:26 +0100
commit99e9ab88844a2a2e1c048264315eb0d72eb0bf88 (patch)
tree0d89b05b1816b8cac1be77ac9b3c297762502e67 /drivers/iommu
parent756e3199ba82d68ba07e5acf555d13aaa1da3da1 (diff)
parentd761b18f6bc83bce94251467e3c0974243318456 (diff)
Merge remote-tracking branch 'fslc/5.15-2.2.x-imx' into toradex_5.15-2.2.x-imx
Signed-off-by: Max Krummenacher <max.krummenacher@toradex.com> Conflicts: drivers/gpu/drm/bridge/lontium-lt8912b.c drivers/usb/dwc3/drd.c
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/amd/iommu_v2.c4
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c27
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c18
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c1
-rw-r--r--drivers/iommu/arm/arm-smmu/qcom_iommu.c7
-rw-r--r--drivers/iommu/dma-iommu.c3
-rw-r--r--drivers/iommu/intel/dmar.c18
-rw-r--r--drivers/iommu/intel/iommu.c22
-rw-r--r--drivers/iommu/intel/pasid.c2
-rw-r--r--drivers/iommu/rockchip-iommu.c43
-rw-r--r--drivers/iommu/sprd-iommu.c1
11 files changed, 79 insertions, 67 deletions
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index c96cf9b21719..29a3a62b7a3a 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -264,8 +264,8 @@ static void put_pasid_state(struct pasid_state *pasid_state)
static void put_pasid_state_wait(struct pasid_state *pasid_state)
{
- refcount_dec(&pasid_state->count);
- wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
+ if (!refcount_dec_and_test(&pasid_state->count))
+ wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
free_pasid_state(pasid_state);
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index e2e80eb2840c..01748742c684 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -186,6 +186,15 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd)
}
}
+/*
+ * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this
+ * is used as a threshold to replace per-page TLBI commands to issue in the
+ * command queue with an address-space TLBI command, when SMMU w/o a range
+ * invalidation feature handles too many per-page TLBI commands, which will
+ * otherwise result in a soft lockup.
+ */
+#define CMDQ_MAX_TLBI_OPS (1 << (PAGE_SHIFT - 3))
+
static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -200,10 +209,22 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
* range. So do a simple translation here by calculating size correctly.
*/
size = end - start;
+ if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_RANGE_INV)) {
+ if (size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE)
+ size = 0;
+ }
+
+ if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) {
+ if (!size)
+ arm_smmu_tlb_inv_asid(smmu_domain->smmu,
+ smmu_mn->cd->asid);
+ else
+ arm_smmu_tlb_inv_range_asid(start, size,
+ smmu_mn->cd->asid,
+ PAGE_SIZE, false,
+ smmu_domain);
+ }
- if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
- arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid,
- PAGE_SIZE, false, smmu_domain);
arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size);
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index c8bc1e43285f..f74cd3c497df 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1890,13 +1890,23 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
/* Get the leaf page size */
tg = __ffs(smmu_domain->domain.pgsize_bitmap);
+ num_pages = size >> tg;
+
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
cmd->tlbi.tg = (tg - 10) / 2;
- /* Determine what level the granule is at */
- cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
-
- num_pages = size >> tg;
+ /*
+ * Determine what level the granule is at. For non-leaf, both
+ * io-pgtable and SVA pass a nominal last-level granule because
+ * they don't know what level(s) actually apply, so ignore that
+ * and leave TTL=0. However for various errata reasons we still
+ * want to use a range command, so avoid the SVA corner case
+ * where both scale and num could be 0 as well.
+ */
+ if (cmd->tlbi.leaf)
+ cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
+ else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
+ num_pages++;
}
cmds.num = 0;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 50453d38400c..272c9b35c9f0 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -226,6 +226,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,adreno" },
+ { .compatible = "qcom,adreno-gmu" },
{ .compatible = "qcom,mdp4" },
{ .compatible = "qcom,mdss" },
{ .compatible = "qcom,sc7180-mdss" },
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index a47cb654b704..9438203f08de 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -273,6 +273,13 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
ctx->secure_init = true;
}
+ /* Disable context bank before programming */
+ iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
+
+ /* Clear context bank fault address fault status registers */
+ iommu_writel(ctx, ARM_SMMU_CB_FAR, 0);
+ iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
+
/* TTBRs */
iommu_writeq(ctx, ARM_SMMU_CB_TTBR0,
pgtbl_cfg.arm_lpae_s1_cfg.ttbr |
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 42970ffb813d..66c158cc0e6c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -25,6 +25,7 @@
#include <linux/vmalloc.h>
#include <linux/crash_dump.h>
#include <linux/dma-direct.h>
+#include <trace/events/swiotlb.h>
struct iommu_dma_msi_page {
struct list_head list;
@@ -817,6 +818,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
void *padding_start;
size_t padding_size, aligned_size;
+ trace_swiotlb_bounced(dev, phys, size, swiotlb_force);
+
aligned_size = iova_align(iovad, size);
phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
iova_mask(iovad), dir, attrs);
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 7c20083d4a79..0ad33d8d99d1 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1518,6 +1518,15 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
{
struct qi_desc desc;
+ /*
+ * VT-d spec, section 4.3:
+ *
+ * Software is recommended to not submit any Device-TLB invalidation
+ * requests while address remapping hardware is disabled.
+ */
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ return;
+
if (mask) {
addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
@@ -1583,6 +1592,15 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
+ /*
+ * VT-d spec, section 4.3:
+ *
+ * Software is recommended to not submit any Device-TLB invalidation
+ * requests while address remapping hardware is disabled.
+ */
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ return;
+
desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
QI_DEV_IOTLB_PFSID(pfsid);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 29538471c528..46b2751c3f00 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3564,13 +3564,6 @@ static int iommu_suspend(void)
struct intel_iommu *iommu = NULL;
unsigned long flag;
- for_each_active_iommu(iommu, drhd) {
- iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
- GFP_KERNEL);
- if (!iommu->iommu_state)
- goto nomem;
- }
-
iommu_flush_all();
for_each_active_iommu(iommu, drhd) {
@@ -3590,12 +3583,6 @@ static int iommu_suspend(void)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
return 0;
-
-nomem:
- for_each_active_iommu(iommu, drhd)
- kfree(iommu->iommu_state);
-
- return -ENOMEM;
}
static void iommu_resume(void)
@@ -3627,9 +3614,6 @@ static void iommu_resume(void)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-
- for_each_active_iommu(iommu, drhd)
- kfree(iommu->iommu_state);
}
static struct syscore_ops iommu_syscore_ops = {
@@ -4481,8 +4465,8 @@ static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *op
*/
static void domain_context_clear(struct device_domain_info *info)
{
- if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
- return;
+ if (!dev_is_pci(info->dev))
+ domain_context_clear_one(info, info->bus, info->devfn);
pci_for_each_dma_alias(to_pci_dev(info->dev),
&domain_context_clear_one_cb, info);
@@ -5765,7 +5749,7 @@ static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
ver = (dev->device >> 8) & 0xff;
if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
ver != 0x4e && ver != 0x8a && ver != 0x98 &&
- ver != 0x9a && ver != 0xa7)
+ ver != 0x9a && ver != 0xa7 && ver != 0x7d)
return;
if (risky_device(dev))
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 6dbc43b414ca..dc9d665d7365 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -187,7 +187,7 @@ attach_out:
device_attach_pasid_table(info, pasid_table);
if (!ecap_coherent(info->iommu->ecap))
- clflush_cache_range(pasid_table->table, size);
+ clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);
return 0;
}
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index e3557f8dc44e..f9f6492c430d 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -98,8 +98,6 @@ struct rk_iommu_ops {
phys_addr_t (*pt_address)(u32 dte);
u32 (*mk_dtentries)(dma_addr_t pt_dma);
u32 (*mk_ptentries)(phys_addr_t page, int prot);
- phys_addr_t (*dte_addr_phys)(u32 addr);
- u32 (*dma_addr_dte)(dma_addr_t dt_dma);
u64 dma_bit_mask;
};
@@ -277,8 +275,8 @@ static u32 rk_mk_pte(phys_addr_t page, int prot)
/*
* In v2:
* 31:12 - Page address bit 31:0
- * 11:9 - Page address bit 34:32
- * 8:4 - Page address bit 39:35
+ * 11: 8 - Page address bit 35:32
+ * 7: 4 - Page address bit 39:36
* 3 - Security
* 2 - Writable
* 1 - Readable
@@ -505,7 +503,7 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
/*
* Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY
- * and verifying that upper 5 nybbles are read back.
+ * and verifying that upper 5 (v1) or 7 (v2) nybbles are read back.
*/
for (i = 0; i < iommu->num_mmu; i++) {
dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY);
@@ -530,33 +528,6 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
return 0;
}
-static inline phys_addr_t rk_dte_addr_phys(u32 addr)
-{
- return (phys_addr_t)addr;
-}
-
-static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma)
-{
- return dt_dma;
-}
-
-#define DT_HI_MASK GENMASK_ULL(39, 32)
-#define DTE_BASE_HI_MASK GENMASK(11, 4)
-#define DT_SHIFT 28
-
-static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr)
-{
- u64 addr64 = addr;
- return (phys_addr_t)(addr64 & RK_DTE_PT_ADDRESS_MASK) |
- ((addr64 & DTE_BASE_HI_MASK) << DT_SHIFT);
-}
-
-static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma)
-{
- return (dt_dma & RK_DTE_PT_ADDRESS_MASK) |
- ((dt_dma & DT_HI_MASK) >> DT_SHIFT);
-}
-
static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
{
void __iomem *base = iommu->bases[index];
@@ -576,7 +547,7 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
page_offset = rk_iova_page_offset(iova);
mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR);
- mmu_dte_addr_phys = rk_ops->dte_addr_phys(mmu_dte_addr);
+ mmu_dte_addr_phys = rk_ops->pt_address(mmu_dte_addr);
dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
dte_addr = phys_to_virt(dte_addr_phys);
@@ -966,7 +937,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu)
for (i = 0; i < iommu->num_mmu; i++) {
rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
- rk_ops->dma_addr_dte(rk_domain->dt_dma));
+ rk_ops->mk_dtentries(rk_domain->dt_dma));
rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
}
@@ -1373,8 +1344,6 @@ static struct rk_iommu_ops iommu_data_ops_v1 = {
.pt_address = &rk_dte_pt_address,
.mk_dtentries = &rk_mk_dte,
.mk_ptentries = &rk_mk_pte,
- .dte_addr_phys = &rk_dte_addr_phys,
- .dma_addr_dte = &rk_dma_addr_dte,
.dma_bit_mask = DMA_BIT_MASK(32),
};
@@ -1382,8 +1351,6 @@ static struct rk_iommu_ops iommu_data_ops_v2 = {
.pt_address = &rk_dte_pt_address_v2,
.mk_dtentries = &rk_mk_dte_v2,
.mk_ptentries = &rk_mk_pte_v2,
- .dte_addr_phys = &rk_dte_addr_phys_v2,
- .dma_addr_dte = &rk_dma_addr_dte_v2,
.dma_bit_mask = DMA_BIT_MASK(40),
};
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 723940e84161..6b11770e3d75 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -147,6 +147,7 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type)
dom->domain.geometry.aperture_start = 0;
dom->domain.geometry.aperture_end = SZ_256M - 1;
+ dom->domain.geometry.force_aperture = true;
return &dom->domain;
}