From 6d1bcb957be2850e0776f24c289e1f87c256baeb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:07 +0100 Subject: iommu: Remove empty iommu_tlb_range_add() callback from iommu_ops Commit add02cfdc9bc ("iommu: Introduce Interface for IOMMU TLB Flushing") added three new TLB flushing operations to the IOMMU API so that the underlying driver operations can be batched when unmapping large regions of IO virtual address space. However, the ->iotlb_range_add() callback has not been implemented by any IOMMU drivers (amd_iommu.c implements it as an empty function, which incurs the overhead of an indirect branch). Instead, drivers either flush the entire IOTLB in the ->iotlb_sync() callback or perform the necessary invalidation during ->unmap(). Attempting to implement ->iotlb_range_add() for arm-smmu-v3.c revealed two major issues: 1. The page size used to map the region in the page-table is not known, and so it is not generally possible to issue TLB flushes in the most efficient manner. 2. The only mutable state passed to the callback is a pointer to the iommu_domain, which can be accessed concurrently and therefore requires expensive synchronisation to keep track of the outstanding flushes. Remove the callback entirely in preparation for extending ->unmap() and ->iotlb_sync() to update a token on the caller's stack. 
Signed-off-by: Will Deacon --- include/linux/iommu.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index fdc355ccc570..1e21431262d9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -201,7 +201,6 @@ struct iommu_sva_ops { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain - * @iotlb_range_add: Add a given iova range to the flush queue for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue @@ -244,8 +243,6 @@ struct iommu_ops { size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_range_add)(struct iommu_domain *domain, - unsigned long iova, size_t size); void (*iotlb_sync_map)(struct iommu_domain *domain); void (*iotlb_sync)(struct iommu_domain *domain); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); @@ -476,13 +473,6 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain) domain->ops->flush_iotlb_all(domain); } -static inline void iommu_tlb_range_add(struct iommu_domain *domain, - unsigned long iova, size_t size) -{ - if (domain->ops->iotlb_range_add) - domain->ops->iotlb_range_add(domain, iova, size); -} - static inline void iommu_tlb_sync(struct iommu_domain *domain) { if (domain->ops->iotlb_sync) @@ -637,11 +627,6 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain) { } -static inline void iommu_tlb_range_add(struct iommu_domain *domain, - unsigned long iova, size_t size) -{ -} - static inline void iommu_tlb_sync(struct iommu_domain *domain) { } -- cgit v1.2.3 From 298f78895b081911e0b3605f07d79ebd3d4cf7b0 Mon Sep 17 
00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:34 +0100 Subject: iommu/io-pgtable: Rename iommu_gather_ops to iommu_flush_ops In preparation for TLB flush gathering in the IOMMU API, rename the iommu_gather_ops structure in io-pgtable to iommu_flush_ops, which better describes its purpose and avoids the potential for confusion between different levels of the API. $ find linux/ -type f -name '*.[ch]' | xargs sed -i 's/gather_ops/flush_ops/g' Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index b5a450a3bb47..6292ea15d674 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -17,7 +17,7 @@ enum io_pgtable_fmt { }; /** - * struct iommu_gather_ops - IOMMU callbacks for TLB and page table management. + * struct iommu_flush_ops - IOMMU callbacks for TLB and page table management. * * @tlb_flush_all: Synchronously invalidate the entire TLB context. * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range. @@ -28,7 +28,7 @@ enum io_pgtable_fmt { * Note that these can all be called in atomic context and must therefore * not block. 
*/ -struct iommu_gather_ops { +struct iommu_flush_ops { void (*tlb_flush_all)(void *cookie); void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule, bool leaf, void *cookie); @@ -84,7 +84,7 @@ struct io_pgtable_cfg { unsigned int ias; unsigned int oas; bool coherent_walk; - const struct iommu_gather_ops *tlb; + const struct iommu_flush_ops *tlb; struct device *iommu_dev; /* Low-level data specific to the table format */ -- cgit v1.2.3 From a7d20dc19d9ea7012227be5144353012ffa3ddc4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:48 +0100 Subject: iommu: Introduce struct iommu_iotlb_gather for batching TLB flushes To permit batching of TLB flushes across multiple calls to the IOMMU driver's ->unmap() implementation, introduce a new structure for tracking the address range to be flushed and the granularity at which the flushing is required. This is hooked into the IOMMU API and its callers are updated to make use of the new structure. Subsequent patches will plumb this into the IOMMU drivers as well, but for now the gathering information is ignored. Signed-off-by: Will Deacon --- include/linux/iommu.h | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1e21431262d9..aaf073010a9a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -191,6 +191,23 @@ struct iommu_sva_ops { #ifdef CONFIG_IOMMU_API +/** + * struct iommu_iotlb_gather - Range information for a pending IOTLB flush + * + * @start: IOVA representing the start of the range to be flushed + * @end: IOVA representing the end of the range to be flushed (exclusive) + * @pgsize: The interval at which to perform the flush + * + * This structure is intended to be updated by multiple calls to the + * ->unmap() function in struct iommu_ops before eventually being passed + * into ->iotlb_sync(). 
+ */ +struct iommu_iotlb_gather { + unsigned long start; + unsigned long end; + size_t pgsize; +}; + /** * struct iommu_ops - iommu ops and capabilities * @capable: check capability @@ -375,6 +392,13 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev) return (struct iommu_device *)dev_get_drvdata(dev); } +static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) +{ + *gather = (struct iommu_iotlb_gather) { + .start = ULONG_MAX, + }; +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -399,7 +423,8 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova, extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, - unsigned long iova, size_t size); + unsigned long iova, size_t size, + struct iommu_iotlb_gather *iotlb_gather); extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg,unsigned int nents, int prot); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); @@ -473,10 +498,13 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain) domain->ops->flush_iotlb_all(domain); } -static inline void iommu_tlb_sync(struct iommu_domain *domain) +static inline void iommu_tlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *iotlb_gather) { if (domain->ops->iotlb_sync) domain->ops->iotlb_sync(domain); + + iommu_iotlb_gather_init(iotlb_gather); } /* PCI device grouping function */ @@ -557,6 +585,7 @@ struct iommu_group {}; struct iommu_fwspec {}; struct iommu_device {}; struct iommu_fault_param {}; +struct iommu_iotlb_gather {}; static inline bool iommu_present(struct bus_type *bus) { @@ -611,7 +640,8 @@ static inline size_t iommu_unmap(struct iommu_domain *domain, } static inline 
size_t iommu_unmap_fast(struct iommu_domain *domain, - unsigned long iova, int gfp_order) + unsigned long iova, int gfp_order, + struct iommu_iotlb_gather *iotlb_gather) { return 0; } @@ -627,7 +657,8 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain) { } -static inline void iommu_tlb_sync(struct iommu_domain *domain) +static inline void iommu_tlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *iotlb_gather) { } @@ -812,6 +843,10 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev) return NULL; } +static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) +{ +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } -- cgit v1.2.3 From 4fcf8544fc677fc8af135f1d86b3ba69c4ad429d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:57 +0100 Subject: iommu: Introduce iommu_iotlb_gather_add_page() Introduce a helper function for drivers to use when updating an iommu_iotlb_gather structure in response to an ->unmap() call, rather than having to open-code the logic in every page-table implementation. Signed-off-by: Will Deacon --- include/linux/iommu.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index aaf073010a9a..ad41aee55bc6 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -507,6 +507,31 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain, iommu_iotlb_gather_init(iotlb_gather); } +static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size) +{ + unsigned long start = iova, end = start + size; + + /* + * If the new page is disjoint from the current range or is mapped at + * a different granularity, then sync the TLB so that the gather + * structure can be rewritten. 
+ */ + if (gather->pgsize != size || + end < gather->start || start > gather->end) { + if (gather->pgsize) + iommu_tlb_sync(domain, gather); + gather->pgsize = size; + } + + if (gather->end < end) + gather->end = end; + + if (gather->start > start) + gather->start = start; +} + /* PCI device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ @@ -847,6 +872,12 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { } +static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size) +{ +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } -- cgit v1.2.3 From 56f8af5e9d38f120cba2c2adb0786fa2dbc901a4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:06 +0100 Subject: iommu: Pass struct iommu_iotlb_gather to ->unmap() and ->iotlb_sync() To allow IOMMU drivers to batch up TLB flushing operations and postpone them until ->iotlb_sync() is called, extend the prototypes for the ->unmap() and ->iotlb_sync() IOMMU ops callbacks to take a pointer to the current iommu_iotlb_gather structure. All affected IOMMU drivers are updated, but there should be no functional change since the extra parameter is ignored for now. 
Signed-off-by: Will Deacon --- include/linux/iommu.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ad41aee55bc6..64ebaff33455 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -258,10 +258,11 @@ struct iommu_ops { int (*map)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size); + size_t size, struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); void (*iotlb_sync_map)(struct iommu_domain *domain); - void (*iotlb_sync)(struct iommu_domain *domain); + void (*iotlb_sync)(struct iommu_domain *domain, + struct iommu_iotlb_gather *iotlb_gather); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); void (*remove_device)(struct device *dev); @@ -502,7 +503,7 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { if (domain->ops->iotlb_sync) - domain->ops->iotlb_sync(domain); + domain->ops->iotlb_sync(domain, iotlb_gather); iommu_iotlb_gather_init(iotlb_gather); } -- cgit v1.2.3 From 3445545b2248300319b6965208e77140c960c3fd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:16 +0100 Subject: iommu/io-pgtable: Introduce tlb_flush_walk() and tlb_flush_leaf() In preparation for deferring TLB flushes to iommu_tlb_sync(), introduce two new synchronous invalidation helpers to the io-pgtable API, which allow the unmap() code to force invalidation in cases where it cannot be deferred (e.g. when replacing a table with a block or when TLBI_ON_MAP is set). 
Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 6292ea15d674..27275575b305 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -19,17 +19,31 @@ enum io_pgtable_fmt { /** * struct iommu_flush_ops - IOMMU callbacks for TLB and page table management. * - * @tlb_flush_all: Synchronously invalidate the entire TLB context. - * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range. - * @tlb_sync: Ensure any queued TLB invalidation has taken effect, and - * any corresponding page table updates are visible to the - * IOMMU. + * @tlb_flush_all: Synchronously invalidate the entire TLB context. + * @tlb_flush_walk: Synchronously invalidate all intermediate TLB state + * (sometimes referred to as the "walk cache") for a virtual + * address range. + * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual + * address range. + * @tlb_add_flush: Optional callback to queue up leaf TLB invalidation for a + * virtual address range. This function exists purely as an + * optimisation for IOMMUs that cannot batch TLB invalidation + * operations efficiently and are therefore better suited to + * issuing them early rather than deferring them until + * iommu_tlb_sync(). + * @tlb_sync: Ensure any queued TLB invalidation has taken effect, and + * any corresponding page table updates are visible to the + * IOMMU. * * Note that these can all be called in atomic context and must therefore * not block. 
*/ struct iommu_flush_ops { void (*tlb_flush_all)(void *cookie); + void (*tlb_flush_walk)(unsigned long iova, size_t size, size_t granule, + void *cookie); + void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule, + void *cookie); void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule, bool leaf, void *cookie); void (*tlb_sync)(void *cookie); -- cgit v1.2.3 From 10b7a7d912697afd681a0bcfced9e05543aded35 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:32 +0100 Subject: iommu/io-pgtable-arm: Call ->tlb_flush_walk() and ->tlb_flush_leaf() Now that all IOMMU drivers using the io-pgtable API implement the ->tlb_flush_walk() and ->tlb_flush_leaf() callbacks, we can use them in the io-pgtable code instead of ->tlb_add_flush() immediately followed by ->tlb_sync(). Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 27275575b305..0618aac59e74 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -198,6 +198,20 @@ static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop) iop->cfg.tlb->tlb_flush_all(iop->cookie); } +static inline void +io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova, + size_t size, size_t granule) +{ + iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie); +} + +static inline void +io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova, + size_t size, size_t granule) +{ + iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie); +} + static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop, unsigned long iova, size_t size, size_t granule, bool leaf) { -- cgit v1.2.3 From abfd6fe0cd535d31ee83b668be6eb59ce6a8469d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:41 +0100 Subject: iommu/io-pgtable: Replace ->tlb_add_flush() with ->tlb_add_page() 
The ->tlb_add_flush() callback in the io-pgtable API now looks a bit silly: - It takes a size and a granule, which are always the same - It takes a 'bool leaf', which is always true - It only ever flushes a single page With that in mind, replace it with an optional ->tlb_add_page() callback that drops the useless parameters. Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 0618aac59e74..99e04bd2baa1 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -25,12 +25,11 @@ enum io_pgtable_fmt { * address range. * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual * address range. - * @tlb_add_flush: Optional callback to queue up leaf TLB invalidation for a - * virtual address range. This function exists purely as an - * optimisation for IOMMUs that cannot batch TLB invalidation - * operations efficiently and are therefore better suited to - * issuing them early rather than deferring them until - * iommu_tlb_sync(). + * @tlb_add_page: Optional callback to queue up leaf TLB invalidation for a + * single page. This function exists purely as an optimisation + * for IOMMUs that cannot batch TLB invalidation operations + * efficiently and are therefore better suited to issuing them + * early rather than deferring them until iommu_tlb_sync(). * @tlb_sync: Ensure any queued TLB invalidation has taken effect, and * any corresponding page table updates are visible to the * IOMMU. 
@@ -44,8 +43,7 @@ struct iommu_flush_ops { void *cookie); void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule, void *cookie); - void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule, - bool leaf, void *cookie); + void (*tlb_add_page)(unsigned long iova, size_t granule, void *cookie); void (*tlb_sync)(void *cookie); }; @@ -212,10 +210,12 @@ io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova, iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie); } -static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop, - unsigned long iova, size_t size, size_t granule, bool leaf) +static inline void +io_pgtable_tlb_add_page(struct io_pgtable *iop, unsigned long iova, + size_t granule) { - iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie); + if (iop->cfg.tlb->tlb_add_page) + iop->cfg.tlb->tlb_add_page(iova, granule, iop->cookie); } static inline void io_pgtable_tlb_sync(struct io_pgtable *iop) -- cgit v1.2.3 From e953f7f2fa78d1c7fd064171f88457c6b1e21af9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:50 +0100 Subject: iommu/io-pgtable: Remove unused ->tlb_sync() callback The ->tlb_sync() callback is no longer used, so it can be removed. Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 99e04bd2baa1..843310484fe2 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -30,9 +30,6 @@ enum io_pgtable_fmt { * for IOMMUs that cannot batch TLB invalidation operations * efficiently and are therefore better suited to issuing them * early rather than deferring them until iommu_tlb_sync(). - * @tlb_sync: Ensure any queued TLB invalidation has taken effect, and - * any corresponding page table updates are visible to the - * IOMMU. 
* * Note that these can all be called in atomic context and must therefore * not block. @@ -44,7 +41,6 @@ struct iommu_flush_ops { void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule, void *cookie); void (*tlb_add_page)(unsigned long iova, size_t granule, void *cookie); - void (*tlb_sync)(void *cookie); }; /** @@ -218,11 +214,6 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop, unsigned long iova, iop->cfg.tlb->tlb_add_page(iova, granule, iop->cookie); } -static inline void io_pgtable_tlb_sync(struct io_pgtable *iop) -{ - iop->cfg.tlb->tlb_sync(iop->cookie); -} - /** * struct io_pgtable_init_fns - Alloc/free a set of page tables for a * particular format. -- cgit v1.2.3 From a2d3a382d6c682e22b263c9e7f0d857c3fa6c9d6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:58 +0100 Subject: iommu/io-pgtable: Pass struct iommu_iotlb_gather to ->unmap() Update the io-pgtable ->unmap() function to take an iommu_iotlb_gather pointer as an argument, and update the callers as appropriate. 
Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 843310484fe2..fe27d93c8ad9 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -1,7 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IO_PGTABLE_H #define __IO_PGTABLE_H + #include <linux/bitops.h> +#include <linux/iommu.h> /* * Public API for use by IOMMU drivers @@ -136,7 +138,7 @@ struct io_pgtable_ops { int (*map)(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t size, int prot); size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, - size_t size); + size_t size, struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); }; -- cgit v1.2.3 From 3951c41af4a65ba418e6b1b973d398552bedb84f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:45:15 +0100 Subject: iommu/io-pgtable: Pass struct iommu_iotlb_gather to ->tlb_add_page() With all the pieces in place, we can finally propagate the iommu_iotlb_gather structure from the call to unmap() down to the IOMMU drivers' implementation of ->tlb_add_page(). Currently everybody ignores it, but the machinery is now there to defer invalidation. Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index fe27d93c8ad9..6b1b8be3ebec 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -28,10 +28,10 @@ enum io_pgtable_fmt { * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual * address range. * @tlb_add_page: Optional callback to queue up leaf TLB invalidation for a - * single page. 
This function exists purely as an optimisation - * for IOMMUs that cannot batch TLB invalidation operations - * efficiently and are therefore better suited to issuing them - * early rather than deferring them until iommu_tlb_sync(). + * single page. IOMMUs that cannot batch TLB invalidation + * operations efficiently will typically issue them here, but + * others may decide to update the iommu_iotlb_gather structure + * and defer the invalidation until iommu_tlb_sync() instead. * * Note that these can all be called in atomic context and must therefore * not block. @@ -42,7 +42,8 @@ struct iommu_flush_ops { void *cookie); void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule, void *cookie); - void (*tlb_add_page)(unsigned long iova, size_t granule, void *cookie); + void (*tlb_add_page)(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie); }; /** @@ -209,11 +210,12 @@ io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova, } static inline void -io_pgtable_tlb_add_page(struct io_pgtable *iop, unsigned long iova, +io_pgtable_tlb_add_page(struct io_pgtable *iop, + struct iommu_iotlb_gather * gather, unsigned long iova, size_t granule) { if (iop->cfg.tlb->tlb_add_page) - iop->cfg.tlb->tlb_add_page(iova, granule, iop->cookie); + iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie); } /** -- cgit v1.2.3 From b9c6ff94e43a0ee053e0c1d983fba1ac4953b762 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 23 Jul 2019 19:00:37 +0000 Subject: iommu/amd: Re-factor guest virtual APIC (de-)activation code Re-factor the logic for activate/deactivate guest virtual APIC mode (GAM) into helper functions, and export them for other drivers (e.g. SVM) to support run-time activate/deactivate of SVM AVIC. 
Cc: Joerg Roedel Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- include/linux/amd-iommu.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 4a4d00646040..21e950e4ab62 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -184,6 +184,9 @@ extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)); extern int amd_iommu_update_ga(int cpu, bool is_run, void *data); +extern int amd_iommu_activate_guest_mode(void *data); +extern int amd_iommu_deactivate_guest_mode(void *data); + #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ static inline int @@ -198,6 +201,15 @@ amd_iommu_update_ga(int cpu, bool is_run, void *data) return 0; } +static inline int amd_iommu_activate_guest_mode(void *data) +{ + return 0; +} + +static inline int amd_iommu_deactivate_guest_mode(void *data) +{ + return 0; +} #endif /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ #endif /* _ASM_X86_AMD_IOMMU_H */ -- cgit v1.2.3 From 3846a3b9511c5166082a93536d919a9c42abcd91 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 7 Aug 2019 11:26:45 +0300 Subject: iommu/omap: fix boot issue on remoteprocs with AMMU/Unicache Support has been added to the OMAP IOMMU driver to fix a boot hang issue on OMAP remoteprocs with AMMU/Unicache, caused by an improper AMMU/Unicache state upon initial deassertion of the processor reset. The issue is described in detail in the next three paragraphs. All the Cortex M3/M4 IPU processor subsystems in OMAP SoCs have an AMMU/Unicache IP that dictates the memory attributes for addresses seen by the processor cores. The AMMU/Unicache is configured/enabled by the SCACHE_CONFIG.BYPASS bit - a value of 1 enables the cache and mandates all addresses accessed by M3/M4 be defined in the AMMU. This bit is not programmable from the host processor. 
The M3/M4 boot sequence starts out with the AMMU/Unicache in disabled state, and SYS/BIOS programs the AMMU regions and enables the Unicache during one of its initial boot steps. This SCACHE_CONFIG.BYPASS bit is however enabled by default whenever a RET reset is applied to the IP, irrespective of whether it was previously enabled or not. The AMMU registers lose their context whenever this reset is applied. The reset is effective as long as the MMU portion of the subsystem is enabled and clocked. This behavior is common to all the IPU and DSP subsystems that have an AMMU/Unicache. The IPU boot sequence involves enabling and programming the MMU, and loading the processor and releasing the reset(s) for the processor. The PM setup code currently sets the target state for most of the power domains to RET. The L2 MMU can be enabled, programmed and accessed properly just fine with the domain in hardware supervised mode, while the power domain goes through a RET->ON->RET transition during the programming sequence. However, the ON->RET transition asserts a RET reset, and the SCACHE_CONFIG.BYPASS bit gets auto-set. An AMMU fault is thrown immediately when the M3/M4 core's reset is released since the first instruction address itself will not be defined in any valid AMMU regions. The ON->RET transition happens automatically on the power domain after enabling the iommu due to the hardware supervised mode. This patch adds and invokes the .set_pwrdm_constraint pdata ops, if present, during the OMAP IOMMU enable and disable functions to resolve the above boot hang issue. The ops will allow invoking the mach-omap2 layer API pwrdm_set_next_pwrst() in a multi-arch kernel environment. The ops also returns the current power domain state while enforcing the constraint so that the driver can store it and use it to set back the power domain state while releasing the constraint. 
The pdata ops implementation restricts the target power domain to ON during enable, and back to the original power domain state during disable, thereby eliminating the conditions for the boot issue. The implementation is effective only when the original power domain state is either RET or OFF, and is a no-op when it is ON or INACTIVE. The .set_pwrdm_constraint ops need to be plugged in pdata-quirks for the affected remote processors to be able to boot properly. Note that the current issue is seen only on kernels with the affected power domains programmed to enter RET. For example, IPU1 on DRA7xx is in a separate domain and is susceptible to this bug, while the IPU2 subsystem is within CORE power domain, and CORE RET is not supported on this SoC. IPUs on OMAP4 and OMAP5 are also susceptible since they are in CORE power domain, and CORE RET is a valid power target on these SoCs. Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- include/linux/platform_data/iommu-omap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index 44d913a7580c..1ed60265a20e 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -13,4 +13,6 @@ struct iommu_platform_data { const char *reset_name; int (*assert_reset)(struct platform_device *pdev, const char *name); int (*deassert_reset)(struct platform_device *pdev, const char *name); + int (*set_pwrdm_constraint)(struct platform_device *pdev, bool request, + u8 *pwrst); }; -- cgit v1.2.3 From 74c116df66d25a9fb48d44ce545a505edc5fbbba Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 7 Aug 2019 11:26:46 +0300 Subject: iommu/omap: add pdata ops for omap_device_enable/idle Add two new platform data ops to allow the OMAP iommu driver to be able to invoke the omap_device_enable and omap_device_idle from within the driver. 
These are being added to streamline the sequence between managing the hard reset lines and the clocks during the suspend path, as the default device pm_domain callback sequences in omap_device layer are not conducive for the OMAP IOMMU driver. This could have been done by expanding the existing pdata ops for reset management (like in the OMAP remoteproc driver), but this was chosen to avoid adding additional code in the separate file in the mach-omap2 layer. Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- include/linux/platform_data/iommu-omap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index 1ed60265a20e..8474a0208b34 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -13,6 +13,8 @@ struct iommu_platform_data { const char *reset_name; int (*assert_reset)(struct platform_device *pdev, const char *name); int (*deassert_reset)(struct platform_device *pdev, const char *name); + int (*device_enable)(struct platform_device *pdev); + int (*device_idle)(struct platform_device *pdev); int (*set_pwrdm_constraint)(struct platform_device *pdev, bool request, u8 *pwrst); }; -- cgit v1.2.3 From d9c4d8a6cc0f852adf3829fbe40e2e3f6213b0c6 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 7 Aug 2019 11:26:50 +0300 Subject: iommu/omap: introduce new API for runtime suspend/resume control This patch adds the support for the OMAP IOMMUs to be suspended during the auto suspend/resume of the OMAP remoteproc devices. The remote processors are auto suspended after a certain period of idleness or inactivity. This is done by introducing two new APIs, omap_iommu_domain_deactivate() and omap_iommu_domain_activate() to allow the client users/master devices of the IOMMU devices to deactivate & activate the IOMMU devices from their runtime suspend/resume operations. 
There is no API exposed by the IOMMU layer at present, and so these new APIs are added directly in the OMAP IOMMU driver to minimize framework changes. The APIs simply decrement and increment the runtime usage count of the IOMMU devices and let the context be saved/restored using the existing runtime pm callbacks. Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- include/linux/omap-iommu.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/omap-iommu.h b/include/linux/omap-iommu.h index 153bf25b4df3..36b645726813 100644 --- a/include/linux/omap-iommu.h +++ b/include/linux/omap-iommu.h @@ -10,12 +10,20 @@ #ifndef _OMAP_IOMMU_H_ #define _OMAP_IOMMU_H_ +struct iommu_domain; + #ifdef CONFIG_OMAP_IOMMU extern void omap_iommu_save_ctx(struct device *dev); extern void omap_iommu_restore_ctx(struct device *dev); + +int omap_iommu_domain_deactivate(struct iommu_domain *domain); +int omap_iommu_domain_activate(struct iommu_domain *domain); #else static inline void omap_iommu_save_ctx(struct device *dev) {} static inline void omap_iommu_restore_ctx(struct device *dev) {} + +static inline int omap_iommu_domain_deactivate(struct iommu_domain *domain) {} +static inline int omap_iommu_domain_activate(struct iommu_domain *domain) {} #endif #endif -- cgit v1.2.3 From 73499ad21d595638213f2a5f8b9b58259fa0cae2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Aug 2019 18:09:13 +0200 Subject: iommu/omap: Fix compilation warnings A recent patch introduced a new compiler warning because two functions with non-void return type have no return statement in omap-iommu.h for CONFIG_OMAP_IOMMU=n. Fix this by adding return statements to these functions.
Fixes: d9c4d8a6cc0f8 ('iommu/omap: introduce new API for runtime suspend/resume control') Signed-off-by: Joerg Roedel --- include/linux/omap-iommu.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/omap-iommu.h b/include/linux/omap-iommu.h index 36b645726813..2c32ca09df02 100644 --- a/include/linux/omap-iommu.h +++ b/include/linux/omap-iommu.h @@ -22,8 +22,15 @@ int omap_iommu_domain_activate(struct iommu_domain *domain); static inline void omap_iommu_save_ctx(struct device *dev) {} static inline void omap_iommu_restore_ctx(struct device *dev) {} -static inline int omap_iommu_domain_deactivate(struct iommu_domain *domain) {} -static inline int omap_iommu_domain_activate(struct iommu_domain *domain) {} +static inline int omap_iommu_domain_deactivate(struct iommu_domain *domain) +{ + return -ENODEV; +} + +static inline int omap_iommu_domain_activate(struct iommu_domain *domain) +{ + return -ENODEV; +} #endif #endif -- cgit v1.2.3 From 8a69961c7f7583742ab9064feab5ea533a6b1b97 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 19 Aug 2019 15:22:47 +0200 Subject: iommu: Add helpers to set/get default domain type Add a couple of functions to allow changing the default domain type from architecture code and a function for iommu drivers to request whether the default domain is passthrough. 
Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 64ebaff33455..29bac5345563 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -436,6 +436,9 @@ extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); extern int iommu_request_dma_domain_for_dev(struct device *dev); +extern void iommu_set_default_passthrough(bool cmd_line); +extern void iommu_set_default_translated(bool cmd_line); +extern bool iommu_default_passthrough(void); extern struct iommu_resv_region * iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, enum iommu_resv_type type); @@ -736,6 +739,19 @@ static inline int iommu_request_dma_domain_for_dev(struct device *dev) return -ENODEV; } +static inline void iommu_set_default_passthrough(bool cmd_line) +{ +} + +static inline void iommu_set_default_translated(bool cmd_line) +{ +} + +static inline bool iommu_default_passthrough(void) +{ + return true; +} + static inline int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { -- cgit v1.2.3 From 29746d012588f1de8517fc6921683c3844120989 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Sat, 24 Aug 2019 11:01:46 +0800 Subject: dt-bindings: mediatek: Add binding for mt8183 IOMMU and SMI This patch adds descriptions for mt8183 IOMMU and SMI. mt8183 has only one M4U like mt8173 and is also MTK IOMMU gen2 which uses ARM Short-Descriptor translation table format.
The mt8183 M4U-SMI HW diagram is as below:

                         EMI
                          |
                         M4U
                          |
                      ----------
                      |        |
                  gals0-rx   gals1-rx
                      |        |
                      |        |
                  gals0-tx   gals1-tx
                      |        |
                     ------------
                      SMI Common
                     ------------
                          |
  +-----+-----+--------+-----+-----+-------+-------+
  |     |     |        |     |     |       |       |
  |     |  gals-rx  gals-rx  |  gals-rx gals-rx gals-rx
  |     |     |        |     |     |       |       |
  |     |     |        |     |     |       |       |
  |     |  gals-tx  gals-tx  |  gals-tx gals-tx gals-tx
  |     |     |        |     |     |       |       |
larb0 larb1  IPU0     IPU1 larb4 larb5   larb6    CCU
 disp  vdec  img      cam   venc  img     cam

All the connections are HW fixed, SW can NOT adjust it. Compared with mt8173, we add a GALS(Global Async Local Sync) module between SMI-common and M4U, and additional GALS between larb2/3/5/6 and SMI-common. GALS can help synchronize for the modules in different clock frequency, it can be seen as an "asynchronous fifo". GALS can only help transfer the command/data while it doesn't have the configuring register, thus it has the special "smi" clock and it doesn't have the "apb" clock. From the diagram above, we add "gals0" and "gals1" clocks for smi-common and add a "gals" clock for smi-larb. From the diagram above, IPU0/IPU1(Image Processor Unit) and CCU(Camera Control Unit) are connected with smi-common directly, we can take them as "larb2", "larb3" and "larb7", and their register spaces are different with the normal larb. Signed-off-by: Yong Wu Reviewed-by: Rob Herring Reviewed-by: Evan Green Signed-off-by: Joerg Roedel --- include/dt-bindings/memory/mt8183-larb-port.h | 130 ++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 include/dt-bindings/memory/mt8183-larb-port.h (limited to 'include') diff --git a/include/dt-bindings/memory/mt8183-larb-port.h b/include/dt-bindings/memory/mt8183-larb-port.h new file mode 100644 index 000000000000..2c579f305162 --- /dev/null +++ b/include/dt-bindings/memory/mt8183-larb-port.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018 MediaTek Inc.
+ * Author: Yong Wu + */ +#ifndef __DTS_IOMMU_PORT_MT8183_H +#define __DTS_IOMMU_PORT_MT8183_H + +#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) + +#define M4U_LARB0_ID 0 +#define M4U_LARB1_ID 1 +#define M4U_LARB2_ID 2 +#define M4U_LARB3_ID 3 +#define M4U_LARB4_ID 4 +#define M4U_LARB5_ID 5 +#define M4U_LARB6_ID 6 +#define M4U_LARB7_ID 7 + +/* larb0 */ +#define M4U_PORT_DISP_OVL0 MTK_M4U_ID(M4U_LARB0_ID, 0) +#define M4U_PORT_DISP_2L_OVL0_LARB0 MTK_M4U_ID(M4U_LARB0_ID, 1) +#define M4U_PORT_DISP_2L_OVL1_LARB0 MTK_M4U_ID(M4U_LARB0_ID, 2) +#define M4U_PORT_DISP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 3) +#define M4U_PORT_DISP_RDMA1 MTK_M4U_ID(M4U_LARB0_ID, 4) +#define M4U_PORT_DISP_WDMA0 MTK_M4U_ID(M4U_LARB0_ID, 5) +#define M4U_PORT_MDP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 6) +#define M4U_PORT_MDP_WROT0 MTK_M4U_ID(M4U_LARB0_ID, 7) +#define M4U_PORT_MDP_WDMA0 MTK_M4U_ID(M4U_LARB0_ID, 8) +#define M4U_PORT_DISP_FAKE0 MTK_M4U_ID(M4U_LARB0_ID, 9) + +/* larb1 */ +#define M4U_PORT_HW_VDEC_MC_EXT MTK_M4U_ID(M4U_LARB1_ID, 0) +#define M4U_PORT_HW_VDEC_PP_EXT MTK_M4U_ID(M4U_LARB1_ID, 1) +#define M4U_PORT_HW_VDEC_VLD_EXT MTK_M4U_ID(M4U_LARB1_ID, 2) +#define M4U_PORT_HW_VDEC_AVC_MV_EXT MTK_M4U_ID(M4U_LARB1_ID, 3) +#define M4U_PORT_HW_VDEC_PRED_RD_EXT MTK_M4U_ID(M4U_LARB1_ID, 4) +#define M4U_PORT_HW_VDEC_PRED_WR_EXT MTK_M4U_ID(M4U_LARB1_ID, 5) +#define M4U_PORT_HW_VDEC_PPWRAP_EXT MTK_M4U_ID(M4U_LARB1_ID, 6) + +/* larb2 VPU0 */ +#define M4U_PORT_IMG_IPUO MTK_M4U_ID(M4U_LARB2_ID, 0) +#define M4U_PORT_IMG_IPU3O MTK_M4U_ID(M4U_LARB2_ID, 1) +#define M4U_PORT_IMG_IPUI MTK_M4U_ID(M4U_LARB2_ID, 2) + +/* larb3 VPU1 */ +#define M4U_PORT_CAM_IPUO MTK_M4U_ID(M4U_LARB3_ID, 0) +#define M4U_PORT_CAM_IPU2O MTK_M4U_ID(M4U_LARB3_ID, 1) +#define M4U_PORT_CAM_IPU3O MTK_M4U_ID(M4U_LARB3_ID, 2) +#define M4U_PORT_CAM_IPUI MTK_M4U_ID(M4U_LARB3_ID, 3) +#define M4U_PORT_CAM_IPU2I MTK_M4U_ID(M4U_LARB3_ID, 4) + +/* larb4 */ +#define M4U_PORT_VENC_RCPU MTK_M4U_ID(M4U_LARB4_ID, 0) +#define M4U_PORT_VENC_REC 
MTK_M4U_ID(M4U_LARB4_ID, 1) +#define M4U_PORT_VENC_BSDMA MTK_M4U_ID(M4U_LARB4_ID, 2) +#define M4U_PORT_VENC_SV_COMV MTK_M4U_ID(M4U_LARB4_ID, 3) +#define M4U_PORT_VENC_RD_COMV MTK_M4U_ID(M4U_LARB4_ID, 4) +#define M4U_PORT_JPGENC_RDMA MTK_M4U_ID(M4U_LARB4_ID, 5) +#define M4U_PORT_JPGENC_BSDMA MTK_M4U_ID(M4U_LARB4_ID, 6) +#define M4U_PORT_VENC_CUR_LUMA MTK_M4U_ID(M4U_LARB4_ID, 7) +#define M4U_PORT_VENC_CUR_CHROMA MTK_M4U_ID(M4U_LARB4_ID, 8) +#define M4U_PORT_VENC_REF_LUMA MTK_M4U_ID(M4U_LARB4_ID, 9) +#define M4U_PORT_VENC_REF_CHROMA MTK_M4U_ID(M4U_LARB4_ID, 10) + +/* larb5 */ +#define M4U_PORT_CAM_IMGI MTK_M4U_ID(M4U_LARB5_ID, 0) +#define M4U_PORT_CAM_IMG2O MTK_M4U_ID(M4U_LARB5_ID, 1) +#define M4U_PORT_CAM_IMG3O MTK_M4U_ID(M4U_LARB5_ID, 2) +#define M4U_PORT_CAM_VIPI MTK_M4U_ID(M4U_LARB5_ID, 3) +#define M4U_PORT_CAM_LCEI MTK_M4U_ID(M4U_LARB5_ID, 4) +#define M4U_PORT_CAM_SMXI MTK_M4U_ID(M4U_LARB5_ID, 5) +#define M4U_PORT_CAM_SMXO MTK_M4U_ID(M4U_LARB5_ID, 6) +#define M4U_PORT_CAM_WPE0_RDMA1 MTK_M4U_ID(M4U_LARB5_ID, 7) +#define M4U_PORT_CAM_WPE0_RDMA0 MTK_M4U_ID(M4U_LARB5_ID, 8) +#define M4U_PORT_CAM_WPE0_WDMA MTK_M4U_ID(M4U_LARB5_ID, 9) +#define M4U_PORT_CAM_FDVT_RP MTK_M4U_ID(M4U_LARB5_ID, 10) +#define M4U_PORT_CAM_FDVT_WR MTK_M4U_ID(M4U_LARB5_ID, 11) +#define M4U_PORT_CAM_FDVT_RB MTK_M4U_ID(M4U_LARB5_ID, 12) +#define M4U_PORT_CAM_WPE1_RDMA0 MTK_M4U_ID(M4U_LARB5_ID, 13) +#define M4U_PORT_CAM_WPE1_RDMA1 MTK_M4U_ID(M4U_LARB5_ID, 14) +#define M4U_PORT_CAM_WPE1_WDMA MTK_M4U_ID(M4U_LARB5_ID, 15) +#define M4U_PORT_CAM_DPE_RDMA MTK_M4U_ID(M4U_LARB5_ID, 16) +#define M4U_PORT_CAM_DPE_WDMA MTK_M4U_ID(M4U_LARB5_ID, 17) +#define M4U_PORT_CAM_MFB_RDMA0 MTK_M4U_ID(M4U_LARB5_ID, 18) +#define M4U_PORT_CAM_MFB_RDMA1 MTK_M4U_ID(M4U_LARB5_ID, 19) +#define M4U_PORT_CAM_MFB_WDMA MTK_M4U_ID(M4U_LARB5_ID, 20) +#define M4U_PORT_CAM_RSC_RDMA0 MTK_M4U_ID(M4U_LARB5_ID, 21) +#define M4U_PORT_CAM_RSC_WDMA MTK_M4U_ID(M4U_LARB5_ID, 22) +#define M4U_PORT_CAM_OWE_RDMA MTK_M4U_ID(M4U_LARB5_ID, 23) 
+#define M4U_PORT_CAM_OWE_WDMA MTK_M4U_ID(M4U_LARB5_ID, 24) + +/* larb6 */ +#define M4U_PORT_CAM_IMGO MTK_M4U_ID(M4U_LARB6_ID, 0) +#define M4U_PORT_CAM_RRZO MTK_M4U_ID(M4U_LARB6_ID, 1) +#define M4U_PORT_CAM_AAO MTK_M4U_ID(M4U_LARB6_ID, 2) +#define M4U_PORT_CAM_AFO MTK_M4U_ID(M4U_LARB6_ID, 3) +#define M4U_PORT_CAM_LSCI0 MTK_M4U_ID(M4U_LARB6_ID, 4) +#define M4U_PORT_CAM_LSCI1 MTK_M4U_ID(M4U_LARB6_ID, 5) +#define M4U_PORT_CAM_PDO MTK_M4U_ID(M4U_LARB6_ID, 6) +#define M4U_PORT_CAM_BPCI MTK_M4U_ID(M4U_LARB6_ID, 7) +#define M4U_PORT_CAM_LCSO MTK_M4U_ID(M4U_LARB6_ID, 8) +#define M4U_PORT_CAM_CAM_RSSO_A MTK_M4U_ID(M4U_LARB6_ID, 9) +#define M4U_PORT_CAM_UFEO MTK_M4U_ID(M4U_LARB6_ID, 10) +#define M4U_PORT_CAM_SOCO MTK_M4U_ID(M4U_LARB6_ID, 11) +#define M4U_PORT_CAM_SOC1 MTK_M4U_ID(M4U_LARB6_ID, 12) +#define M4U_PORT_CAM_SOC2 MTK_M4U_ID(M4U_LARB6_ID, 13) +#define M4U_PORT_CAM_CCUI MTK_M4U_ID(M4U_LARB6_ID, 14) +#define M4U_PORT_CAM_CCUO MTK_M4U_ID(M4U_LARB6_ID, 15) +#define M4U_PORT_CAM_RAWI_A MTK_M4U_ID(M4U_LARB6_ID, 16) +#define M4U_PORT_CAM_CCUG MTK_M4U_ID(M4U_LARB6_ID, 17) +#define M4U_PORT_CAM_PSO MTK_M4U_ID(M4U_LARB6_ID, 18) +#define M4U_PORT_CAM_AFO_1 MTK_M4U_ID(M4U_LARB6_ID, 19) +#define M4U_PORT_CAM_LSCI_2 MTK_M4U_ID(M4U_LARB6_ID, 20) +#define M4U_PORT_CAM_PDI MTK_M4U_ID(M4U_LARB6_ID, 21) +#define M4U_PORT_CAM_FLKO MTK_M4U_ID(M4U_LARB6_ID, 22) +#define M4U_PORT_CAM_LMVO MTK_M4U_ID(M4U_LARB6_ID, 23) +#define M4U_PORT_CAM_UFGO MTK_M4U_ID(M4U_LARB6_ID, 24) +#define M4U_PORT_CAM_SPARE MTK_M4U_ID(M4U_LARB6_ID, 25) +#define M4U_PORT_CAM_SPARE_2 MTK_M4U_ID(M4U_LARB6_ID, 26) +#define M4U_PORT_CAM_SPARE_3 MTK_M4U_ID(M4U_LARB6_ID, 27) +#define M4U_PORT_CAM_SPARE_4 MTK_M4U_ID(M4U_LARB6_ID, 28) +#define M4U_PORT_CAM_SPARE_5 MTK_M4U_ID(M4U_LARB6_ID, 29) +#define M4U_PORT_CAM_SPARE_6 MTK_M4U_ID(M4U_LARB6_ID, 30) + +/* CCU */ +#define M4U_PORT_CCU0 MTK_M4U_ID(M4U_LARB7_ID, 0) +#define M4U_PORT_CCU1 MTK_M4U_ID(M4U_LARB7_ID, 1) + +#endif -- cgit v1.2.3 From 
73d50811bc91d2a173213a78b6b43ac762f6cc54 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Sat, 24 Aug 2019 11:01:53 +0800 Subject: iommu/io-pgtable-arm-v7s: Rename the quirk from MTK_4GB to MTK_EXT In previous mt2712/mt8173, MediaTek extended the v7s to support 4GB dram. But in the latest mt8183, we extend it to support the PA up to 34bit. The "MTK_4GB" name then no longer fits, so this patch only changes the quirk name to "MTK_EXT". Signed-off-by: Yong Wu Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index b5a450a3bb47..915fb7303aa3 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -65,7 +65,7 @@ struct io_pgtable_cfg { * (unmapped) entries but the hardware might do so anyway, perform * TLB maintenance when mapping as well as when unmapping. * - * IO_PGTABLE_QUIRK_ARM_MTK_4GB: (ARM v7s format) Set bit 9 in all + * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) Set bit 9 in all * PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit * when the SoC is in "4GB mode" and they can only access the high * remap of DRAM (0x1_00000000 to 0x1_ffffffff). @@ -77,7 +77,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) - #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) + #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) unsigned long quirks; unsigned long pgsize_bitmap; -- cgit v1.2.3 From 4c019de653237674d38cf2b3119153b144ffe173 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Sat, 24 Aug 2019 11:01:54 +0800 Subject: iommu/io-pgtable-arm-v7s: Extend to support PA[33:32] for MediaTek MediaTek extends the arm v7s descriptor to support up to 34 bits PA where the bit32 and bit33 are encoded in the bit9 and bit4 of the PTE respectively.
Meanwhile the iova is still 32 bits. Regarding whether the pagetable address could be over 4GB, the mt8183 supports it while the previous mt8173 doesn't, thus keep it as is. Signed-off-by: Yong Wu Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 915fb7303aa3..a2a52c349fe4 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -65,10 +65,9 @@ struct io_pgtable_cfg { * (unmapped) entries but the hardware might do so anyway, perform * TLB maintenance when mapping as well as when unmapping. * - * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) Set bit 9 in all - * PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit - * when the SoC is in "4GB mode" and they can only access the high - * remap of DRAM (0x1_00000000 to 0x1_ffffffff). + * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) MediaTek IOMMUs extend + * to support up to 34 bits PA where the bit32 and bit33 are + * encoded in the bit9 and bit4 of the PTE respectively. * * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs * on unmap, for DMA domains using the flush queue mechanism for -- cgit v1.2.3 From ec2da07ca1202552d87fb01b238d46642817da2b Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Sat, 24 Aug 2019 11:02:07 +0800 Subject: memory: mtk-smi: Get rid of need_larbid The "mediatek,larb-id" has already been parsed in MTK IOMMU driver. There is no need to parse it again in the SMI driver. This only cleans up some code. This patch applies to all the current mt2701, mt2712, mt7623, mt8173 and mt8183. After this patch, the "mediatek,larb-id" is only needed for mt2712, which has 2 M4Us. In the other SoCs, we can get the larb-id from the M4U, in which the larbs in "mediatek,larbs" are always ordered. Correspondingly, the larb_nr in the "struct mtk_smi_iommu" could also be deleted.
CC: Matthias Brugger Signed-off-by: Yong Wu Reviewed-by: Evan Green Reviewed-by: Matthias Brugger Signed-off-by: Joerg Roedel --- include/soc/mediatek/smi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h index 79b74ced9d91..6f0b00cc73ea 100644 --- a/include/soc/mediatek/smi.h +++ b/include/soc/mediatek/smi.h @@ -21,7 +21,6 @@ struct mtk_smi_larb_iommu { }; struct mtk_smi_iommu { - unsigned int larb_nr; struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; }; -- cgit v1.2.3 From 1ee9feb2c9f893b893c900d2492c6a01dca680f3 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Sat, 24 Aug 2019 11:02:08 +0800 Subject: iommu/mediatek: Clean up struct mtk_smi_iommu Remove the "struct mtk_smi_iommu" to simplify the code since it has only one item in it right now. Signed-off-by: Yong Wu Reviewed-by: Matthias Brugger Signed-off-by: Joerg Roedel --- include/soc/mediatek/smi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h index 6f0b00cc73ea..5a34b87d89e3 100644 --- a/include/soc/mediatek/smi.h +++ b/include/soc/mediatek/smi.h @@ -20,10 +20,6 @@ struct mtk_smi_larb_iommu { unsigned int mmu; }; -struct mtk_smi_iommu { - struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; -}; - /* * mtk_smi_larb_get: Enable the power domain and clocks for this local arbiter. * It also initialize some basic setting(like iommu). -- cgit v1.2.3 From 3fc1ca00653db6371585e3c21c4b873b2f20e60a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 6 Sep 2019 14:14:48 +0800 Subject: swiotlb: Split size parameter to map/unmap APIs This splits the size parameter to swiotlb_tbl_map_single() and swiotlb_tbl_unmap_single() into an alloc_size and a mapping_size parameter, where the latter one is rounded up to the iommu page size. 
Suggested-by: Christoph Hellwig Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Signed-off-by: Joerg Roedel --- include/linux/swiotlb.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 361f62bb4a8e..cde3dc18e21a 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -46,13 +46,17 @@ enum dma_sync_target { extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, - phys_addr_t phys, size_t size, + phys_addr_t phys, + size_t mapping_size, + size_t alloc_size, enum dma_data_direction dir, unsigned long attrs); extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, + size_t mapping_size, + size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs); extern void swiotlb_tbl_sync_single(struct device *hwdev, -- cgit v1.2.3 From 3b53034c268d550d9e8522e613a14ab53b8840d8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 6 Sep 2019 14:14:51 +0800 Subject: iommu/vt-d: Add trace events for device dma map/unmap This adds trace support for the Intel IOMMU driver. It also declares some events which could be used to trace the events when an IOVA is being mapped or unmapped in a domain. 
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Mika Westerberg Signed-off-by: Lu Baolu Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Joerg Roedel --- include/trace/events/intel_iommu.h | 106 +++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 include/trace/events/intel_iommu.h (limited to 'include') diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h new file mode 100644 index 000000000000..54e61d456cdf --- /dev/null +++ b/include/trace/events/intel_iommu.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Intel IOMMU trace support + * + * Copyright (C) 2019 Intel Corporation + * + * Author: Lu Baolu + */ +#ifdef CONFIG_INTEL_IOMMU +#undef TRACE_SYSTEM +#define TRACE_SYSTEM intel_iommu + +#if !defined(_TRACE_INTEL_IOMMU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_INTEL_IOMMU_H + +#include +#include + +DECLARE_EVENT_CLASS(dma_map, + TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr, + size_t size), + + TP_ARGS(dev, dev_addr, phys_addr, size), + + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(dma_addr_t, dev_addr) + __field(phys_addr_t, phys_addr) + __field(size_t, size) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->dev_addr = dev_addr; + __entry->phys_addr = phys_addr; + __entry->size = size; + ), + + TP_printk("dev=%s dev_addr=0x%llx phys_addr=0x%llx size=%zu", + __get_str(dev_name), + (unsigned long long)__entry->dev_addr, + (unsigned long long)__entry->phys_addr, + __entry->size) +); + +DEFINE_EVENT(dma_map, map_single, + TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr, + size_t size), + TP_ARGS(dev, dev_addr, phys_addr, size) +); + +DEFINE_EVENT(dma_map, map_sg, + TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr, + size_t size), + TP_ARGS(dev, dev_addr, phys_addr, size) +); + +DEFINE_EVENT(dma_map, bounce_map_single, + 
TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr, + size_t size), + TP_ARGS(dev, dev_addr, phys_addr, size) +); + +DECLARE_EVENT_CLASS(dma_unmap, + TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), + + TP_ARGS(dev, dev_addr, size), + + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(dma_addr_t, dev_addr) + __field(size_t, size) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->dev_addr = dev_addr; + __entry->size = size; + ), + + TP_printk("dev=%s dev_addr=0x%llx size=%zu", + __get_str(dev_name), + (unsigned long long)__entry->dev_addr, + __entry->size) +); + +DEFINE_EVENT(dma_unmap, unmap_single, + TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), + TP_ARGS(dev, dev_addr, size) +); + +DEFINE_EVENT(dma_unmap, unmap_sg, + TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), + TP_ARGS(dev, dev_addr, size) +); + +DEFINE_EVENT(dma_unmap, bounce_unmap_single, + TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), + TP_ARGS(dev, dev_addr, size) +); + +#endif /* _TRACE_INTEL_IOMMU_H */ + +/* This part must be outside protection */ +#include +#endif /* CONFIG_INTEL_IOMMU */ -- cgit v1.2.3 From fd730007a06e9b11664e3816fcebd3faa91761ea Mon Sep 17 00:00:00 2001 From: Kyung Min Park Date: Fri, 6 Sep 2019 11:14:02 -0700 Subject: iommu/vt-d: Add Scalable Mode fault information Intel VT-d specification revision 3 added support for Scalable Mode Translation for DMA remapping. Add the Scalable Mode fault reasons to show detailed fault reasons when the translation fault happens. 
Link: https://software.intel.com/sites/default/files/managed/c5/15/vt-directed-io-spec.pdf Reviewed-by: Sohil Mehta Signed-off-by: Kyung Min Park Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f2ae8a006ff8..10e79a49af9d 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -272,6 +272,8 @@ #define dma_frcd_type(d) ((d >> 30) & 1) #define dma_frcd_fault_reason(c) (c & 0xff) #define dma_frcd_source_id(c) (c & 0xffff) +#define dma_frcd_pasid_value(c) (((c) >> 8) & 0xfffff) +#define dma_frcd_pasid_present(c) (((c) >> 31) & 1) /* low 64 bit */ #define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT)) -- cgit v1.2.3