author    Rohan Somvanshi <rsomvanshi@nvidia.com>  2012-02-01 16:03:06 +0530
committer Rohan Somvanshi <rsomvanshi@nvidia.com>  2012-02-01 16:03:06 +0530
commit    13ab2df1dfed5153899b98b1417f52de0ecc70cd
tree      9b47b72d23fef02d1b489a845547106430552be9
parent    c8932c9184978b42398fd582592315cbbf507415
parent    ac2b520c46800dec16f3c23d0cf8759826750937
Merge remote-tracking branch 'origin/dev/k3.3-rc1-iommu' into android-tegra-nv-3.1
Change-Id: I9001bb291779f107bbcb593d48f9f0f734074d0e
58 files changed, 3817 insertions, 1184 deletions
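Before the full diff: the Documentation/kernel-parameters.txt hunk below documents three new boot options brought in by this merge. As a hedged illustration only (each option belongs to a different IOMMU driver and they would rarely all be used together; the annotations paraphrase the documentation hunk), they are passed on the kernel command line as:

    amd_iommu=force_isolation    - force device isolation for all devices (AMD IOMMU)
    intremap=no_x2apic_optout    - ignore the BIOS x2APIC opt-out request (Intel interrupt remapping)
    iommu=group_mf               - group PCI multi-function devices into one IOMMU device group (x86, IA-64)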
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index d6e6724446c8..4d5c56502690 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -315,6 +315,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. is a lot of faster off - do not initialize any AMD IOMMU found in the system + force_isolation - Force device isolation for all + devices. The IOMMU driver is not + allowed anymore to lift isolation + requirements as needed. This option + does not override iommu=pt amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT @@ -1014,10 +1019,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. has the capability. With this option, super page will not be supported. intremap= [X86-64, Intel-IOMMU] - Format: { on (default) | off | nosid } on enable Interrupt Remapping (default) off disable Interrupt Remapping nosid disable Source ID checking + no_x2apic_optout + BIOS x2APIC opt-out request will be ignored inttest= [IA-64] @@ -1036,7 +1042,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nomerge forcesac soft - pt [x86, IA-64] + pt [x86, IA-64] + group_mf [x86, IA-64] + io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/arch/arm/mach-omap2/iommu2.c b/arch/arm/mach-omap2/iommu2.c index f286012783c6..eefc37912ef3 100644 --- a/arch/arm/mach-omap2/iommu2.c +++ b/arch/arm/mach-omap2/iommu2.c @@ -66,7 +66,7 @@ ((pgsz) == MMU_CAM_PGSZ_4K) ? 0xfffff000 : 0) -static void __iommu_set_twl(struct iommu *obj, bool on) +static void __iommu_set_twl(struct omap_iommu *obj, bool on) { u32 l = iommu_read_reg(obj, MMU_CNTL); @@ -85,7 +85,7 @@ static void __iommu_set_twl(struct iommu *obj, bool on) } -static int omap2_iommu_enable(struct iommu *obj) +static int omap2_iommu_enable(struct omap_iommu *obj) { u32 l, pa; unsigned long timeout; @@ -127,7 +127,7 @@ static int omap2_iommu_enable(struct iommu *obj) return 0; } -static void omap2_iommu_disable(struct iommu *obj) +static void omap2_iommu_disable(struct omap_iommu *obj) { u32 l = iommu_read_reg(obj, MMU_CNTL); @@ -138,12 +138,12 @@ static void omap2_iommu_disable(struct iommu *obj) dev_dbg(obj->dev, "%s is shutting down\n", obj->name); } -static void omap2_iommu_set_twl(struct iommu *obj, bool on) +static void omap2_iommu_set_twl(struct omap_iommu *obj, bool on) { __iommu_set_twl(obj, false); } -static u32 omap2_iommu_fault_isr(struct iommu *obj, u32 *ra) +static u32 omap2_iommu_fault_isr(struct omap_iommu *obj, u32 *ra) { u32 stat, da; u32 errs = 0; @@ -173,13 +173,13 @@ static u32 omap2_iommu_fault_isr(struct iommu *obj, u32 *ra) return errs; } -static void omap2_tlb_read_cr(struct iommu *obj, struct cr_regs *cr) +static void omap2_tlb_read_cr(struct omap_iommu *obj, struct cr_regs *cr) { cr->cam = iommu_read_reg(obj, MMU_READ_CAM); cr->ram = iommu_read_reg(obj, MMU_READ_RAM); } -static void omap2_tlb_load_cr(struct iommu *obj, struct cr_regs *cr) +static void omap2_tlb_load_cr(struct omap_iommu *obj, struct cr_regs *cr) { iommu_write_reg(obj, cr->cam | MMU_CAM_V, MMU_CAM); iommu_write_reg(obj, cr->ram, MMU_RAM); @@ -193,7 +193,8 @@ static u32 omap2_cr_to_virt(struct cr_regs *cr) return cr->cam & mask; } -static struct cr_regs *omap2_alloc_cr(struct iommu *obj, struct iotlb_entry *e) +static struct cr_regs *omap2_alloc_cr(struct omap_iommu *obj, + struct iotlb_entry *e) { struct cr_regs *cr; @@ -230,7 +231,8 @@ static u32 
omap2_get_pte_attr(struct iotlb_entry *e) return attr; } -static ssize_t omap2_dump_cr(struct iommu *obj, struct cr_regs *cr, char *buf) +static ssize_t +omap2_dump_cr(struct omap_iommu *obj, struct cr_regs *cr, char *buf) { char *p = buf; @@ -254,7 +256,8 @@ static ssize_t omap2_dump_cr(struct iommu *obj, struct cr_regs *cr, char *buf) goto out; \ } while (0) -static ssize_t omap2_iommu_dump_ctx(struct iommu *obj, char *buf, ssize_t len) +static ssize_t +omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len) { char *p = buf; @@ -280,7 +283,7 @@ out: return p - buf; } -static void omap2_iommu_save_ctx(struct iommu *obj) +static void omap2_iommu_save_ctx(struct omap_iommu *obj) { int i; u32 *p = obj->ctx; @@ -293,7 +296,7 @@ static void omap2_iommu_save_ctx(struct iommu *obj) BUG_ON(p[0] != IOMMU_ARCH_VERSION); } -static void omap2_iommu_restore_ctx(struct iommu *obj) +static void omap2_iommu_restore_ctx(struct omap_iommu *obj) { int i; u32 *p = obj->ctx; @@ -343,13 +346,13 @@ static const struct iommu_functions omap2_iommu_ops = { static int __init omap2_iommu_init(void) { - return install_iommu_arch(&omap2_iommu_ops); + return omap_install_iommu_arch(&omap2_iommu_ops); } module_init(omap2_iommu_init); static void __exit omap2_iommu_exit(void) { - uninstall_iommu_arch(&omap2_iommu_ops); + omap_uninstall_iommu_arch(&omap2_iommu_ops); } module_exit(omap2_iommu_exit); diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index bb8f4a6b3e37..fa62037f1df6 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -132,18 +132,6 @@ config OMAP_MBOX_KFIFO_SIZE This can also be changed at runtime (via the mbox_kfifo_size module parameter). -config OMAP_IOMMU - tristate - -config OMAP_IOMMU_DEBUG - tristate "Export OMAP IOMMU internals in DebugFS" - depends on OMAP_IOMMU && DEBUG_FS - help - Select this to see extensive information about - the internal state of OMAP IOMMU in debugfs. - - Say N unless you know you need this. 
- config OMAP_IOMMU_IVA2 bool diff --git a/arch/arm/plat-omap/Makefile b/arch/arm/plat-omap/Makefile index f0233e6abcdf..985262242f25 100644 --- a/arch/arm/plat-omap/Makefile +++ b/arch/arm/plat-omap/Makefile @@ -18,8 +18,6 @@ obj-$(CONFIG_ARCH_OMAP3) += omap_device.o obj-$(CONFIG_ARCH_OMAP4) += omap_device.o obj-$(CONFIG_OMAP_MCBSP) += mcbsp.o -obj-$(CONFIG_OMAP_IOMMU) += iommu.o iovmm.o -obj-$(CONFIG_OMAP_IOMMU_DEBUG) += iommu-debug.o obj-$(CONFIG_CPU_FREQ) += cpu-omap.o obj-$(CONFIG_OMAP_DM_TIMER) += dmtimer.o diff --git a/arch/arm/plat-omap/include/plat/iommu.h b/arch/arm/plat-omap/include/plat/iommu.h index 174f1b9c8c03..7a6ec98a08e8 100644 --- a/arch/arm/plat-omap/include/plat/iommu.h +++ b/arch/arm/plat-omap/include/plat/iommu.h @@ -25,16 +25,17 @@ struct iotlb_entry { }; }; -struct iommu { +struct omap_iommu { const char *name; struct module *owner; struct clk *clk; void __iomem *regbase; struct device *dev; void *isr_priv; + struct iommu_domain *domain; unsigned int refcount; - struct mutex iommu_lock; /* global for this whole object */ + spinlock_t iommu_lock; /* global for this whole object */ /* * We don't change iopgd for a situation like pgd for a task, @@ -48,8 +49,6 @@ struct iommu { struct list_head mmap; struct mutex mmap_lock; /* protect mmap */ - int (*isr)(struct iommu *obj, u32 da, u32 iommu_errs, void *priv); - void *ctx; /* iommu context: registres saved area */ u32 da_start; u32 da_end; @@ -81,25 +80,27 @@ struct iotlb_lock { struct iommu_functions { unsigned long version; - int (*enable)(struct iommu *obj); - void (*disable)(struct iommu *obj); - void (*set_twl)(struct iommu *obj, bool on); - u32 (*fault_isr)(struct iommu *obj, u32 *ra); + int (*enable)(struct omap_iommu *obj); + void (*disable)(struct omap_iommu *obj); + void (*set_twl)(struct omap_iommu *obj, bool on); + u32 (*fault_isr)(struct omap_iommu *obj, u32 *ra); - void (*tlb_read_cr)(struct iommu *obj, struct cr_regs *cr); - void (*tlb_load_cr)(struct iommu *obj, struct cr_regs *cr); + void (*tlb_read_cr)(struct omap_iommu *obj, struct cr_regs *cr); + void (*tlb_load_cr)(struct omap_iommu *obj, struct cr_regs *cr); - struct cr_regs *(*alloc_cr)(struct iommu *obj, struct iotlb_entry *e); + struct cr_regs *(*alloc_cr)(struct omap_iommu *obj, + struct iotlb_entry *e); int (*cr_valid)(struct cr_regs *cr); u32 (*cr_to_virt)(struct cr_regs *cr); void (*cr_to_e)(struct cr_regs *cr, struct iotlb_entry *e); - ssize_t (*dump_cr)(struct iommu *obj, struct cr_regs *cr, char *buf); + ssize_t (*dump_cr)(struct omap_iommu *obj, struct cr_regs *cr, + char *buf); u32 (*get_pte_attr)(struct iotlb_entry *e); - void (*save_ctx)(struct iommu *obj); - void (*restore_ctx)(struct iommu *obj); - ssize_t (*dump_ctx)(struct iommu *obj, char *buf, ssize_t len); + void (*save_ctx)(struct omap_iommu *obj); + void (*restore_ctx)(struct omap_iommu *obj); + ssize_t (*dump_ctx)(struct omap_iommu *obj, char *buf, ssize_t len); }; struct iommu_platform_data { @@ -150,40 +151,30 @@ struct iommu_platform_data { /* * global functions */ -extern u32 iommu_arch_version(void); - -extern void iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e); -extern u32 iotlb_cr_to_virt(struct cr_regs *cr); - -extern int load_iotlb_entry(struct iommu *obj, struct iotlb_entry *e); -extern void iommu_set_twl(struct iommu *obj, bool on); -extern void flush_iotlb_page(struct iommu *obj, u32 da); -extern void flush_iotlb_range(struct iommu *obj, u32 start, u32 end); -extern void flush_iotlb_all(struct iommu *obj); - -extern int iopgtable_store_entry(struct 
iommu *obj, struct iotlb_entry *e); -extern void iopgtable_lookup_entry(struct iommu *obj, u32 da, u32 **ppgd, - u32 **ppte); -extern size_t iopgtable_clear_entry(struct iommu *obj, u32 iova); - -extern int iommu_set_da_range(struct iommu *obj, u32 start, u32 end); -extern struct iommu *iommu_get(const char *name); -extern void iommu_put(struct iommu *obj); -extern int iommu_set_isr(const char *name, - int (*isr)(struct iommu *obj, u32 da, u32 iommu_errs, +extern u32 omap_iommu_arch_version(void); + +extern void omap_iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e); + +extern int +omap_iopgtable_store_entry(struct omap_iommu *obj, struct iotlb_entry *e); + +extern int omap_iommu_set_isr(const char *name, + int (*isr)(struct omap_iommu *obj, u32 da, u32 iommu_errs, void *priv), void *isr_priv); -extern void iommu_save_ctx(struct iommu *obj); -extern void iommu_restore_ctx(struct iommu *obj); +extern void omap_iommu_save_ctx(struct device *dev); +extern void omap_iommu_restore_ctx(struct device *dev); -extern int install_iommu_arch(const struct iommu_functions *ops); -extern void uninstall_iommu_arch(const struct iommu_functions *ops); +extern int omap_install_iommu_arch(const struct iommu_functions *ops); +extern void omap_uninstall_iommu_arch(const struct iommu_functions *ops); -extern int foreach_iommu_device(void *data, +extern int omap_foreach_iommu_device(void *data, int (*fn)(struct device *, void *)); -extern ssize_t iommu_dump_ctx(struct iommu *obj, char *buf, ssize_t len); -extern size_t dump_tlb_entries(struct iommu *obj, char *buf, ssize_t len); +extern ssize_t +omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len); +extern size_t +omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t len); #endif /* __MACH_IOMMU_H */ diff --git a/arch/arm/plat-omap/include/plat/iommu2.h b/arch/arm/plat-omap/include/plat/iommu2.h index 10ad05f410e9..d4116b595e40 100644 --- a/arch/arm/plat-omap/include/plat/iommu2.h +++ b/arch/arm/plat-omap/include/plat/iommu2.h @@ -83,12 +83,12 @@ /* * register accessors */ -static inline u32 iommu_read_reg(struct iommu *obj, size_t offs) +static inline u32 iommu_read_reg(struct omap_iommu *obj, size_t offs) { return __raw_readl(obj->regbase + offs); } -static inline void iommu_write_reg(struct iommu *obj, u32 val, size_t offs) +static inline void iommu_write_reg(struct omap_iommu *obj, u32 val, size_t offs) { __raw_writel(val, obj->regbase + offs); } diff --git a/arch/arm/plat-omap/iopgtable.h b/arch/arm/plat-omap/include/plat/iopgtable.h index c3e93bb0911f..e0c56aafc2e7 100644 --- a/arch/arm/plat-omap/iopgtable.h +++ b/arch/arm/plat-omap/include/plat/iopgtable.h @@ -97,6 +97,6 @@ static inline u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, } #define to_iommu(dev) \ - (struct iommu *)platform_get_drvdata(to_platform_device(dev)) + (struct omap_iommu *)platform_get_drvdata(to_platform_device(dev)) #endif /* __PLAT_OMAP_IOMMU_H */ diff --git a/arch/arm/plat-omap/include/plat/iovmm.h b/arch/arm/plat-omap/include/plat/iovmm.h index e992b9655fbc..498e57cda6cd 100644 --- a/arch/arm/plat-omap/include/plat/iovmm.h +++ b/arch/arm/plat-omap/include/plat/iovmm.h @@ -13,8 +13,10 @@ #ifndef __IOMMU_MMAP_H #define __IOMMU_MMAP_H +#include <linux/iommu.h> + struct iovm_struct { - struct iommu *iommu; /* iommu object which this belongs to */ + struct omap_iommu *iommu; /* iommu object which this belongs to */ u32 da_start; /* area definition */ u32 da_end; u32 flags; /* IOVMF_: see below */ @@ -70,20 +72,18 @@ struct iovm_struct { 
#define IOVMF_DA_FIXED (1 << (4 + IOVMF_SW_SHIFT)) -extern struct iovm_struct *find_iovm_area(struct iommu *obj, u32 da); -extern u32 iommu_vmap(struct iommu *obj, u32 da, +extern struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da); +extern u32 +omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da, const struct sg_table *sgt, u32 flags); -extern struct sg_table *iommu_vunmap(struct iommu *obj, u32 da); -extern u32 iommu_vmalloc(struct iommu *obj, u32 da, size_t bytes, - u32 flags); -extern void iommu_vfree(struct iommu *obj, const u32 da); -extern u32 iommu_kmap(struct iommu *obj, u32 da, u32 pa, size_t bytes, - u32 flags); -extern void iommu_kunmap(struct iommu *obj, u32 da); -extern u32 iommu_kmalloc(struct iommu *obj, u32 da, size_t bytes, - u32 flags); -extern void iommu_kfree(struct iommu *obj, u32 da); - -extern void *da_to_va(struct iommu *obj, u32 da); +extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain, + struct device *dev, u32 da); +extern u32 +omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, + u32 da, size_t bytes, u32 flags); +extern void +omap_iommu_vfree(struct iommu_domain *domain, struct device *dev, + const u32 da); +extern void *omap_da_to_va(struct device *dev, u32 da); #endif /* __IOMMU_MMAP_H */ diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index 0e5cd1405e0e..43ab1cd097a5 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -234,4 +234,4 @@ CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_T10DIF=y CONFIG_MISC_DEVICES=y -CONFIG_DMAR=y +CONFIG_INTEL_IOMMU=y diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile index 2f7caddf093e..ae16ec4f6308 100644 --- a/arch/ia64/dig/Makefile +++ b/arch/ia64/dig/Makefile @@ -6,7 +6,7 @@ # obj-y := setup.o -ifeq ($(CONFIG_DMAR), y) +ifeq ($(CONFIG_INTEL_IOMMU), y) obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o else obj-$(CONFIG_IA64_GENERIC) += machvec.o diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h index d66d446b127c..d05e78f6db94 100644 --- a/arch/ia64/include/asm/device.h +++ b/arch/ia64/include/asm/device.h @@ -10,7 +10,7 @@ struct dev_archdata { #ifdef CONFIG_ACPI void *acpi_handle; #endif -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU void *iommu; /* hook for IOMMU specific extension */ #endif }; diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h index 745e095fe82e..b6a809fa2995 100644 --- a/arch/ia64/include/asm/iommu.h +++ b/arch/ia64/include/asm/iommu.h @@ -7,12 +7,16 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); +#ifdef CONFIG_INTEL_IOMMU extern int force_iommu, no_iommu; -extern int iommu_detected; -#ifdef CONFIG_DMAR extern int iommu_pass_through; +extern int iommu_detected; +extern int iommu_group_mf; #else #define iommu_pass_through (0) +#define no_iommu (1) +#define iommu_detected (0) +#define iommu_group_mf (0) #endif extern void iommu_dma_init(void); extern void machvec_init(const char *name); diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 73b5f785e70c..127dd7be346a 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -139,7 +139,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) return channel ? 
isa_irq_to_vector(15) : isa_irq_to_vector(14); } -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU extern void pci_iommu_alloc(void); #endif #endif /* _ASM_IA64_PCI_H */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 395c2f216dd8..d959c84904be 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -43,7 +43,7 @@ obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) obj-y += esi_stub.o # must be in kernel proper endif -obj-$(CONFIG_DMAR) += pci-dma.o +obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_BINFMT_ELF) += elfcore.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 3be485a300b1..bfb4d01e0e51 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -88,7 +88,7 @@ acpi_get_sysname(void) struct acpi_table_rsdp *rsdp; struct acpi_table_xsdt *xsdt; struct acpi_table_header *hdr; -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU u64 i, nentries; #endif @@ -125,7 +125,7 @@ acpi_get_sysname(void) return "xen"; } -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU /* Look for Intel IOMMU */ nentries = (hdr->length - sizeof(*hdr)) / sizeof(xsdt->table_offset_entry[0]); diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 009df5434a7a..94e0db72d4a6 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -131,7 +131,7 @@ void arch_teardown_msi_irq(unsigned int irq) return ia64_teardown_msi_irq(irq); } -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU #ifdef CONFIG_SMP static int dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) @@ -210,5 +210,5 @@ int arch_setup_dmar_msi(unsigned int irq) "edge"); return 0; } -#endif /* CONFIG_DMAR */ +#endif /* CONFIG_INTEL_IOMMU */ diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index f6b1ff0aea76..eb1175720050 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -14,7 +14,7 @@ #include <asm/system.h> -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU #include <linux/kernel.h> @@ -33,6 +33,7 @@ int force_iommu __read_mostly; #endif int iommu_pass_through; +int iommu_group_mf; /* Dummy device used for NULL arguments (normally ISA). 
Better would be probably a smaller DMA mask, but this is bug-to-bug compatible diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 8213efe1998c..43f4c92816ef 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -33,6 +33,7 @@ #include <linux/uaccess.h> #include <linux/iommu.h> #include <linux/intel-iommu.h> +#include <linux/pci.h> #include <asm/pgtable.h> #include <asm/gcc_intrin.h> @@ -204,7 +205,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; case KVM_CAP_IOMMU: - r = iommu_found(); + r = iommu_present(&pci_bus_type); break; default: r = 0; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a47bb22657f..b8cd5448b0e1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -130,7 +130,7 @@ config SBUS bool config NEED_DMA_MAP_STATE - def_bool (X86_64 || DMAR || DMA_API_DEBUG) + def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG) config NEED_SG_DMA_LENGTH def_bool y @@ -220,7 +220,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC config HAVE_INTEL_TXT def_bool y - depends on EXPERIMENTAL && DMAR && ACPI + depends on EXPERIMENTAL && INTEL_IOMMU && ACPI config X86_32_SMP def_bool y @@ -287,7 +287,7 @@ config SMP config X86_X2APIC bool "Support x2apic" - depends on X86_LOCAL_APIC && X86_64 && INTR_REMAP + depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP ---help--- This enables x2apic support on CPUs that have this feature. diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 22a0dc8e51dd..058a35b8286c 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -67,8 +67,8 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_X86_ACPI_CPUFREQ=y CONFIG_PCI_MMCONFIG=y -CONFIG_DMAR=y -# CONFIG_DMAR_DEFAULT_ON is not set +CONFIG_INTEL_IOMMU=y +# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_PCIEPORTBUS=y CONFIG_PCCARD=y CONFIG_YENTA=y diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 029f230ab637..63a2a03d7d51 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -8,7 +8,7 @@ struct dev_archdata { #ifdef CONFIG_X86_64 struct dma_map_ops *dma_ops; #endif -#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU) +#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif }; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 09199052060f..eb92a6ed2be7 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -119,7 +119,7 @@ struct irq_cfg { cpumask_var_t old_domain; u8 vector; u8 move_in_progress : 1; -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP struct irq_2_iommu irq_2_iommu; #endif }; diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 345c99cef152..dffc38ee6255 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -5,6 +5,7 @@ extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; +extern int iommu_group_mf; /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 1c23360fb2d8..47d99934580f 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -3,7 +3,8 @@ #define IRTE_DEST(dest) ((x2apic_mode) ? 
dest : dest << 8) -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP +static void irq_remap_modify_chip_defaults(struct irq_chip *chip); static inline void prepare_irte(struct irte *irte, int vector, unsigned int dest) { @@ -36,6 +37,9 @@ static inline bool irq_remapped(struct irq_cfg *cfg) { return false; } +static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ +} #endif #endif /* _ASM_X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 52fa56399a50..a2fd72e0ab35 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1437,27 +1437,21 @@ void enable_x2apic(void) int __init enable_IR(void) { -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP if (!intr_remapping_supported()) { pr_debug("intr-remapping not supported\n"); - return 0; + return -1; } if (!x2apic_preenabled && skip_ioapic_setup) { pr_info("Skipped enabling intr-remap because of skipping " "io-apic setup\n"); - return 0; + return -1; } - if (enable_intr_remapping(x2apic_supported())) - return 0; - - pr_info("Enabled Interrupt-remapping\n"); - - return 1; - + return enable_intr_remapping(); #endif - return 0; + return -1; } void __init enable_IR_x2apic(void) @@ -1481,11 +1475,11 @@ void __init enable_IR_x2apic(void) mask_ioapic_entries(); if (dmar_table_init_ret) - ret = 0; + ret = -1; else ret = enable_IR(); - if (!ret) { + if (ret < 0) { /* IR is required if there is APIC ID > 255 even when running * under KVM */ @@ -1499,6 +1493,9 @@ void __init enable_IR_x2apic(void) x2apic_force_phys(); } + if (ret == IRQ_REMAP_XAPIC_MODE) + goto nox2apic; + x2apic_enabled = 1; if (x2apic_supported() && !x2apic_mode) { @@ -1508,19 +1505,21 @@ void __init enable_IR_x2apic(void) } nox2apic: - if (!ret) /* IR enabling failed */ + if (ret < 0) /* IR enabling failed */ restore_ioapic_entries(); legacy_pic->restore_mask(); local_irq_restore(flags); out: - if (x2apic_enabled) + if (x2apic_enabled || !x2apic_supported()) return; if (x2apic_preenabled) panic("x2apic: enabled by BIOS but kernel init failed."); - else if (cpu_has_x2apic) - pr_info("Not enabling x2apic, Intr-remapping init failed.\n"); + else if (ret == IRQ_REMAP_XAPIC_MODE) + pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); + else if (ret < 0) + pr_info("x2apic not enabled, IRQ remapping init failed\n"); } #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8eb863e27ea6..620da6fed6b7 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1202,7 +1202,6 @@ void __setup_vector_irq(int cpu) } static struct irq_chip ioapic_chip; -static struct irq_chip ir_ioapic_chip; #ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) @@ -1246,7 +1245,7 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, if (irq_remapped(cfg)) { irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - chip = &ir_ioapic_chip; + irq_remap_modify_chip_defaults(chip); fasteoi = trigger != 0; } @@ -2255,7 +2254,7 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, return ret; } -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP /* * Migrate the IO-APIC irq in the presence of intr-remapping. @@ -2267,6 +2266,9 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, * updated vector information), by using a virtual vector (io-apic pin number). * Real vector that is used for interrupting cpu will be coming from * the interrupt-remapping table entry. 
+ * + * As the migration is a simple atomic update of IRTE, the same mechanism + * is used to migrate MSI irq's in the presence of interrupt-remapping. */ static int ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, @@ -2291,10 +2293,16 @@ ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, irte.dest_id = IRTE_DEST(dest); /* - * Modified the IRTE and flushes the Interrupt entry cache. + * Atomically updates the IRTE with the new destination, vector + * and flushes the interrupt entry cache. */ modify_irte(irq, &irte); + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ if (cfg->move_in_progress) send_cleanup_vector(cfg); @@ -2552,7 +2560,7 @@ static void ack_apic_level(struct irq_data *data) } } -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP static void ir_ack_apic_edge(struct irq_data *data) { ack_APIC_irq(); @@ -2563,7 +2571,23 @@ static void ir_ack_apic_level(struct irq_data *data) ack_APIC_irq(); eoi_ioapic_irq(data->irq, data->chip_data); } -#endif /* CONFIG_INTR_REMAP */ + +static void ir_print_prefix(struct irq_data *data, struct seq_file *p) +{ + seq_printf(p, " IR-%s", data->chip->name); +} + +static void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ + chip->irq_print_chip = ir_print_prefix; + chip->irq_ack = ir_ack_apic_edge; + chip->irq_eoi = ir_ack_apic_level; + +#ifdef CONFIG_SMP + chip->irq_set_affinity = ir_ioapic_set_affinity; +#endif +} +#endif /* CONFIG_IRQ_REMAP */ static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", @@ -2578,21 +2602,6 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_retrigger = ioapic_retrigger_irq, }; -static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .irq_startup = startup_ioapic_irq, - .irq_mask = mask_ioapic_irq, - .irq_unmask = unmask_ioapic_irq, -#ifdef CONFIG_INTR_REMAP - .irq_ack = ir_ack_apic_edge, - .irq_eoi = ir_ack_apic_level, -#ifdef CONFIG_SMP - .irq_set_affinity = ir_ioapic_set_affinity, -#endif -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - static inline void init_IO_APIC_traps(void) { struct irq_cfg *cfg; @@ -3144,45 +3153,6 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) return 0; } -#ifdef CONFIG_INTR_REMAP -/* - * Migrate the MSI irq to another cpumask. This migration is - * done in the process context using interrupt-remapping hardware. - */ -static int -ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct irte irte; - - if (get_irte(irq, &irte)) - return -1; - - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * atomically update the IRTE with the new destination and vector. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. 
- */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return 0; -} - -#endif #endif /* CONFIG_SMP */ /* @@ -3200,19 +3170,6 @@ static struct irq_chip msi_chip = { .irq_retrigger = ioapic_retrigger_irq, }; -static struct irq_chip msi_ir_chip = { - .name = "IR-PCI-MSI", - .irq_unmask = unmask_msi_irq, - .irq_mask = mask_msi_irq, -#ifdef CONFIG_INTR_REMAP - .irq_ack = ir_ack_apic_edge, -#ifdef CONFIG_SMP - .irq_set_affinity = ir_msi_set_affinity, -#endif -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - /* * Map the PCI dev to the corresponding remapping hardware unit * and allocate 'nvec' consecutive interrupt-remapping table entries @@ -3255,7 +3212,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) if (irq_remapped(irq_get_chip_data(irq))) { irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - chip = &msi_ir_chip; + irq_remap_modify_chip_defaults(chip); } irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); @@ -3328,7 +3285,7 @@ void native_teardown_msi_irq(unsigned int irq) destroy_irq(irq); } -#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) +#ifdef CONFIG_DMAR_TABLE #ifdef CONFIG_SMP static int dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, @@ -3409,19 +3366,6 @@ static int hpet_msi_set_affinity(struct irq_data *data, #endif /* CONFIG_SMP */ -static struct irq_chip ir_hpet_msi_type = { - .name = "IR-HPET_MSI", - .irq_unmask = hpet_msi_unmask, - .irq_mask = hpet_msi_mask, -#ifdef CONFIG_INTR_REMAP - .irq_ack = ir_ack_apic_edge, -#ifdef CONFIG_SMP - .irq_set_affinity = ir_msi_set_affinity, -#endif -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - static struct irq_chip hpet_msi_type = { .name = "HPET_MSI", .irq_unmask = hpet_msi_unmask, @@ -3458,7 +3402,7 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) hpet_msi_write(irq_get_handler_data(irq), &msg); irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); if (irq_remapped(irq_get_chip_data(irq))) - chip = &ir_hpet_msi_type; + irq_remap_modify_chip_defaults(chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); return 0; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index b49d00da2aed..3b730fb13855 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -44,6 +44,15 @@ int iommu_detected __read_mostly = 0; */ int iommu_pass_through __read_mostly; +/* + * Group multi-function PCI devices into a single device-group for the + * iommu_device_group interface. This tells the iommu driver to pretend + * it cannot distinguish between functions of a device, exposing only one + * group for the device. Useful for disallowing use of individual PCI + * functions from userspace drivers. + */ +int iommu_group_mf __read_mostly; + extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; /* Dummy device used for NULL arguments (normally ISA). 
*/ @@ -168,6 +177,8 @@ static __init int iommu_setup(char *p) #endif if (!strncmp(p, "pt", 2)) iommu_pass_through = 1; + if (!strncmp(p, "group_mf", 8)) + iommu_group_mf = 1; gart_parse_options(p); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 84a28ea45fa4..73c6a4268bf4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -44,6 +44,7 @@ #include <linux/perf_event.h> #include <linux/uaccess.h> #include <linux/hash.h> +#include <linux/pci.h> #include <trace/events/kvm.h> #define CREATE_TRACE_POINTS @@ -2095,7 +2096,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = 0; break; case KVM_CAP_IOMMU: - r = iommu_found(); + r = iommu_present(&pci_bus_type); break; case KVM_CAP_MCE: r = KVM_MAX_MCE_BANKS; diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 85151019dde1..2774ac1086d3 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -30,10 +30,10 @@ /* * If we have Intel graphics, we're not going to have anything other than * an Intel IOMMU. So make the correct use of the PCI DMA API contingent - * on the Intel IOMMU support (CONFIG_DMAR). + * on the Intel IOMMU support (CONFIG_INTEL_IOMMU). * Only newer chipsets need to bother with this, of course. */ -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU #define USE_PCI_DMA_API 1 #else #define USE_PCI_DMA_API 0 diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index a2c55bf57903..b394c6c5fbe7 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -34,7 +34,9 @@ config AMD_IOMMU bool "AMD IOMMU support" select SWIOTLB select PCI_MSI - select PCI_IOV + select PCI_ATS + select PCI_PRI + select PCI_PASID select IOMMU_API depends on X86_64 && PCI && ACPI ---help--- @@ -58,11 +60,24 @@ config AMD_IOMMU_STATS information to userspace via debugfs. If unsure, say N. +config AMD_IOMMU_V2 + tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)" + depends on AMD_IOMMU && PROFILING && EXPERIMENTAL + select MMU_NOTIFIER + ---help--- + This option enables support for the AMD IOMMUv2 features of the IOMMU + hardware. Select this option if you want to use devices that support + the the PCI PRI and PASID interface. + # Intel IOMMU support -config DMAR - bool "Support for DMA Remapping Devices" +config DMAR_TABLE + bool + +config INTEL_IOMMU + bool "Support for Intel IOMMU using DMA Remapping Devices" depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC) select IOMMU_API + select DMAR_TABLE help DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. @@ -70,18 +85,18 @@ config DMAR and include PCI device scope covered by these DMA remapping devices. -config DMAR_DEFAULT_ON +config INTEL_IOMMU_DEFAULT_ON def_bool y - prompt "Enable DMA Remapping Devices by default" - depends on DMAR + prompt "Enable Intel DMA Remapping Devices by default" + depends on INTEL_IOMMU help Selecting this option will enable a DMAR device at boot time if one is found. If this option is not selected, DMAR support can be enabled by passing intel_iommu=on to the kernel. -config DMAR_BROKEN_GFX_WA +config INTEL_IOMMU_BROKEN_GFX_WA bool "Workaround broken graphics drivers (going away soon)" - depends on DMAR && BROKEN && X86 + depends on INTEL_IOMMU && BROKEN && X86 ---help--- Current Graphics drivers tend to use physical address for DMA and avoid using DMA APIs. Setting this config @@ -90,23 +105,43 @@ config DMAR_BROKEN_GFX_WA to use physical addresses for DMA, at least until this option is removed in the 2.6.32 kernel. 
-config DMAR_FLOPPY_WA +config INTEL_IOMMU_FLOPPY_WA def_bool y - depends on DMAR && X86 + depends on INTEL_IOMMU && X86 ---help--- Floppy disk drivers are known to bypass DMA API calls thereby failing to work when IOMMU is enabled. This workaround will setup a 1:1 mapping for the first 16MiB to make floppy (an ISA device) work. -config INTR_REMAP +config IRQ_REMAP bool "Support for Interrupt Remapping (EXPERIMENTAL)" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL + select DMAR_TABLE ---help--- Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or to support platforms with CPU's having > 8 bit APIC ID, say Y. +# OMAP IOMMU support +config OMAP_IOMMU + bool "OMAP IOMMU Support" + depends on ARCH_OMAP + select IOMMU_API + +config OMAP_IOVMM + tristate "OMAP IO Virtual Memory Manager Support" + depends on OMAP_IOMMU + +config OMAP_IOMMU_DEBUG + tristate "Export OMAP IOMMU/IOVMM internals in DebugFS" + depends on OMAP_IOVMM && DEBUG_FS + help + Select this to see extensive information about + the internal state of OMAP IOMMU/IOVMM in debugfs. + + Say N unless you know you need this. + config TEGRA_IOMMU_SMMU bool "Tegra SMMU IOMMU Support" depends on ARCH_TEGRA diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 74080aa1761f..7ad7a3bc1242 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,7 +1,12 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o -obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o -obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o +obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o +obj-$(CONFIG_DMAR_TABLE) += dmar.o +obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o +obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o +obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o +obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o +obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index cc79045bc527..cce1f03b8895 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/ratelimit.h> #include <linux/pci.h> #include <linux/pci-ats.h> #include <linux/bitmap.h> @@ -28,6 +29,8 @@ #include <linux/iommu.h> #include <linux/delay.h> #include <linux/amd-iommu.h> +#include <linux/notifier.h> +#include <linux/export.h> #include <asm/msidef.h> #include <asm/proto.h> #include <asm/iommu.h> @@ -41,6 +44,24 @@ #define LOOP_TIMEOUT 100000 +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. + * + * Traditionally the IOMMU core just handed us the mappings directly, + * after making sure the size is an order of a 4KiB page and that the + * mapping has natural alignment. + * + * To retain this behavior, we currently advertise that we support + * all page sizes that are an order of 4KiB. + * + * If at some point we'd like to utilize the IOMMU core's new behavior, + * we could change this to advertise the real page sizes we support. 
+ */ +#define AMD_IOMMU_PGSIZES (~0xFFFUL) + static DEFINE_RWLOCK(amd_iommu_devtable_lock); /* A list of preallocated protection domains */ @@ -59,6 +80,9 @@ static struct protection_domain *pt_domain; static struct iommu_ops amd_iommu_ops; +static ATOMIC_NOTIFIER_HEAD(ppr_notifier); +int amd_iommu_max_glx_val = -1; + /* * general struct to manage commands send to an IOMMU */ @@ -67,6 +91,7 @@ struct iommu_cmd { }; static void update_domain(struct protection_domain *domain); +static int __init alloc_passthrough_domain(void); /**************************************************************************** * @@ -147,6 +172,33 @@ static struct iommu_dev_data *get_dev_data(struct device *dev) return dev->archdata.iommu; } +static bool pci_iommuv2_capable(struct pci_dev *pdev) +{ + static const int caps[] = { + PCI_EXT_CAP_ID_ATS, + PCI_EXT_CAP_ID_PRI, + PCI_EXT_CAP_ID_PASID, + }; + int i, pos; + + for (i = 0; i < 3; ++i) { + pos = pci_find_ext_capability(pdev, caps[i]); + if (pos == 0) + return false; + } + + return true; +} + +static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) +{ + struct iommu_dev_data *dev_data; + + dev_data = get_dev_data(&pdev->dev); + + return dev_data->errata & (1 << erratum) ? true : false; +} + /* * In this function the list of preallocated protection domains is traversed to * find the domain for a specific device @@ -204,6 +256,7 @@ static bool check_device(struct device *dev) static int iommu_init_device(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; u16 alias; @@ -228,6 +281,13 @@ static int iommu_init_device(struct device *dev) dev_data->alias_data = alias_data; } + if (pci_iommuv2_capable(pdev)) { + struct amd_iommu *iommu; + + iommu = amd_iommu_rlookup_table[dev_data->devid]; + dev_data->iommu_v2 = iommu->is_iommu_v2; + } + dev->archdata.iommu = dev_data; return 0; @@ -317,6 +377,11 @@ DECLARE_STATS_COUNTER(domain_flush_single); DECLARE_STATS_COUNTER(domain_flush_all); DECLARE_STATS_COUNTER(alloced_io_mem); DECLARE_STATS_COUNTER(total_map_requests); +DECLARE_STATS_COUNTER(complete_ppr); +DECLARE_STATS_COUNTER(invalidate_iotlb); +DECLARE_STATS_COUNTER(invalidate_iotlb_all); +DECLARE_STATS_COUNTER(pri_requests); + static struct dentry *stats_dir; static struct dentry *de_fflush; @@ -351,6 +416,10 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&domain_flush_all); amd_iommu_stats_add(&alloced_io_mem); amd_iommu_stats_add(&total_map_requests); + amd_iommu_stats_add(&complete_ppr); + amd_iommu_stats_add(&invalidate_iotlb); + amd_iommu_stats_add(&invalidate_iotlb_all); + amd_iommu_stats_add(&pri_requests); } #endif @@ -365,8 +434,8 @@ static void dump_dte_entry(u16 devid) { int i; - for (i = 0; i < 8; ++i) - pr_err("AMD-Vi: DTE[%d]: %08x\n", i, + for (i = 0; i < 4; ++i) + pr_err("AMD-Vi: DTE[%d]: %016llx\n", i, amd_iommu_dev_table[devid].data[i]); } @@ -461,12 +530,84 @@ static void iommu_poll_events(struct amd_iommu *iommu) spin_unlock_irqrestore(&iommu->lock, flags); } +static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head) +{ + struct amd_iommu_fault fault; + volatile u64 *raw; + int i; + + INC_STATS_COUNTER(pri_requests); + + raw = (u64 *)(iommu->ppr_log + head); + + /* + * Hardware bug: Interrupt may arrive before the entry is written to + * memory. If this happens we need to wait for the entry to arrive. 
+ */ + for (i = 0; i < LOOP_TIMEOUT; ++i) { + if (PPR_REQ_TYPE(raw[0]) != 0) + break; + udelay(1); + } + + if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { + pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); + return; + } + + fault.address = raw[1]; + fault.pasid = PPR_PASID(raw[0]); + fault.device_id = PPR_DEVID(raw[0]); + fault.tag = PPR_TAG(raw[0]); + fault.flags = PPR_FLAGS(raw[0]); + + /* + * To detect the hardware bug we need to clear the entry + * to back to zero. + */ + raw[0] = raw[1] = 0; + + atomic_notifier_call_chain(&ppr_notifier, 0, &fault); +} + +static void iommu_poll_ppr_log(struct amd_iommu *iommu) +{ + unsigned long flags; + u32 head, tail; + + if (iommu->ppr_log == NULL) + return; + + spin_lock_irqsave(&iommu->lock, flags); + + head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + + while (head != tail) { + + /* Handle PPR entry */ + iommu_handle_ppr_entry(iommu, head); + + /* Update and refresh ring-buffer state*/ + head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; + writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + } + + /* enable ppr interrupts again */ + writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + irqreturn_t amd_iommu_int_thread(int irq, void *data) { struct amd_iommu *iommu; - for_each_iommu(iommu) + for_each_iommu(iommu) { iommu_poll_events(iommu); + iommu_poll_ppr_log(iommu); + } return IRQ_HANDLED; } @@ -595,6 +736,60 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; } +static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid, + u64 address, bool size) +{ + memset(cmd, 0, sizeof(*cmd)); + + address &= ~(0xfffULL); + + cmd->data[0] = pasid & PASID_MASK; + cmd->data[1] = domid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; + if (size) + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); +} + +static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid, + int qdep, u64 address, bool size) +{ + memset(cmd, 0, sizeof(*cmd)); + + address &= ~(0xfffULL); + + cmd->data[0] = devid; + cmd->data[0] |= (pasid & 0xff) << 16; + cmd->data[0] |= (qdep & 0xff) << 24; + cmd->data[1] = devid; + cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16; + cmd->data[2] = lower_32_bits(address); + cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; + cmd->data[3] = upper_32_bits(address); + if (size) + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); +} + +static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid, + int status, int tag, bool gn) +{ + memset(cmd, 0, sizeof(*cmd)); + + cmd->data[0] = devid; + if (gn) { + cmd->data[1] = pasid & PASID_MASK; + cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK; + } + cmd->data[3] = tag & 0x1ff; + cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT; + + CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR); +} + static void build_inv_all(struct iommu_cmd *cmd) { memset(cmd, 0, sizeof(*cmd)); @@ -1496,6 +1691,48 @@ static void free_pagetable(struct protection_domain *domain) domain->pt_root = NULL; } +static void free_gcr3_tbl_level1(u64 *tbl) +{ + u64 *ptr; + int i; + + for (i = 0; i < 512; ++i) { + if (!(tbl[i] & GCR3_VALID)) + 
continue; + + ptr = __va(tbl[i] & PAGE_MASK); + + free_page((unsigned long)ptr); + } +} + +static void free_gcr3_tbl_level2(u64 *tbl) +{ + u64 *ptr; + int i; + + for (i = 0; i < 512; ++i) { + if (!(tbl[i] & GCR3_VALID)) + continue; + + ptr = __va(tbl[i] & PAGE_MASK); + + free_gcr3_tbl_level1(ptr); + } +} + +static void free_gcr3_table(struct protection_domain *domain) +{ + if (domain->glx == 2) + free_gcr3_tbl_level2(domain->gcr3_tbl); + else if (domain->glx == 1) + free_gcr3_tbl_level1(domain->gcr3_tbl); + else if (domain->glx != 0) + BUG(); + + free_page((unsigned long)domain->gcr3_tbl); +} + /* * Free a domain, only used if something went wrong in the * allocation path and we need to free an already allocated page table @@ -1582,20 +1819,52 @@ static bool dma_ops_domain(struct protection_domain *domain) static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) { - u64 pte_root = virt_to_phys(domain->pt_root); - u32 flags = 0; + u64 pte_root = 0; + u64 flags = 0; + + if (domain->mode != PAGE_MODE_NONE) + pte_root = virt_to_phys(domain->pt_root); pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; + flags = amd_iommu_dev_table[devid].data[1]; + if (ats) flags |= DTE_FLAG_IOTLB; - amd_iommu_dev_table[devid].data[3] |= flags; - amd_iommu_dev_table[devid].data[2] = domain->id; - amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); - amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); + if (domain->flags & PD_IOMMUV2_MASK) { + u64 gcr3 = __pa(domain->gcr3_tbl); + u64 glx = domain->glx; + u64 tmp; + + pte_root |= DTE_FLAG_GV; + pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT; + + /* First mask out possible old values for GCR3 table */ + tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; + flags &= ~tmp; + + tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; + flags &= ~tmp; + + /* Encode GCR3 table into DTE */ + tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A; + pte_root |= tmp; + + tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B; + flags |= tmp; + + tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C; + flags |= tmp; + } + + flags &= ~(0xffffUL); + flags |= domain->id; + + amd_iommu_dev_table[devid].data[1] = flags; + amd_iommu_dev_table[devid].data[0] = pte_root; } static void clear_dte_entry(u16 devid) @@ -1603,7 +1872,6 @@ static void clear_dte_entry(u16 devid) /* remove entry from the device table seen by the hardware */ amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; amd_iommu_dev_table[devid].data[1] = 0; - amd_iommu_dev_table[devid].data[2] = 0; amd_iommu_apply_erratum_63(devid); } @@ -1696,6 +1964,93 @@ out_unlock: return ret; } + +static void pdev_iommuv2_disable(struct pci_dev *pdev) +{ + pci_disable_ats(pdev); + pci_disable_pri(pdev); + pci_disable_pasid(pdev); +} + +/* FIXME: Change generic reset-function to do the same */ +static int pri_reset_while_enabled(struct pci_dev *pdev) +{ + u16 control; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); + if (!pos) + return -EINVAL; + + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); + control |= PCI_PRI_CTRL_RESET; + pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); + + return 0; +} + +static int pdev_iommuv2_enable(struct pci_dev *pdev) +{ + bool reset_enable; + int reqs, ret; + + /* FIXME: Hardcode number of outstanding requests for now */ + reqs = 32; + if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE)) + reqs = 1; + reset_enable = 
pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET); + + /* Only allow access to user-accessible pages */ + ret = pci_enable_pasid(pdev, 0); + if (ret) + goto out_err; + + /* First reset the PRI state of the device */ + ret = pci_reset_pri(pdev); + if (ret) + goto out_err; + + /* Enable PRI */ + ret = pci_enable_pri(pdev, reqs); + if (ret) + goto out_err; + + if (reset_enable) { + ret = pri_reset_while_enabled(pdev); + if (ret) + goto out_err; + } + + ret = pci_enable_ats(pdev, PAGE_SHIFT); + if (ret) + goto out_err; + + return 0; + +out_err: + pci_disable_pri(pdev); + pci_disable_pasid(pdev); + + return ret; +} + +/* FIXME: Move this to PCI code */ +#define PCI_PRI_TLP_OFF (1 << 2) + +bool pci_pri_tlp_required(struct pci_dev *pdev) +{ + u16 control; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); + if (!pos) + return false; + + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); + + return (control & PCI_PRI_TLP_OFF) ? true : false; +} + /* * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware @@ -1710,7 +2065,18 @@ static int attach_device(struct device *dev, dev_data = get_dev_data(dev); - if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) { + if (domain->flags & PD_IOMMUV2_MASK) { + if (!dev_data->iommu_v2 || !dev_data->passthrough) + return -EINVAL; + + if (pdev_iommuv2_enable(pdev) != 0) + return -EINVAL; + + dev_data->ats.enabled = true; + dev_data->ats.qdep = pci_ats_queue_depth(pdev); + dev_data->pri_tlp = pci_pri_tlp_required(pdev); + } else if (amd_iommu_iotlb_sup && + pci_enable_ats(pdev, PAGE_SHIFT) == 0) { dev_data->ats.enabled = true; dev_data->ats.qdep = pci_ats_queue_depth(pdev); } @@ -1760,7 +2126,7 @@ static void __detach_device(struct iommu_dev_data *dev_data) * passthrough domain if it is detached from any other domain. * Make sure we can deassign from the pt_domain itself. 
*/ - if (iommu_pass_through && + if (dev_data->passthrough && (dev_data->domain == NULL && domain != pt_domain)) __attach_device(dev_data, pt_domain); } @@ -1770,20 +2136,24 @@ static void __detach_device(struct iommu_dev_data *dev_data) */ static void detach_device(struct device *dev) { + struct protection_domain *domain; struct iommu_dev_data *dev_data; unsigned long flags; dev_data = get_dev_data(dev); + domain = dev_data->domain; /* lock device table */ write_lock_irqsave(&amd_iommu_devtable_lock, flags); __detach_device(dev_data); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - if (dev_data->ats.enabled) { + if (domain->flags & PD_IOMMUV2_MASK) + pdev_iommuv2_disable(to_pci_dev(dev)); + else if (dev_data->ats.enabled) pci_disable_ats(to_pci_dev(dev)); - dev_data->ats.enabled = false; - } + + dev_data->ats.enabled = false; } /* @@ -1818,18 +2188,20 @@ static struct protection_domain *domain_for_device(struct device *dev) static int device_change_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct device *dev = data; - u16 devid; - struct protection_domain *domain; struct dma_ops_domain *dma_domain; + struct protection_domain *domain; + struct iommu_dev_data *dev_data; + struct device *dev = data; struct amd_iommu *iommu; unsigned long flags; + u16 devid; if (!check_device(dev)) return 0; - devid = get_device_id(dev); - iommu = amd_iommu_rlookup_table[devid]; + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); switch (action) { case BUS_NOTIFY_UNBOUND_DRIVER: @@ -1838,7 +2210,7 @@ static int device_change_notifier(struct notifier_block *nb, if (!domain) goto out; - if (iommu_pass_through) + if (dev_data->passthrough) break; detach_device(dev); break; @@ -2434,8 +2806,9 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) */ static void prealloc_protection_domains(void) { - struct pci_dev *dev = NULL; + struct iommu_dev_data *dev_data; struct dma_ops_domain *dma_dom; + struct pci_dev *dev = NULL; u16 devid; for_each_pci_dev(dev) { @@ -2444,6 +2817,16 @@ static void prealloc_protection_domains(void) if (!check_device(&dev->dev)) continue; + dev_data = get_dev_data(&dev->dev); + if (!amd_iommu_force_isolation && dev_data->iommu_v2) { + /* Make sure passthrough domain is allocated */ + alloc_passthrough_domain(); + dev_data->passthrough = true; + attach_device(&dev->dev, pt_domain); + pr_info("AMD-Vi: Using passthough domain for device %s\n", + dev_name(&dev->dev)); + } + /* Is there already any domain for it? 
*/ if (domain_for_device(&dev->dev)) continue; @@ -2474,6 +2857,7 @@ static struct dma_map_ops amd_iommu_dma_ops = { static unsigned device_dma_ops_init(void) { + struct iommu_dev_data *dev_data; struct pci_dev *pdev = NULL; unsigned unhandled = 0; @@ -2483,7 +2867,12 @@ static unsigned device_dma_ops_init(void) continue; } - pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops; + dev_data = get_dev_data(&pdev->dev); + + if (!dev_data->passthrough) + pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops; + else + pdev->dev.archdata.dma_ops = &nommu_dma_ops; } return unhandled; @@ -2495,7 +2884,7 @@ static unsigned device_dma_ops_init(void) void __init amd_iommu_init_api(void) { - register_iommu(&amd_iommu_ops); + bus_set_iommu(&pci_bus_type, &amd_iommu_ops); } int __init amd_iommu_init_dma_ops(void) @@ -2610,6 +2999,20 @@ out_err: return NULL; } +static int __init alloc_passthrough_domain(void) +{ + if (pt_domain != NULL) + return 0; + + /* allocate passthrough domain */ + pt_domain = protection_domain_alloc(); + if (!pt_domain) + return -ENOMEM; + + pt_domain->mode = PAGE_MODE_NONE; + + return 0; +} static int amd_iommu_domain_init(struct iommu_domain *dom) { struct protection_domain *domain; @@ -2623,6 +3026,8 @@ static int amd_iommu_domain_init(struct iommu_domain *dom) if (!domain->pt_root) goto out_free; + domain->iommu_domain = dom; + dom->priv = domain; return 0; @@ -2645,7 +3050,11 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) BUG_ON(domain->dev_cnt != 0); - free_pagetable(domain); + if (domain->mode != PAGE_MODE_NONE) + free_pagetable(domain); + + if (domain->flags & PD_IOMMUV2_MASK) + free_gcr3_table(domain); protection_domain_free(domain); @@ -2702,13 +3111,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, } static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, - phys_addr_t paddr, int gfp_order, int iommu_prot) + phys_addr_t paddr, size_t page_size, int iommu_prot) { - unsigned long page_size = 0x1000UL << gfp_order; struct protection_domain *domain = dom->priv; int prot = 0; int ret; + if (domain->mode == PAGE_MODE_NONE) + return -EINVAL; + if (iommu_prot & IOMMU_READ) prot |= IOMMU_PROT_IR; if (iommu_prot & IOMMU_WRITE) @@ -2721,13 +3132,14 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, return ret; } -static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, - int gfp_order) +static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, + size_t page_size) { struct protection_domain *domain = dom->priv; - unsigned long page_size, unmap_size; + size_t unmap_size; - page_size = 0x1000UL << gfp_order; + if (domain->mode == PAGE_MODE_NONE) + return -EINVAL; mutex_lock(&domain->api_lock); unmap_size = iommu_unmap_page(domain, iova, page_size); @@ -2735,7 +3147,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, domain_flush_tlb_pde(domain); - return get_order(unmap_size); + return unmap_size; } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, @@ -2746,6 +3158,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, phys_addr_t paddr; u64 *pte, __pte; + if (domain->mode == PAGE_MODE_NONE) + return iova; + pte = fetch_pte(domain, iova); if (!pte || !IOMMU_PTE_PRESENT(*pte)) @@ -2773,6 +3188,26 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } +static int amd_iommu_device_group(struct device *dev, unsigned int *groupid) +{ + struct iommu_dev_data *dev_data = dev->archdata.iommu; + struct pci_dev 
*pdev = to_pci_dev(dev); + u16 devid; + + if (!dev_data) + return -ENODEV; + + if (pdev->is_virtfn || !iommu_group_mf) + devid = dev_data->devid; + else + devid = calc_devid(pdev->bus->number, + PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); + + *groupid = amd_iommu_alias_table[devid]; + + return 0; +} + static struct iommu_ops amd_iommu_ops = { .domain_init = amd_iommu_domain_init, .domain_destroy = amd_iommu_domain_destroy, @@ -2782,6 +3217,8 @@ static struct iommu_ops amd_iommu_ops = { .unmap = amd_iommu_unmap, .iova_to_phys = amd_iommu_iova_to_phys, .domain_has_cap = amd_iommu_domain_has_cap, + .device_group = amd_iommu_device_group, + .pgsize_bitmap = AMD_IOMMU_PGSIZES, }; /***************************************************************************** @@ -2796,21 +3233,23 @@ static struct iommu_ops amd_iommu_ops = { int __init amd_iommu_init_passthrough(void) { - struct amd_iommu *iommu; + struct iommu_dev_data *dev_data; struct pci_dev *dev = NULL; + struct amd_iommu *iommu; u16 devid; + int ret; - /* allocate passthrough domain */ - pt_domain = protection_domain_alloc(); - if (!pt_domain) - return -ENOMEM; - - pt_domain->mode |= PAGE_MODE_NONE; + ret = alloc_passthrough_domain(); + if (ret) + return ret; for_each_pci_dev(dev) { if (!check_device(&dev->dev)) continue; + dev_data = get_dev_data(&dev->dev); + dev_data->passthrough = true; + devid = get_device_id(&dev->dev); iommu = amd_iommu_rlookup_table[devid]; @@ -2820,7 +3259,375 @@ int __init amd_iommu_init_passthrough(void) attach_device(&dev->dev, pt_domain); } + amd_iommu_stats_init(); + pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); return 0; } + +/* IOMMUv2 specific functions */ +int amd_iommu_register_ppr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&ppr_notifier, nb); +} +EXPORT_SYMBOL(amd_iommu_register_ppr_notifier); + +int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&ppr_notifier, nb); +} +EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); + +void amd_iommu_domain_direct_map(struct iommu_domain *dom) +{ + struct protection_domain *domain = dom->priv; + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); + + /* Update data structure */ + domain->mode = PAGE_MODE_NONE; + domain->updated = true; + + /* Make changes visible to IOMMUs */ + update_domain(domain); + + /* Page-table is not visible to IOMMU anymore, so free it */ + free_pagetable(domain); + + spin_unlock_irqrestore(&domain->lock, flags); +} +EXPORT_SYMBOL(amd_iommu_domain_direct_map); + +int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) +{ + struct protection_domain *domain = dom->priv; + unsigned long flags; + int levels, ret; + + if (pasids <= 0 || pasids > (PASID_MASK + 1)) + return -EINVAL; + + /* Number of GCR3 table levels required */ + for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) + levels += 1; + + if (levels > amd_iommu_max_glx_val) + return -EINVAL; + + spin_lock_irqsave(&domain->lock, flags); + + /* + * Save us all sanity checks whether devices already in the + * domain support IOMMUv2. Just force that the domain has no + * devices attached when it is switched into IOMMUv2 mode. 
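The new device_group callback folds all functions of a conventional multi-function device onto function 0 before consulting the alias table; virtual functions, and the case where iommu_group_mf is not set, use the device's own ID. A userspace sketch of the devid folding, assuming calc_devid() packs bus and devfn the same way the rest of this driver does:

/* Sketch of the grouping rule in amd_iommu_device_group(). */
#include <assert.h>
#include <stdint.h>

#define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)

static uint16_t calc_devid(uint8_t bus, uint8_t devfn)
{
        return ((uint16_t)bus << 8) | devfn;    /* PCI requester ID */
}

int main(void)
{
        uint8_t bus = 0x03, devfn = PCI_DEVFN(0x02, 5); /* device 03:02.5 */

        /* fold onto function 0 -> 03:02.0, then the alias table gives the group */
        assert(calc_devid(bus, PCI_DEVFN(PCI_SLOT(devfn), 0)) == 0x0310);
        return 0;
}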
+ */ + ret = -EBUSY; + if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK) + goto out; + + ret = -ENOMEM; + domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); + if (domain->gcr3_tbl == NULL) + goto out; + + domain->glx = levels; + domain->flags |= PD_IOMMUV2_MASK; + domain->updated = true; + + update_domain(domain); + + ret = 0; + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_domain_enable_v2); + +static int __flush_pasid(struct protection_domain *domain, int pasid, + u64 address, bool size) +{ + struct iommu_dev_data *dev_data; + struct iommu_cmd cmd; + int i, ret; + + if (!(domain->flags & PD_IOMMUV2_MASK)) + return -EINVAL; + + build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size); + + /* + * IOMMU TLB needs to be flushed before Device TLB to + * prevent device TLB refill from IOMMU TLB + */ + for (i = 0; i < amd_iommus_present; ++i) { + if (domain->dev_iommu[i] == 0) + continue; + + ret = iommu_queue_command(amd_iommus[i], &cmd); + if (ret != 0) + goto out; + } + + /* Wait until IOMMU TLB flushes are complete */ + domain_flush_complete(domain); + + /* Now flush device TLBs */ + list_for_each_entry(dev_data, &domain->dev_list, list) { + struct amd_iommu *iommu; + int qdep; + + BUG_ON(!dev_data->ats.enabled); + + qdep = dev_data->ats.qdep; + iommu = amd_iommu_rlookup_table[dev_data->devid]; + + build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid, + qdep, address, size); + + ret = iommu_queue_command(iommu, &cmd); + if (ret != 0) + goto out; + } + + /* Wait until all device TLBs are flushed */ + domain_flush_complete(domain); + + ret = 0; + +out: + + return ret; +} + +static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid, + u64 address) +{ + INC_STATS_COUNTER(invalidate_iotlb); + + return __flush_pasid(domain, pasid, address, false); +} + +int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, + u64 address) +{ + struct protection_domain *domain = dom->priv; + unsigned long flags; + int ret; + + spin_lock_irqsave(&domain->lock, flags); + ret = __amd_iommu_flush_page(domain, pasid, address); + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_flush_page); + +static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid) +{ + INC_STATS_COUNTER(invalidate_iotlb_all); + + return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + true); +} + +int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid) +{ + struct protection_domain *domain = dom->priv; + unsigned long flags; + int ret; + + spin_lock_irqsave(&domain->lock, flags); + ret = __amd_iommu_flush_tlb(domain, pasid); + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_flush_tlb); + +static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc) +{ + int index; + u64 *pte; + + while (true) { + + index = (pasid >> (9 * level)) & 0x1ff; + pte = &root[index]; + + if (level == 0) + break; + + if (!(*pte & GCR3_VALID)) { + if (!alloc) + return NULL; + + root = (void *)get_zeroed_page(GFP_ATOMIC); + if (root == NULL) + return NULL; + + *pte = __pa(root) | GCR3_VALID; + } + + root = __va(*pte & PAGE_MASK); + + level -= 1; + } + + return pte; +} + +static int __set_gcr3(struct protection_domain *domain, int pasid, + unsigned long cr3) +{ + u64 *pte; + + if (domain->mode != PAGE_MODE_NONE) + return -EINVAL; + + pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); + if (pte == NULL) + return -ENOMEM; + + *pte = (cr3 & 
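The level computation in amd_iommu_domain_enable_v2() sizes the GCR3 table from the requested PASID count; each table level resolves nine PASID bits. A standalone copy of the loop with a few worked values:

#include <assert.h>

static int gcr3_levels(int pasids)
{
        int levels;

        for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
                levels += 1;
        return levels;
}

int main(void)
{
        assert(gcr3_levels(512)      == 0);     /*  9-bit PASIDs: single-level table */
        assert(gcr3_levels(0x10000)  == 1);     /* 16-bit PASIDs: two levels         */
        assert(gcr3_levels(0x100000) == 2);     /* 20-bit PASIDs: three levels       */
        return 0;
}

The result is stored in domain->glx and rejected if it exceeds the amd_iommu_max_glx_val reported by the hardware.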
PAGE_MASK) | GCR3_VALID; + + return __amd_iommu_flush_tlb(domain, pasid); +} + +static int __clear_gcr3(struct protection_domain *domain, int pasid) +{ + u64 *pte; + + if (domain->mode != PAGE_MODE_NONE) + return -EINVAL; + + pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); + if (pte == NULL) + return 0; + + *pte = 0; + + return __amd_iommu_flush_tlb(domain, pasid); +} + +int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, + unsigned long cr3) +{ + struct protection_domain *domain = dom->priv; + unsigned long flags; + int ret; + + spin_lock_irqsave(&domain->lock, flags); + ret = __set_gcr3(domain, pasid, cr3); + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_domain_set_gcr3); + +int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid) +{ + struct protection_domain *domain = dom->priv; + unsigned long flags; + int ret; + + spin_lock_irqsave(&domain->lock, flags); + ret = __clear_gcr3(domain, pasid); + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3); + +int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, + int status, int tag) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + struct iommu_cmd cmd; + + INC_STATS_COUNTER(complete_ppr); + + dev_data = get_dev_data(&pdev->dev); + iommu = amd_iommu_rlookup_table[dev_data->devid]; + + build_complete_ppr(&cmd, dev_data->devid, pasid, status, + tag, dev_data->pri_tlp); + + return iommu_queue_command(iommu, &cmd); +} +EXPORT_SYMBOL(amd_iommu_complete_ppr); + +struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev) +{ + struct protection_domain *domain; + + domain = get_domain(&pdev->dev); + if (IS_ERR(domain)) + return NULL; + + /* Only return IOMMUv2 domains */ + if (!(domain->flags & PD_IOMMUV2_MASK)) + return NULL; + + return domain->iommu_domain; +} +EXPORT_SYMBOL(amd_iommu_get_v2_domain); + +void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum) +{ + struct iommu_dev_data *dev_data; + + if (!amd_iommu_v2_supported()) + return; + + dev_data = get_dev_data(&pdev->dev); + dev_data->errata |= (1 << erratum); +} +EXPORT_SYMBOL(amd_iommu_enable_device_erratum); + +int amd_iommu_device_info(struct pci_dev *pdev, + struct amd_iommu_device_info *info) +{ + int max_pasids; + int pos; + + if (pdev == NULL || info == NULL) + return -EINVAL; + + if (!amd_iommu_v2_supported()) + return -EINVAL; + + memset(info, 0, sizeof(*info)); + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS); + if (pos) + info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); + if (pos) + info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); + if (pos) { + int features; + + max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1)); + max_pasids = min(max_pasids, (1 << 20)); + + info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + info->max_pasids = min(pci_max_pasids(pdev), max_pasids); + + features = pci_pasid_features(pdev); + if (features & PCI_PASID_CAP_EXEC) + info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; + if (features & PCI_PASID_CAP_PRIV) + info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; + } + + return 0; +} +EXPORT_SYMBOL(amd_iommu_device_info); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 82d2410f4205..bdea288dc185 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -25,6 +25,7 @@ #include 
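A sketch of how a device driver might consume amd_iommu_device_info() before enabling PASID use. It assumes the struct amd_iommu_device_info layout and flag names from <linux/amd-iommu.h>, which are only referenced, not defined, in this hunk:

#include <linux/amd-iommu.h>
#include <linux/errno.h>
#include <linux/pci.h>

static int example_probe_iommuv2(struct pci_dev *pdev)
{
        struct amd_iommu_device_info info;
        int ret;

        ret = amd_iommu_device_info(pdev, &info);
        if (ret)
                return ret;

        /* demand paging needs ATS, PRI and PASID support on the device side */
        if (!(info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) ||
            !(info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) ||
            !(info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP))
                return -ENODEV;

        return amd_iommu_init_device(pdev, info.max_pasids);
}

max_pasids is already clamped to what both the device and the weakest IOMMU in the system can handle, so it can be passed straight to amd_iommu_init_device().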
<linux/interrupt.h> #include <linux/msi.h> #include <linux/amd-iommu.h> +#include <linux/export.h> #include <asm/pci-direct.h> #include <asm/iommu.h> #include <asm/gart.h> @@ -141,6 +142,12 @@ int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; bool amd_iommu_iotlb_sup __read_mostly = true; +u32 amd_iommu_max_pasids __read_mostly = ~0; + +bool amd_iommu_v2_present __read_mostly; + +bool amd_iommu_force_isolation __read_mostly; + /* * The ACPI table parsing functions set this variable on an error */ @@ -299,6 +306,16 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } +static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= ~CTRL_INV_TO_MASK; + ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + /* Function to enable the hardware */ static void iommu_enable(struct amd_iommu *iommu) { @@ -581,21 +598,69 @@ static void __init free_event_buffer(struct amd_iommu *iommu) free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); } +/* allocates the memory where the IOMMU will log its events to */ +static u8 * __init alloc_ppr_log(struct amd_iommu *iommu) +{ + iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(PPR_LOG_SIZE)); + + if (iommu->ppr_log == NULL) + return NULL; + + return iommu->ppr_log; +} + +static void iommu_enable_ppr_log(struct amd_iommu *iommu) +{ + u64 entry; + + if (iommu->ppr_log == NULL) + return; + + entry = (u64)virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; + + memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, + &entry, sizeof(entry)); + + /* set head and tail to zero manually */ + writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_PPFLOG_EN); + iommu_feature_enable(iommu, CONTROL_PPR_EN); +} + +static void __init free_ppr_log(struct amd_iommu *iommu) +{ + if (iommu->ppr_log == NULL) + return; + + free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); +} + +static void iommu_enable_gt(struct amd_iommu *iommu) +{ + if (!iommu_feature(iommu, FEATURE_GT)) + return; + + iommu_feature_enable(iommu, CONTROL_GT_EN); +} + /* sets a specific bit in the device table entry. 
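iommu_set_inv_tlb_timeout() above programs the new 3-bit invalidation-timeout field; the CTRL_INV_TO_* encodings it relies on are added to amd_iommu_types.h further down. A standalone check of the bit placement:

#include <assert.h>
#include <stdint.h>

#define CONTROL_INV_TIMEOUT     0x05ULL
#define CTRL_INV_TO_MASK        (7 << CONTROL_INV_TIMEOUT)
#define CTRL_INV_TO_1S          4

static uint32_t set_inv_timeout(uint32_t ctrl, int timeout)
{
        ctrl &= ~CTRL_INV_TO_MASK;
        ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
        return ctrl;
}

int main(void)
{
        /* 1s timeout -> value 4 in bits [7:5], all other bits untouched */
        assert(set_inv_timeout(0xffffffff, CTRL_INV_TO_1S) == 0xffffff9f);
        return 0;
}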
*/ static void set_dev_entry_bit(u16 devid, u8 bit) { - int i = (bit >> 5) & 0x07; - int _bit = bit & 0x1f; + int i = (bit >> 6) & 0x03; + int _bit = bit & 0x3f; - amd_iommu_dev_table[devid].data[i] |= (1 << _bit); + amd_iommu_dev_table[devid].data[i] |= (1UL << _bit); } static int get_dev_entry_bit(u16 devid, u8 bit) { - int i = (bit >> 5) & 0x07; - int _bit = bit & 0x1f; + int i = (bit >> 6) & 0x03; + int _bit = bit & 0x3f; - return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit; + return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit; } @@ -699,6 +764,32 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) iommu->features = ((u64)high << 32) | low; + if (iommu_feature(iommu, FEATURE_GT)) { + int glxval; + u32 pasids; + u64 shift; + + shift = iommu->features & FEATURE_PASID_MASK; + shift >>= FEATURE_PASID_SHIFT; + pasids = (1 << shift); + + amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids); + + glxval = iommu->features & FEATURE_GLXVAL_MASK; + glxval >>= FEATURE_GLXVAL_SHIFT; + + if (amd_iommu_max_glx_val == -1) + amd_iommu_max_glx_val = glxval; + else + amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); + } + + if (iommu_feature(iommu, FEATURE_GT) && + iommu_feature(iommu, FEATURE_PPR)) { + iommu->is_iommu_v2 = true; + amd_iommu_v2_present = true; + } + if (!is_rd890_iommu(iommu->dev)) return; @@ -901,6 +992,7 @@ static void __init free_iommu_one(struct amd_iommu *iommu) { free_command_buffer(iommu); free_event_buffer(iommu); + free_ppr_log(iommu); iommu_unmap_mmio_space(iommu); } @@ -964,6 +1056,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); + if (iommu_feature(iommu, FEATURE_PPR)) { + iommu->ppr_log = alloc_ppr_log(iommu); + if (!iommu->ppr_log) + return -ENOMEM; + } + if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) amd_iommu_np_cache = true; @@ -1050,6 +1148,9 @@ static int iommu_setup_msi(struct amd_iommu *iommu) iommu->int_enabled = true; iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); + if (iommu->ppr_log != NULL) + iommu_feature_enable(iommu, CONTROL_PPFINT_EN); + return 0; } @@ -1209,6 +1310,9 @@ static void iommu_init_flags(struct amd_iommu *iommu) * make IOMMU memory accesses cache coherent */ iommu_feature_enable(iommu, CONTROL_COHERENT_EN); + + /* Set IOTLB invalidation timeout to 1s */ + iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); } static void iommu_apply_resume_quirks(struct amd_iommu *iommu) @@ -1274,6 +1378,8 @@ static void enable_iommus(void) iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); + iommu_enable_ppr_log(iommu); + iommu_enable_gt(iommu); iommu_set_exclusion_range(iommu); iommu_init_msi(iommu); iommu_enable(iommu); @@ -1303,13 +1409,6 @@ static void amd_iommu_resume(void) /* re-load the hardware */ enable_iommus(); - - /* - * we have to flush after the IOMMUs are enabled because a - * disabled IOMMU will never execute the commands we send - */ - for_each_iommu(iommu) - iommu_flush_all_caches(iommu); } static int amd_iommu_suspend(void) @@ -1560,6 +1659,8 @@ static int __init parse_amd_iommu_options(char *str) amd_iommu_unmap_flush = true; if (strncmp(str, "off", 3) == 0) amd_iommu_disabled = true; + if (strncmp(str, "force_isolation", 15) == 0) + amd_iommu_force_isolation = true; } return 1; @@ -1572,3 +1673,9 @@ IOMMU_INIT_FINISH(amd_iommu_detect, gart_iommu_hole_init, 0, 0); + +bool amd_iommu_v2_supported(void) +{ + return amd_iommu_v2_present; +} 
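set_dev_entry_bit()/get_dev_entry_bit() now index the device table entry as four 64-bit words instead of eight 32-bit words, which is needed once fields above bit 31 (the IOTLB flag, the GV/GLX bits, the GCR3 pointer) have to be set with 64-bit shifts. Both index calculations land on the same physical bit, for example DTE bit 98:

#include <assert.h>

int main(void)
{
        int bit = 98;

        /* old layout: u32 data[8] */
        int i32 = (bit >> 5) & 0x07;    /* word 3  */
        int b32 = bit & 0x1f;           /* bit 2   */

        /* new layout: u64 data[4] */
        int i64 = (bit >> 6) & 0x03;    /* qword 1 */
        int b64 = bit & 0x3f;           /* bit 34  */

        assert(i32 * 32 + b32 == bit);
        assert(i64 * 64 + b64 == bit);
        return 0;
}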
+EXPORT_SYMBOL(amd_iommu_v2_supported); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 7ffaa64410b0..1a7f41c6cc66 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -31,6 +31,30 @@ extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); extern void amd_iommu_init_api(void); + +/* IOMMUv2 specific functions */ +struct iommu_domain; + +extern bool amd_iommu_v2_supported(void); +extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb); +extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); +extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); +extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); +extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, + u64 address); +extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); +extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, + unsigned long cr3); +extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); +extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); + +#define PPR_SUCCESS 0x0 +#define PPR_INVALID 0x1 +#define PPR_FAILURE 0xf + +extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, + int status, int tag); + #ifndef CONFIG_AMD_IOMMU_STATS static inline void amd_iommu_stats_init(void) { } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 5b9c5075e81a..2452f3b71736 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -69,11 +69,14 @@ #define MMIO_EXCL_BASE_OFFSET 0x0020 #define MMIO_EXCL_LIMIT_OFFSET 0x0028 #define MMIO_EXT_FEATURES 0x0030 +#define MMIO_PPR_LOG_OFFSET 0x0038 #define MMIO_CMD_HEAD_OFFSET 0x2000 #define MMIO_CMD_TAIL_OFFSET 0x2008 #define MMIO_EVT_HEAD_OFFSET 0x2010 #define MMIO_EVT_TAIL_OFFSET 0x2018 #define MMIO_STATUS_OFFSET 0x2020 +#define MMIO_PPR_HEAD_OFFSET 0x2030 +#define MMIO_PPR_TAIL_OFFSET 0x2038 /* Extended Feature Bits */ @@ -87,8 +90,17 @@ #define FEATURE_HE (1ULL<<8) #define FEATURE_PC (1ULL<<9) +#define FEATURE_PASID_SHIFT 32 +#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT) + +#define FEATURE_GLXVAL_SHIFT 14 +#define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT) + +#define PASID_MASK 0x000fffff + /* MMIO status bits */ -#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 +#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2) +#define MMIO_STATUS_PPR_INT_MASK (1 << 6) /* event logging constants */ #define EVENT_ENTRY_SIZE 0x10 @@ -115,6 +127,7 @@ #define CONTROL_EVT_LOG_EN 0x02ULL #define CONTROL_EVT_INT_EN 0x03ULL #define CONTROL_COMWAIT_EN 0x04ULL +#define CONTROL_INV_TIMEOUT 0x05ULL #define CONTROL_PASSPW_EN 0x08ULL #define CONTROL_RESPASSPW_EN 0x09ULL #define CONTROL_COHERENT_EN 0x0aULL @@ -122,18 +135,34 @@ #define CONTROL_CMDBUF_EN 0x0cULL #define CONTROL_PPFLOG_EN 0x0dULL #define CONTROL_PPFINT_EN 0x0eULL +#define CONTROL_PPR_EN 0x0fULL +#define CONTROL_GT_EN 0x10ULL + +#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) +#define CTRL_INV_TO_NONE 0 +#define CTRL_INV_TO_1MS 1 +#define CTRL_INV_TO_10MS 2 +#define CTRL_INV_TO_100MS 3 +#define CTRL_INV_TO_1S 4 +#define CTRL_INV_TO_10S 5 +#define CTRL_INV_TO_100S 6 /* command specific defines */ #define CMD_COMPL_WAIT 0x01 #define CMD_INV_DEV_ENTRY 0x02 #define CMD_INV_IOMMU_PAGES 0x03 #define CMD_INV_IOTLB_PAGES 0x04 +#define CMD_COMPLETE_PPR 0x07 #define CMD_INV_ALL 0x08 
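The PASID and GLX fields that init_iommu_from_pci() now reads come out of the extended feature register via the masks above; the smallest values across all IOMMUs end up in amd_iommu_max_pasids and amd_iommu_max_glx_val. A standalone decode with an example register value:

#include <assert.h>
#include <stdint.h>

#define FEATURE_PASID_SHIFT     32
#define FEATURE_PASID_MASK      (0x1fULL << FEATURE_PASID_SHIFT)
#define FEATURE_GLXVAL_SHIFT    14
#define FEATURE_GLXVAL_MASK     (0x03ULL << FEATURE_GLXVAL_SHIFT)

int main(void)
{
        /* example feature register: PASID field = 15, GLXVAL = 1 */
        uint64_t features = (15ULL << FEATURE_PASID_SHIFT) |
                            (1ULL << FEATURE_GLXVAL_SHIFT);

        uint32_t pasids = 1u << ((features & FEATURE_PASID_MASK) >> FEATURE_PASID_SHIFT);
        int glxval = (features & FEATURE_GLXVAL_MASK) >> FEATURE_GLXVAL_SHIFT;

        assert(pasids == 0x8000);       /* this IOMMU supports 2^15 PASIDs       */
        assert(glxval == 1);            /* GLX 0 or 1: up to a 2-level GCR3 table */
        return 0;
}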
#define CMD_COMPL_WAIT_STORE_MASK 0x01 #define CMD_COMPL_WAIT_INT_MASK 0x02 #define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 #define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 +#define CMD_INV_IOMMU_PAGES_GN_MASK 0x04 + +#define PPR_STATUS_MASK 0xf +#define PPR_STATUS_SHIFT 12 #define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL @@ -165,6 +194,23 @@ #define EVT_BUFFER_SIZE 8192 /* 512 entries */ #define EVT_LEN_MASK (0x9ULL << 56) +/* Constants for PPR Log handling */ +#define PPR_LOG_ENTRIES 512 +#define PPR_LOG_SIZE_SHIFT 56 +#define PPR_LOG_SIZE_512 (0x9ULL << PPR_LOG_SIZE_SHIFT) +#define PPR_ENTRY_SIZE 16 +#define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES) + +#define PPR_REQ_TYPE(x) (((x) >> 60) & 0xfULL) +#define PPR_FLAGS(x) (((x) >> 48) & 0xfffULL) +#define PPR_DEVID(x) ((x) & 0xffffULL) +#define PPR_TAG(x) (((x) >> 32) & 0x3ffULL) +#define PPR_PASID1(x) (((x) >> 16) & 0xffffULL) +#define PPR_PASID2(x) (((x) >> 42) & 0xfULL) +#define PPR_PASID(x) ((PPR_PASID2(x) << 16) | PPR_PASID1(x)) + +#define PPR_REQ_FAULT 0x01 + #define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 @@ -230,7 +276,24 @@ #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) -#define DTE_FLAG_IOTLB 0x01 +#define DTE_FLAG_IOTLB (0x01UL << 32) +#define DTE_FLAG_GV (0x01ULL << 55) +#define DTE_GLX_SHIFT (56) +#define DTE_GLX_MASK (3) + +#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) +#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) +#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0xfffffULL) + +#define DTE_GCR3_INDEX_A 0 +#define DTE_GCR3_INDEX_B 1 +#define DTE_GCR3_INDEX_C 1 + +#define DTE_GCR3_SHIFT_A 58 +#define DTE_GCR3_SHIFT_B 16 +#define DTE_GCR3_SHIFT_C 43 + +#define GCR3_VALID 0x01ULL #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) @@ -257,6 +320,7 @@ domain for an IOMMU */ #define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page translation */ +#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */ extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) \ @@ -285,6 +349,29 @@ extern bool amd_iommu_iotlb_sup; #define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) + +/* + * This struct is used to pass information about + * incoming PPR faults around. + */ +struct amd_iommu_fault { + u64 address; /* IO virtual address of the fault*/ + u32 pasid; /* Address space identifier */ + u16 device_id; /* Originating PCI device id */ + u16 tag; /* PPR tag */ + u16 flags; /* Fault flags */ + +}; + +#define PPR_FAULT_EXEC (1 << 1) +#define PPR_FAULT_READ (1 << 2) +#define PPR_FAULT_WRITE (1 << 5) +#define PPR_FAULT_USER (1 << 6) +#define PPR_FAULT_RSVD (1 << 7) +#define PPR_FAULT_GN (1 << 8) + +struct iommu_domain; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. 
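The PPR_* accessors above decode the first quadword of a 16-byte PPR log entry into the new struct amd_iommu_fault. The code that actually walks the log lives in amd_iommu.c and is not part of this excerpt, so the helper below is only a sketch of the field layout (the second quadword presumably carries the faulting address):

#include "amd_iommu_types.h"

static void example_decode_ppr(u64 raw[2], struct amd_iommu_fault *fault)
{
        fault->address   = raw[1];              /* second qword: faulting IOVA      */
        fault->device_id = PPR_DEVID(raw[0]);   /* bits  0-15: requester ID         */
        fault->pasid     = PPR_PASID(raw[0]);   /* bits 16-31 plus 42-45, 20 bits   */
        fault->tag       = PPR_TAG(raw[0]);     /* bits 32-41: PRI tag + finish bit */
        fault->flags     = PPR_FLAGS(raw[0]);   /* bits 48-59: PPR_FAULT_* flags    */
}

The 10-bit tag field is later split again by ppr_notifier() in amd_iommu_v2.c into a 9-bit tag and the completion-required bit.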
@@ -297,11 +384,15 @@ struct protection_domain { u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ u64 *pt_root; /* page table root pointer */ + int glx; /* Number of levels for GCR3 table */ + u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ void *priv; /* private data */ + struct iommu_domain *iommu_domain; /* Pointer to generic + domain structure */ }; @@ -315,10 +406,15 @@ struct iommu_dev_data { struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reverent count */ u16 devid; /* PCI Device ID */ + bool iommu_v2; /* Device can make use of IOMMUv2 */ + bool passthrough; /* Default for device is pt_domain */ struct { bool enabled; int qdep; } ats; /* ATS state */ + bool pri_tlp; /* PASID TLB required for + PPR completions */ + u32 errata; /* Bitmap for errata to apply */ }; /* @@ -399,6 +495,9 @@ struct amd_iommu { /* Extended features */ u64 features; + /* IOMMUv2 */ + bool is_iommu_v2; + /* * Capability pointer. There could be more than one IOMMU per PCI * device function if there are more than one AMD IOMMU capability @@ -431,6 +530,9 @@ struct amd_iommu { /* MSI number for event interrupt */ u16 evt_msi_num; + /* Base of the PPR log, if present */ + u8 *ppr_log; + /* true if interrupts for this IOMMU are already enabled */ bool int_enabled; @@ -484,7 +586,7 @@ extern struct list_head amd_iommu_pd_list; * Structure defining one entry in the device table */ struct dev_table_entry { - u32 data[8]; + u64 data[4]; }; /* @@ -549,6 +651,16 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap; */ extern bool amd_iommu_unmap_flush; +/* Smallest number of PASIDs supported by any IOMMU in the system */ +extern u32 amd_iommu_max_pasids; + +extern bool amd_iommu_v2_present; + +extern bool amd_iommu_force_isolation; + +/* Max levels of glxval supported */ +extern int amd_iommu_max_glx_val; + /* takes bus and device/function and returns the device id * FIXME: should that be in generic PCI code? */ static inline u16 calc_devid(u8 bus, u8 devfn) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c new file mode 100644 index 000000000000..8add9f125d3e --- /dev/null +++ b/drivers/iommu/amd_iommu_v2.c @@ -0,0 +1,994 @@ +/* + * Copyright (C) 2010-2012 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/mmu_notifier.h> +#include <linux/amd-iommu.h> +#include <linux/mm_types.h> +#include <linux/profile.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/iommu.h> +#include <linux/wait.h> +#include <linux/pci.h> +#include <linux/gfp.h> + +#include "amd_iommu_types.h" +#include "amd_iommu_proto.h" + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>"); + +#define MAX_DEVICES 0x10000 +#define PRI_QUEUE_SIZE 512 + +struct pri_queue { + atomic_t inflight; + bool finish; + int status; +}; + +struct pasid_state { + struct list_head list; /* For global state-list */ + atomic_t count; /* Reference count */ + struct task_struct *task; /* Task bound to this PASID */ + struct mm_struct *mm; /* mm_struct for the faults */ + struct mmu_notifier mn; /* mmu_otifier handle */ + struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ + struct device_state *device_state; /* Link to our device_state */ + int pasid; /* PASID index */ + spinlock_t lock; /* Protect pri_queues */ + wait_queue_head_t wq; /* To wait for count == 0 */ +}; + +struct device_state { + atomic_t count; + struct pci_dev *pdev; + struct pasid_state **states; + struct iommu_domain *domain; + int pasid_levels; + int max_pasids; + amd_iommu_invalid_ppr_cb inv_ppr_cb; + amd_iommu_invalidate_ctx inv_ctx_cb; + spinlock_t lock; + wait_queue_head_t wq; +}; + +struct fault { + struct work_struct work; + struct device_state *dev_state; + struct pasid_state *state; + struct mm_struct *mm; + u64 address; + u16 devid; + u16 pasid; + u16 tag; + u16 finish; + u16 flags; +}; + +struct device_state **state_table; +static spinlock_t state_lock; + +/* List and lock for all pasid_states */ +static LIST_HEAD(pasid_state_list); +static DEFINE_SPINLOCK(ps_lock); + +static struct workqueue_struct *iommu_wq; + +/* + * Empty page table - Used between + * mmu_notifier_invalidate_range_start and + * mmu_notifier_invalidate_range_end + */ +static u64 *empty_page_table; + +static void free_pasid_states(struct device_state *dev_state); +static void unbind_pasid(struct device_state *dev_state, int pasid); +static int task_exit(struct notifier_block *nb, unsigned long e, void *data); + +static u16 device_id(struct pci_dev *pdev) +{ + u16 devid; + + devid = pdev->bus->number; + devid = (devid << 8) | pdev->devfn; + + return devid; +} + +static struct device_state *get_device_state(u16 devid) +{ + struct device_state *dev_state; + unsigned long flags; + + spin_lock_irqsave(&state_lock, flags); + dev_state = state_table[devid]; + if (dev_state != NULL) + atomic_inc(&dev_state->count); + spin_unlock_irqrestore(&state_lock, flags); + + return dev_state; +} + +static void free_device_state(struct device_state *dev_state) +{ + /* + * First detach device from domain - No more PRI requests will arrive + * from that device after it is unbound from the IOMMUv2 domain. 
+ */ + iommu_detach_device(dev_state->domain, &dev_state->pdev->dev); + + /* Everything is down now, free the IOMMUv2 domain */ + iommu_domain_free(dev_state->domain); + + /* Finally get rid of the device-state */ + kfree(dev_state); +} + +static void put_device_state(struct device_state *dev_state) +{ + if (atomic_dec_and_test(&dev_state->count)) + wake_up(&dev_state->wq); +} + +static void put_device_state_wait(struct device_state *dev_state) +{ + DEFINE_WAIT(wait); + + prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE); + if (!atomic_dec_and_test(&dev_state->count)) + schedule(); + finish_wait(&dev_state->wq, &wait); + + free_device_state(dev_state); +} + +static struct notifier_block profile_nb = { + .notifier_call = task_exit, +}; + +static void link_pasid_state(struct pasid_state *pasid_state) +{ + spin_lock(&ps_lock); + list_add_tail(&pasid_state->list, &pasid_state_list); + spin_unlock(&ps_lock); +} + +static void __unlink_pasid_state(struct pasid_state *pasid_state) +{ + list_del(&pasid_state->list); +} + +static void unlink_pasid_state(struct pasid_state *pasid_state) +{ + spin_lock(&ps_lock); + __unlink_pasid_state(pasid_state); + spin_unlock(&ps_lock); +} + +/* Must be called under dev_state->lock */ +static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, + int pasid, bool alloc) +{ + struct pasid_state **root, **ptr; + int level, index; + + level = dev_state->pasid_levels; + root = dev_state->states; + + while (true) { + + index = (pasid >> (9 * level)) & 0x1ff; + ptr = &root[index]; + + if (level == 0) + break; + + if (*ptr == NULL) { + if (!alloc) + return NULL; + + *ptr = (void *)get_zeroed_page(GFP_ATOMIC); + if (*ptr == NULL) + return NULL; + } + + root = (struct pasid_state **)*ptr; + level -= 1; + } + + return ptr; +} + +static int set_pasid_state(struct device_state *dev_state, + struct pasid_state *pasid_state, + int pasid) +{ + struct pasid_state **ptr; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dev_state->lock, flags); + ptr = __get_pasid_state_ptr(dev_state, pasid, true); + + ret = -ENOMEM; + if (ptr == NULL) + goto out_unlock; + + ret = -ENOMEM; + if (*ptr != NULL) + goto out_unlock; + + *ptr = pasid_state; + + ret = 0; + +out_unlock: + spin_unlock_irqrestore(&dev_state->lock, flags); + + return ret; +} + +static void clear_pasid_state(struct device_state *dev_state, int pasid) +{ + struct pasid_state **ptr; + unsigned long flags; + + spin_lock_irqsave(&dev_state->lock, flags); + ptr = __get_pasid_state_ptr(dev_state, pasid, true); + + if (ptr == NULL) + goto out_unlock; + + *ptr = NULL; + +out_unlock: + spin_unlock_irqrestore(&dev_state->lock, flags); +} + +static struct pasid_state *get_pasid_state(struct device_state *dev_state, + int pasid) +{ + struct pasid_state **ptr, *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev_state->lock, flags); + ptr = __get_pasid_state_ptr(dev_state, pasid, false); + + if (ptr == NULL) + goto out_unlock; + + ret = *ptr; + if (ret) + atomic_inc(&ret->count); + +out_unlock: + spin_unlock_irqrestore(&dev_state->lock, flags); + + return ret; +} + +static void free_pasid_state(struct pasid_state *pasid_state) +{ + kfree(pasid_state); +} + +static void put_pasid_state(struct pasid_state *pasid_state) +{ + if (atomic_dec_and_test(&pasid_state->count)) { + put_device_state(pasid_state->device_state); + wake_up(&pasid_state->wq); + } +} + +static void put_pasid_state_wait(struct pasid_state *pasid_state) +{ + DEFINE_WAIT(wait); + + prepare_to_wait(&pasid_state->wq, &wait, 
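__get_pasid_state_ptr() above walks a per-device radix table with 512 pointers per page, mirroring the GCR3 walk in __get_gcr3_pte(); nine PASID bits are consumed per level. Worked index math for a two-level table (pasid_levels == 1):

#include <assert.h>

int main(void)
{
        int pasid = 0x2c7;                      /* 711 */

        int idx_l1 = (pasid >> 9) & 0x1ff;      /* entry in the root page */
        int idx_l0 = pasid & 0x1ff;             /* entry in the leaf page */

        assert(idx_l1 == 1 && idx_l0 == 0xc7);
        assert(((idx_l1 << 9) | idx_l0) == pasid);
        return 0;
}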
TASK_UNINTERRUPTIBLE); + + if (atomic_dec_and_test(&pasid_state->count)) + put_device_state(pasid_state->device_state); + else + schedule(); + + finish_wait(&pasid_state->wq, &wait); + mmput(pasid_state->mm); + free_pasid_state(pasid_state); +} + +static void __unbind_pasid(struct pasid_state *pasid_state) +{ + struct iommu_domain *domain; + + domain = pasid_state->device_state->domain; + + amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); + clear_pasid_state(pasid_state->device_state, pasid_state->pasid); + + /* Make sure no more pending faults are in the queue */ + flush_workqueue(iommu_wq); + + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + + put_pasid_state(pasid_state); /* Reference taken in bind() function */ +} + +static void unbind_pasid(struct device_state *dev_state, int pasid) +{ + struct pasid_state *pasid_state; + + pasid_state = get_pasid_state(dev_state, pasid); + if (pasid_state == NULL) + return; + + unlink_pasid_state(pasid_state); + __unbind_pasid(pasid_state); + put_pasid_state_wait(pasid_state); /* Reference taken in this function */ +} + +static void free_pasid_states_level1(struct pasid_state **tbl) +{ + int i; + + for (i = 0; i < 512; ++i) { + if (tbl[i] == NULL) + continue; + + free_page((unsigned long)tbl[i]); + } +} + +static void free_pasid_states_level2(struct pasid_state **tbl) +{ + struct pasid_state **ptr; + int i; + + for (i = 0; i < 512; ++i) { + if (tbl[i] == NULL) + continue; + + ptr = (struct pasid_state **)tbl[i]; + free_pasid_states_level1(ptr); + } +} + +static void free_pasid_states(struct device_state *dev_state) +{ + struct pasid_state *pasid_state; + int i; + + for (i = 0; i < dev_state->max_pasids; ++i) { + pasid_state = get_pasid_state(dev_state, i); + if (pasid_state == NULL) + continue; + + put_pasid_state(pasid_state); + unbind_pasid(dev_state, i); + } + + if (dev_state->pasid_levels == 2) + free_pasid_states_level2(dev_state->states); + else if (dev_state->pasid_levels == 1) + free_pasid_states_level1(dev_state->states); + else if (dev_state->pasid_levels != 0) + BUG(); + + free_page((unsigned long)dev_state->states); +} + +static struct pasid_state *mn_to_state(struct mmu_notifier *mn) +{ + return container_of(mn, struct pasid_state, mn); +} + +static void __mn_flush_page(struct mmu_notifier *mn, + unsigned long address) +{ + struct pasid_state *pasid_state; + struct device_state *dev_state; + + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + + amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address); +} + +static int mn_clear_flush_young(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + __mn_flush_page(mn, address); + + return 0; +} + +static void mn_change_pte(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address, + pte_t pte) +{ + __mn_flush_page(mn, address); +} + +static void mn_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + __mn_flush_page(mn, address); +} + +static void mn_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct pasid_state *pasid_state; + struct device_state *dev_state; + + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + + amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid, + __pa(empty_page_table)); +} + +static void mn_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct pasid_state 
*pasid_state; + struct device_state *dev_state; + + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + + amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid, + __pa(pasid_state->mm->pgd)); +} + +static struct mmu_notifier_ops iommu_mn = { + .clear_flush_young = mn_clear_flush_young, + .change_pte = mn_change_pte, + .invalidate_page = mn_invalidate_page, + .invalidate_range_start = mn_invalidate_range_start, + .invalidate_range_end = mn_invalidate_range_end, +}; + +static void set_pri_tag_status(struct pasid_state *pasid_state, + u16 tag, int status) +{ + unsigned long flags; + + spin_lock_irqsave(&pasid_state->lock, flags); + pasid_state->pri[tag].status = status; + spin_unlock_irqrestore(&pasid_state->lock, flags); +} + +static void finish_pri_tag(struct device_state *dev_state, + struct pasid_state *pasid_state, + u16 tag) +{ + unsigned long flags; + + spin_lock_irqsave(&pasid_state->lock, flags); + if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) && + pasid_state->pri[tag].finish) { + amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid, + pasid_state->pri[tag].status, tag); + pasid_state->pri[tag].finish = false; + pasid_state->pri[tag].status = PPR_SUCCESS; + } + spin_unlock_irqrestore(&pasid_state->lock, flags); +} + +static void do_fault(struct work_struct *work) +{ + struct fault *fault = container_of(work, struct fault, work); + int npages, write; + struct page *page; + + write = !!(fault->flags & PPR_FAULT_WRITE); + + npages = get_user_pages(fault->state->task, fault->state->mm, + fault->address, 1, write, 0, &page, NULL); + + if (npages == 1) { + put_page(page); + } else if (fault->dev_state->inv_ppr_cb) { + int status; + + status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev, + fault->pasid, + fault->address, + fault->flags); + switch (status) { + case AMD_IOMMU_INV_PRI_RSP_SUCCESS: + set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS); + break; + case AMD_IOMMU_INV_PRI_RSP_INVALID: + set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); + break; + case AMD_IOMMU_INV_PRI_RSP_FAIL: + set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE); + break; + default: + BUG(); + } + } else { + set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); + } + + finish_pri_tag(fault->dev_state, fault->state, fault->tag); + + put_pasid_state(fault->state); + + kfree(fault); +} + +static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) +{ + struct amd_iommu_fault *iommu_fault; + struct pasid_state *pasid_state; + struct device_state *dev_state; + unsigned long flags; + struct fault *fault; + bool finish; + u16 tag; + int ret; + + iommu_fault = data; + tag = iommu_fault->tag & 0x1ff; + finish = (iommu_fault->tag >> 9) & 1; + + ret = NOTIFY_DONE; + dev_state = get_device_state(iommu_fault->device_id); + if (dev_state == NULL) + goto out; + + pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); + if (pasid_state == NULL) { + /* We know the device but not the PASID -> send INVALID */ + amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, + PPR_INVALID, tag); + goto out_drop_state; + } + + spin_lock_irqsave(&pasid_state->lock, flags); + atomic_inc(&pasid_state->pri[tag].inflight); + if (finish) + pasid_state->pri[tag].finish = true; + spin_unlock_irqrestore(&pasid_state->lock, flags); + + fault = kzalloc(sizeof(*fault), GFP_ATOMIC); + if (fault == NULL) { + /* We are OOM - send success and let the device re-fault */ + finish_pri_tag(dev_state, pasid_state, tag); + goto out_drop_state; + } 
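do_fault() above falls back to the driver's inv_ppr_cb when get_user_pages() cannot resolve the fault, and maps its return value onto the PPR_SUCCESS/PPR_INVALID/PPR_FAILURE completion codes. A minimal sketch of such a callback, assuming the amd_iommu_invalid_ppr_cb prototype in <linux/amd-iommu.h> matches the call site in do_fault():

#include <linux/amd-iommu.h>
#include <linux/pci.h>

static int example_inv_ppr_cb(struct pci_dev *pdev, int pasid,
                              unsigned long address, u16 flags)
{
        /*
         * A real callback would try to resolve the fault itself, e.g. against
         * a device-private address range. Here every unresolved fault is
         * reported as an invalid request so the device receives a PRI failure.
         */
        dev_warn(&pdev->dev, "unresolved PPR fault: pasid %d addr %lx flags %x\n",
                 pasid, address, flags);

        return AMD_IOMMU_INV_PRI_RSP_INVALID;
}

It would be registered through amd_iommu_set_invalid_ppr_cb(pdev, example_inv_ppr_cb), shown further down.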
+ + fault->dev_state = dev_state; + fault->address = iommu_fault->address; + fault->state = pasid_state; + fault->tag = tag; + fault->finish = finish; + fault->flags = iommu_fault->flags; + INIT_WORK(&fault->work, do_fault); + + queue_work(iommu_wq, &fault->work); + + ret = NOTIFY_OK; + +out_drop_state: + put_device_state(dev_state); + +out: + return ret; +} + +static struct notifier_block ppr_nb = { + .notifier_call = ppr_notifier, +}; + +static int task_exit(struct notifier_block *nb, unsigned long e, void *data) +{ + struct pasid_state *pasid_state; + struct task_struct *task; + + task = data; + + /* + * Using this notifier is a hack - but there is no other choice + * at the moment. What I really want is a sleeping notifier that + * is called when an MM goes down. But such a notifier doesn't + * exist yet. The notifier needs to sleep because it has to make + * sure that the device does not use the PASID and the address + * space anymore before it is destroyed. This includes waiting + * for pending PRI requests to pass the workqueue. The + * MMU-Notifiers would be a good fit, but they use RCU and so + * they are not allowed to sleep. Lets see how we can solve this + * in a more intelligent way in the future. + */ +again: + spin_lock(&ps_lock); + list_for_each_entry(pasid_state, &pasid_state_list, list) { + struct device_state *dev_state; + int pasid; + + if (pasid_state->task != task) + continue; + + /* Drop Lock and unbind */ + spin_unlock(&ps_lock); + + dev_state = pasid_state->device_state; + pasid = pasid_state->pasid; + + if (pasid_state->device_state->inv_ctx_cb) + dev_state->inv_ctx_cb(dev_state->pdev, pasid); + + unbind_pasid(dev_state, pasid); + + /* Task may be in the list multiple times */ + goto again; + } + spin_unlock(&ps_lock); + + return NOTIFY_OK; +} + +int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, + struct task_struct *task) +{ + struct pasid_state *pasid_state; + struct device_state *dev_state; + u16 devid; + int ret; + + might_sleep(); + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + devid = device_id(pdev); + dev_state = get_device_state(devid); + + if (dev_state == NULL) + return -EINVAL; + + ret = -EINVAL; + if (pasid < 0 || pasid >= dev_state->max_pasids) + goto out; + + ret = -ENOMEM; + pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL); + if (pasid_state == NULL) + goto out; + + atomic_set(&pasid_state->count, 1); + init_waitqueue_head(&pasid_state->wq); + pasid_state->task = task; + pasid_state->mm = get_task_mm(task); + pasid_state->device_state = dev_state; + pasid_state->pasid = pasid; + pasid_state->mn.ops = &iommu_mn; + + if (pasid_state->mm == NULL) + goto out_free; + + mmu_notifier_register(&pasid_state->mn, pasid_state->mm); + + ret = set_pasid_state(dev_state, pasid_state, pasid); + if (ret) + goto out_unregister; + + ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid, + __pa(pasid_state->mm->pgd)); + if (ret) + goto out_clear_state; + + link_pasid_state(pasid_state); + + return 0; + +out_clear_state: + clear_pasid_state(dev_state, pasid); + +out_unregister: + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + +out_free: + free_pasid_state(pasid_state); + +out: + put_device_state(dev_state); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_bind_pasid); + +void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) +{ + struct device_state *dev_state; + u16 devid; + + might_sleep(); + + if (!amd_iommu_v2_supported()) + return; + + devid = device_id(pdev); + dev_state = get_device_state(devid); + if (dev_state == 
NULL) + return; + + if (pasid < 0 || pasid >= dev_state->max_pasids) + goto out; + + unbind_pasid(dev_state, pasid); + +out: + put_device_state(dev_state); +} +EXPORT_SYMBOL(amd_iommu_unbind_pasid); + +int amd_iommu_init_device(struct pci_dev *pdev, int pasids) +{ + struct device_state *dev_state; + unsigned long flags; + int ret, tmp; + u16 devid; + + might_sleep(); + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + if (pasids <= 0 || pasids > (PASID_MASK + 1)) + return -EINVAL; + + devid = device_id(pdev); + + dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); + if (dev_state == NULL) + return -ENOMEM; + + spin_lock_init(&dev_state->lock); + init_waitqueue_head(&dev_state->wq); + dev_state->pdev = pdev; + + tmp = pasids; + for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) + dev_state->pasid_levels += 1; + + atomic_set(&dev_state->count, 1); + dev_state->max_pasids = pasids; + + ret = -ENOMEM; + dev_state->states = (void *)get_zeroed_page(GFP_KERNEL); + if (dev_state->states == NULL) + goto out_free_dev_state; + + dev_state->domain = iommu_domain_alloc(&pci_bus_type); + if (dev_state->domain == NULL) + goto out_free_states; + + amd_iommu_domain_direct_map(dev_state->domain); + + ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); + if (ret) + goto out_free_domain; + + ret = iommu_attach_device(dev_state->domain, &pdev->dev); + if (ret != 0) + goto out_free_domain; + + spin_lock_irqsave(&state_lock, flags); + + if (state_table[devid] != NULL) { + spin_unlock_irqrestore(&state_lock, flags); + ret = -EBUSY; + goto out_free_domain; + } + + state_table[devid] = dev_state; + + spin_unlock_irqrestore(&state_lock, flags); + + return 0; + +out_free_domain: + iommu_domain_free(dev_state->domain); + +out_free_states: + free_page((unsigned long)dev_state->states); + +out_free_dev_state: + kfree(dev_state); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_init_device); + +void amd_iommu_free_device(struct pci_dev *pdev) +{ + struct device_state *dev_state; + unsigned long flags; + u16 devid; + + if (!amd_iommu_v2_supported()) + return; + + devid = device_id(pdev); + + spin_lock_irqsave(&state_lock, flags); + + dev_state = state_table[devid]; + if (dev_state == NULL) { + spin_unlock_irqrestore(&state_lock, flags); + return; + } + + state_table[devid] = NULL; + + spin_unlock_irqrestore(&state_lock, flags); + + /* Get rid of any remaining pasid states */ + free_pasid_states(dev_state); + + put_device_state_wait(dev_state); +} +EXPORT_SYMBOL(amd_iommu_free_device); + +int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, + amd_iommu_invalid_ppr_cb cb) +{ + struct device_state *dev_state; + unsigned long flags; + u16 devid; + int ret; + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + devid = device_id(pdev); + + spin_lock_irqsave(&state_lock, flags); + + ret = -EINVAL; + dev_state = state_table[devid]; + if (dev_state == NULL) + goto out_unlock; + + dev_state->inv_ppr_cb = cb; + + ret = 0; + +out_unlock: + spin_unlock_irqrestore(&state_lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb); + +int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, + amd_iommu_invalidate_ctx cb) +{ + struct device_state *dev_state; + unsigned long flags; + u16 devid; + int ret; + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + devid = device_id(pdev); + + spin_lock_irqsave(&state_lock, flags); + + ret = -EINVAL; + dev_state = state_table[devid]; + if (dev_state == NULL) + goto out_unlock; + + dev_state->inv_ctx_cb = cb; + + ret = 0; + +out_unlock: + 
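Taken together, the exported entry points above suggest the following calling sequence for a driver that wants demand paging. This is an illustration under the assumption that the declarations live in <linux/amd-iommu.h>; it reuses the example_inv_ppr_cb sketch from earlier and abbreviates error handling:

#include <linux/amd-iommu.h>
#include <linux/pci.h>
#include <linux/sched.h>

static int example_enable_demand_paging(struct pci_dev *pdev, int pasid)
{
        int ret;

        ret = amd_iommu_init_device(pdev, 16);  /* reserve room for 16 PASIDs */
        if (ret)
                return ret;

        /* optional: get notified about unresolvable faults */
        amd_iommu_set_invalid_ppr_cb(pdev, example_inv_ppr_cb);

        /* route 'pasid' on this device to the current task's address space */
        ret = amd_iommu_bind_pasid(pdev, pasid, current);
        if (ret)
                amd_iommu_free_device(pdev);

        return ret;
}

static void example_disable_demand_paging(struct pci_dev *pdev, int pasid)
{
        amd_iommu_unbind_pasid(pdev, pasid);
        amd_iommu_free_device(pdev);
}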
spin_unlock_irqrestore(&state_lock, flags); + + return ret; +} +EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); + +static int __init amd_iommu_v2_init(void) +{ + size_t state_table_size; + int ret; + + pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>"); + + spin_lock_init(&state_lock); + + state_table_size = MAX_DEVICES * sizeof(struct device_state *); + state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(state_table_size)); + if (state_table == NULL) + return -ENOMEM; + + ret = -ENOMEM; + iommu_wq = create_workqueue("amd_iommu_v2"); + if (iommu_wq == NULL) + goto out_free; + + ret = -ENOMEM; + empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL); + if (empty_page_table == NULL) + goto out_destroy_wq; + + amd_iommu_register_ppr_notifier(&ppr_nb); + profile_event_register(PROFILE_TASK_EXIT, &profile_nb); + + return 0; + +out_destroy_wq: + destroy_workqueue(iommu_wq); + +out_free: + free_pages((unsigned long)state_table, get_order(state_table_size)); + + return ret; +} + +static void __exit amd_iommu_v2_exit(void) +{ + struct device_state *dev_state; + size_t state_table_size; + int i; + + profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb); + amd_iommu_unregister_ppr_notifier(&ppr_nb); + + flush_workqueue(iommu_wq); + + /* + * The loop below might call flush_workqueue(), so call + * destroy_workqueue() after it + */ + for (i = 0; i < MAX_DEVICES; ++i) { + dev_state = get_device_state(i); + + if (dev_state == NULL) + continue; + + WARN_ON_ONCE(1); + + put_device_state(dev_state); + amd_iommu_free_device(dev_state->pdev); + } + + destroy_workqueue(iommu_wq); + + state_table_size = MAX_DEVICES * sizeof(struct device_state *); + free_pages((unsigned long)state_table, get_order(state_table_size)); + + free_page((unsigned long)empty_page_table); +} + +module_init(amd_iommu_v2_init); +module_exit(amd_iommu_v2_exit); diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 6dcc7e2d54de..35c1e17fce1d 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -46,7 +46,7 @@ */ LIST_HEAD(dmar_drhd_units); -static struct acpi_table_header * __initdata dmar_tbl; +struct acpi_table_header * __initdata dmar_tbl; static acpi_size dmar_tbl_size; static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) @@ -118,8 +118,8 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, return 0; } -static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, - struct pci_dev ***devices, u16 segment) +int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, + struct pci_dev ***devices, u16 segment) { struct acpi_dmar_device_scope *scope; void * tmp = start; @@ -217,133 +217,6 @@ static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru) return ret; } -#ifdef CONFIG_DMAR -LIST_HEAD(dmar_rmrr_units); - -static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) -{ - list_add(&rmrr->list, &dmar_rmrr_units); -} - - -static int __init -dmar_parse_one_rmrr(struct acpi_dmar_header *header) -{ - struct acpi_dmar_reserved_memory *rmrr; - struct dmar_rmrr_unit *rmrru; - - rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); - if (!rmrru) - return -ENOMEM; - - rmrru->hdr = header; - rmrr = (struct acpi_dmar_reserved_memory *)header; - rmrru->base_address = rmrr->base_address; - rmrru->end_address = rmrr->end_address; - - dmar_register_rmrr_unit(rmrru); - return 0; -} - -static int __init -rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) -{ - struct acpi_dmar_reserved_memory *rmrr; - int ret; - - rmrr = 
(struct acpi_dmar_reserved_memory *) rmrru->hdr; - ret = dmar_parse_dev_scope((void *)(rmrr + 1), - ((void *)rmrr) + rmrr->header.length, - &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); - - if (ret || (rmrru->devices_cnt == 0)) { - list_del(&rmrru->list); - kfree(rmrru); - } - return ret; -} - -static LIST_HEAD(dmar_atsr_units); - -static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) -{ - struct acpi_dmar_atsr *atsr; - struct dmar_atsr_unit *atsru; - - atsr = container_of(hdr, struct acpi_dmar_atsr, header); - atsru = kzalloc(sizeof(*atsru), GFP_KERNEL); - if (!atsru) - return -ENOMEM; - - atsru->hdr = hdr; - atsru->include_all = atsr->flags & 0x1; - - list_add(&atsru->list, &dmar_atsr_units); - - return 0; -} - -static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) -{ - int rc; - struct acpi_dmar_atsr *atsr; - - if (atsru->include_all) - return 0; - - atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); - rc = dmar_parse_dev_scope((void *)(atsr + 1), - (void *)atsr + atsr->header.length, - &atsru->devices_cnt, &atsru->devices, - atsr->segment); - if (rc || !atsru->devices_cnt) { - list_del(&atsru->list); - kfree(atsru); - } - - return rc; -} - -int dmar_find_matched_atsr_unit(struct pci_dev *dev) -{ - int i; - struct pci_bus *bus; - struct acpi_dmar_atsr *atsr; - struct dmar_atsr_unit *atsru; - - dev = pci_physfn(dev); - - list_for_each_entry(atsru, &dmar_atsr_units, list) { - atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); - if (atsr->segment == pci_domain_nr(dev->bus)) - goto found; - } - - return 0; - -found: - for (bus = dev->bus; bus; bus = bus->parent) { - struct pci_dev *bridge = bus->self; - - if (!bridge || !pci_is_pcie(bridge) || - bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) - return 0; - - if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) { - for (i = 0; i < atsru->devices_cnt; i++) - if (atsru->devices[i] == bridge) - return 1; - break; - } - } - - if (atsru->include_all) - return 1; - - return 0; -} -#endif - #ifdef CONFIG_ACPI_NUMA static int __init dmar_parse_one_rhsa(struct acpi_dmar_header *header) @@ -484,14 +357,10 @@ parse_dmar_table(void) ret = dmar_parse_one_drhd(entry_header); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: -#ifdef CONFIG_DMAR ret = dmar_parse_one_rmrr(entry_header); -#endif break; case ACPI_DMAR_TYPE_ATSR: -#ifdef CONFIG_DMAR ret = dmar_parse_one_atsr(entry_header); -#endif break; case ACPI_DMAR_HARDWARE_AFFINITY: #ifdef CONFIG_ACPI_NUMA @@ -557,34 +426,31 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev) int __init dmar_dev_scope_init(void) { + static int dmar_dev_scope_initialized; struct dmar_drhd_unit *drhd, *drhd_n; int ret = -ENODEV; + if (dmar_dev_scope_initialized) + return dmar_dev_scope_initialized; + + if (list_empty(&dmar_drhd_units)) + goto fail; + list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) { ret = dmar_parse_dev(drhd); if (ret) - return ret; + goto fail; } -#ifdef CONFIG_DMAR - { - struct dmar_rmrr_unit *rmrr, *rmrr_n; - struct dmar_atsr_unit *atsr, *atsr_n; - - list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { - ret = rmrr_parse_dev(rmrr); - if (ret) - return ret; - } + ret = dmar_parse_rmrr_atsr_dev(); + if (ret) + goto fail; - list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) { - ret = atsr_parse_dev(atsr); - if (ret) - return ret; - } - } -#endif + dmar_dev_scope_initialized = 1; + return 0; +fail: + dmar_dev_scope_initialized = ret; return ret; } @@ -611,14 +477,6 @@ int __init dmar_table_init(void) return -ENODEV; } -#ifdef 
CONFIG_DMAR - if (list_empty(&dmar_rmrr_units)) - printk(KERN_INFO PREFIX "No RMRR found\n"); - - if (list_empty(&dmar_atsr_units)) - printk(KERN_INFO PREFIX "No ATSR found\n"); -#endif - return 0; } @@ -682,9 +540,6 @@ int __init check_zero_address(void) return 1; failed: -#ifdef CONFIG_DMAR - dmar_disabled = 1; -#endif return 0; } @@ -696,22 +551,21 @@ int __init detect_intel_iommu(void) if (ret) ret = check_zero_address(); { -#ifdef CONFIG_INTR_REMAP struct acpi_table_dmar *dmar; dmar = (struct acpi_table_dmar *) dmar_tbl; - if (ret && cpu_has_x2apic && dmar->flags & 0x1) + + if (ret && intr_remapping_enabled && cpu_has_x2apic && + dmar->flags & 0x1) printk(KERN_INFO - "Queued invalidation will be enabled to support " - "x2apic and Intr-remapping.\n"); -#endif -#ifdef CONFIG_DMAR + "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); + if (ret && !no_iommu && !iommu_detected && !dmar_disabled) { iommu_detected = 1; /* Make sure ACS will be enabled */ pci_request_acs(); } -#endif + #ifdef CONFIG_X86 if (ret) x86_init.iommu.iommu_init = intel_iommu_init; @@ -758,7 +612,6 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) goto err_unmap; } -#ifdef CONFIG_DMAR agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { printk(KERN_ERR @@ -773,7 +626,6 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->seq_id); goto err_unmap; } -#endif iommu->agaw = agaw; iommu->msagaw = msagaw; @@ -800,7 +652,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) (unsigned long long)iommu->cap, (unsigned long long)iommu->ecap); - spin_lock_init(&iommu->register_lock); + raw_spin_lock_init(&iommu->register_lock); drhd->iommu = iommu; return 0; @@ -817,9 +669,7 @@ void free_iommu(struct intel_iommu *iommu) if (!iommu) return; -#ifdef CONFIG_DMAR free_dmar_iommu(iommu); -#endif if (iommu->reg) iounmap(iommu->reg); @@ -921,11 +771,11 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) restart: rc = 0; - spin_lock_irqsave(&qi->q_lock, flags); + raw_spin_lock_irqsave(&qi->q_lock, flags); while (qi->free_cnt < 3) { - spin_unlock_irqrestore(&qi->q_lock, flags); + raw_spin_unlock_irqrestore(&qi->q_lock, flags); cpu_relax(); - spin_lock_irqsave(&qi->q_lock, flags); + raw_spin_lock_irqsave(&qi->q_lock, flags); } index = qi->free_head; @@ -965,15 +815,15 @@ restart: if (rc) break; - spin_unlock(&qi->q_lock); + raw_spin_unlock(&qi->q_lock); cpu_relax(); - spin_lock(&qi->q_lock); + raw_spin_lock(&qi->q_lock); } qi->desc_status[index] = QI_DONE; reclaim_free_desc(qi); - spin_unlock_irqrestore(&qi->q_lock, flags); + raw_spin_unlock_irqrestore(&qi->q_lock, flags); if (rc == -EAGAIN) goto restart; @@ -1062,7 +912,7 @@ void dmar_disable_qi(struct intel_iommu *iommu) if (!ecap_qis(iommu->ecap)) return; - spin_lock_irqsave(&iommu->register_lock, flags); + raw_spin_lock_irqsave(&iommu->register_lock, flags); sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); if (!(sts & DMA_GSTS_QIES)) @@ -1082,7 +932,7 @@ void dmar_disable_qi(struct intel_iommu *iommu) IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, !(sts & DMA_GSTS_QIES), sts); end: - spin_unlock_irqrestore(&iommu->register_lock, flags); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); } /* @@ -1097,7 +947,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) qi->free_head = qi->free_tail = 0; qi->free_cnt = QI_LENGTH; - spin_lock_irqsave(&iommu->register_lock, flags); + raw_spin_lock_irqsave(&iommu->register_lock, flags); /* write zero to the tail reg */ writel(0, iommu->reg + DMAR_IQT_REG); @@ -1110,7 +960,7 @@ static void 
__dmar_enable_qi(struct intel_iommu *iommu) /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); - spin_unlock_irqrestore(&iommu->register_lock, flags); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); } /* @@ -1159,7 +1009,7 @@ int dmar_enable_qi(struct intel_iommu *iommu) qi->free_head = qi->free_tail = 0; qi->free_cnt = QI_LENGTH; - spin_lock_init(&qi->q_lock); + raw_spin_lock_init(&qi->q_lock); __dmar_enable_qi(iommu); @@ -1225,11 +1075,11 @@ void dmar_msi_unmask(struct irq_data *data) unsigned long flag; /* unmask it */ - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); writel(0, iommu->reg + DMAR_FECTL_REG); /* Read a reg to force flush the post write */ readl(iommu->reg + DMAR_FECTL_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_mask(struct irq_data *data) @@ -1238,11 +1088,11 @@ void dmar_msi_mask(struct irq_data *data) struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); /* mask it */ - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); /* Read a reg to force flush the post write */ readl(iommu->reg + DMAR_FECTL_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_write(int irq, struct msi_msg *msg) @@ -1250,11 +1100,11 @@ void dmar_msi_write(int irq, struct msi_msg *msg) struct intel_iommu *iommu = irq_get_handler_data(irq); unsigned long flag; - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); writel(msg->data, iommu->reg + DMAR_FEDATA_REG); writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_read(int irq, struct msi_msg *msg) @@ -1262,11 +1112,11 @@ void dmar_msi_read(int irq, struct msi_msg *msg) struct intel_iommu *iommu = irq_get_handler_data(irq); unsigned long flag; - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); msg->data = readl(iommu->reg + DMAR_FEDATA_REG); msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } static int dmar_fault_do_one(struct intel_iommu *iommu, int type, @@ -1303,7 +1153,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) u32 fault_status; unsigned long flag; - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); fault_status = readl(iommu->reg + DMAR_FSTS_REG); if (fault_status) printk(KERN_ERR "DRHD: handling fault status reg %x\n", @@ -1342,7 +1192,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) writel(DMA_FRCD_F, iommu->reg + reg + fault_index * PRIMARY_FAULT_REG_LEN + 12); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); dmar_fault_do_one(iommu, type, fault_reason, source_id, guest_addr); @@ -1350,14 +1200,14 @@ irqreturn_t dmar_fault(int irq, void *dev_id) fault_index++; if (fault_index >= cap_num_fault_regs(iommu->cap)) fault_index = 0; - 
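The lock conversions in these hunks switch iommu->register_lock and qi->q_lock from spinlock_t to raw_spinlock_t so the short MMIO critical sections stay genuinely non-preemptible even on PREEMPT_RT, where ordinary spinlocks become sleeping locks. A kernel-style sketch of the pattern, not taken from this patch and only buildable in kernel context:

#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/types.h>

static DEFINE_RAW_SPINLOCK(reg_lock);

static void poke_hw(void __iomem *regs, u32 val)
{
        unsigned long flags;

        /* a raw spinlock never turns into a sleeping lock, even on PREEMPT_RT */
        raw_spin_lock_irqsave(&reg_lock, flags);
        writel(val, regs);
        (void)readl(regs);              /* read back to flush the posted write */
        raw_spin_unlock_irqrestore(&reg_lock, flags);
}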
spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); } clear_rest: /* clear all the other faults */ fault_status = readl(iommu->reg + DMAR_FSTS_REG); writel(fault_status, iommu->reg + DMAR_FSTS_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); return IRQ_HANDLED; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a88f3cbb100b..77e3b917c8da 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -24,6 +24,7 @@ #include <linux/init.h> #include <linux/bitmap.h> #include <linux/debugfs.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/irq.h> #include <linux/interrupt.h> @@ -77,6 +78,24 @@ #define LEVEL_STRIDE (9) #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. + * + * Traditionally the IOMMU core just handed us the mappings directly, + * after making sure the size is an order of a 4KiB page and that the + * mapping has natural alignment. + * + * To retain this behavior, we currently advertise that we support + * all page sizes that are an order of 4KiB. + * + * If at some point we'd like to utilize the IOMMU core's new behavior, + * we could change this to advertise the real page sizes we support. + */ +#define INTEL_IOMMU_PGSIZES (~0xFFFUL) + static inline int agaw_to_level(int agaw) { return agaw + 2; @@ -398,11 +417,14 @@ static long list_size; static void domain_remove_dev_info(struct dmar_domain *domain); -#ifdef CONFIG_DMAR_DEFAULT_ON +#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON int dmar_disabled = 0; #else int dmar_disabled = 1; -#endif /*CONFIG_DMAR_DEFAULT_ON*/ +#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/ + +int intel_iommu_enabled = 0; +EXPORT_SYMBOL_GPL(intel_iommu_enabled); static int dmar_map_gfx = 1; static int dmar_forcedac; @@ -939,7 +961,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) addr = iommu->root_entry; - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG); @@ -948,7 +970,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_RTPS), sts); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } static void iommu_flush_write_buffer(struct intel_iommu *iommu) @@ -959,14 +981,14 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu) if (!rwbf_quirk && !cap_rwbf(iommu->cap)) return; - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (!(val & DMA_GSTS_WBFS)), val); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } /* return value determine if we need a write buffer flush */ @@ -993,14 +1015,14 @@ static void __iommu_flush_context(struct intel_iommu *iommu, } val |= DMA_CCMD_ICC; - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, 
flag); dmar_writeq(iommu->reg + DMAR_CCMD_REG, val); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, dmar_readq, (!(val & DMA_CCMD_ICC)), val); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } /* return value determine if we need a write buffer flush */ @@ -1039,7 +1061,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, if (cap_write_drain(iommu->cap)) val |= DMA_TLB_WRITE_DRAIN; - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); /* Note: Only uses first TLB reg currently */ if (val_iva) dmar_writeq(iommu->reg + tlb_offset, val_iva); @@ -1049,7 +1071,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, IOMMU_WAIT_OP(iommu, tlb_offset + 8, dmar_readq, (!(val & DMA_TLB_IVT)), val); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); /* check IOTLB invalidation granularity */ if (DMA_TLB_IAIG(val) == 0) @@ -1165,7 +1187,7 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) u32 pmen; unsigned long flags; - spin_lock_irqsave(&iommu->register_lock, flags); + raw_spin_lock_irqsave(&iommu->register_lock, flags); pmen = readl(iommu->reg + DMAR_PMEN_REG); pmen &= ~DMA_PMEN_EPM; writel(pmen, iommu->reg + DMAR_PMEN_REG); @@ -1174,7 +1196,7 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, readl, !(pmen & DMA_PMEN_PRS), pmen); - spin_unlock_irqrestore(&iommu->register_lock, flags); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); } static int iommu_enable_translation(struct intel_iommu *iommu) @@ -1182,7 +1204,7 @@ static int iommu_enable_translation(struct intel_iommu *iommu) u32 sts; unsigned long flags; - spin_lock_irqsave(&iommu->register_lock, flags); + raw_spin_lock_irqsave(&iommu->register_lock, flags); iommu->gcmd |= DMA_GCMD_TE; writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); @@ -1190,7 +1212,7 @@ static int iommu_enable_translation(struct intel_iommu *iommu) IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_TES), sts); - spin_unlock_irqrestore(&iommu->register_lock, flags); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); return 0; } @@ -1199,7 +1221,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu) u32 sts; unsigned long flag; - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); iommu->gcmd &= ~DMA_GCMD_TE; writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); @@ -1207,7 +1229,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu) IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (!(sts & DMA_GSTS_TES)), sts); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); return 0; } @@ -2157,7 +2179,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, rmrr->end_address); } -#ifdef CONFIG_DMAR_FLOPPY_WA +#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA static inline void iommu_prepare_isa(void) { struct pci_dev *pdev; @@ -2180,7 +2202,7 @@ static inline void iommu_prepare_isa(void) { return; } -#endif /* !CONFIG_DMAR_FLPY_WA */ +#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */ static int md_domain_init(struct dmar_domain *domain, int guest_width); @@ -2491,7 +2513,7 @@ static int __init init_dmars(void) if (iommu_pass_through) iommu_identity_mapping |= IDENTMAP_ALL; -#ifdef CONFIG_DMAR_BROKEN_GFX_WA +#ifdef 
CONFIG_INTEL_IOMMU_BROKEN_GFX_WA iommu_identity_mapping |= IDENTMAP_GFX; #endif @@ -3329,7 +3351,7 @@ static int iommu_suspend(void) for_each_active_iommu(iommu, drhd) { iommu_disable_translation(iommu); - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); iommu->iommu_state[SR_DMAR_FECTL_REG] = readl(iommu->reg + DMAR_FECTL_REG); @@ -3340,7 +3362,7 @@ static int iommu_suspend(void) iommu->iommu_state[SR_DMAR_FEUADDR_REG] = readl(iommu->reg + DMAR_FEUADDR_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } return 0; @@ -3367,7 +3389,7 @@ static void iommu_resume(void) for_each_active_iommu(iommu, drhd) { - spin_lock_irqsave(&iommu->register_lock, flag); + raw_spin_lock_irqsave(&iommu->register_lock, flag); writel(iommu->iommu_state[SR_DMAR_FECTL_REG], iommu->reg + DMAR_FECTL_REG); @@ -3378,7 +3400,7 @@ static void iommu_resume(void) writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG], iommu->reg + DMAR_FEUADDR_REG); - spin_unlock_irqrestore(&iommu->register_lock, flag); + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } for_each_active_iommu(iommu, drhd) @@ -3399,6 +3421,151 @@ static void __init init_iommu_pm_ops(void) static inline void init_iommu_pm_ops(void) {} #endif /* CONFIG_PM */ +LIST_HEAD(dmar_rmrr_units); + +static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) +{ + list_add(&rmrr->list, &dmar_rmrr_units); +} + + +int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header) +{ + struct acpi_dmar_reserved_memory *rmrr; + struct dmar_rmrr_unit *rmrru; + + rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); + if (!rmrru) + return -ENOMEM; + + rmrru->hdr = header; + rmrr = (struct acpi_dmar_reserved_memory *)header; + rmrru->base_address = rmrr->base_address; + rmrru->end_address = rmrr->end_address; + + dmar_register_rmrr_unit(rmrru); + return 0; +} + +static int __init +rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) +{ + struct acpi_dmar_reserved_memory *rmrr; + int ret; + + rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; + ret = dmar_parse_dev_scope((void *)(rmrr + 1), + ((void *)rmrr) + rmrr->header.length, + &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); + + if (ret || (rmrru->devices_cnt == 0)) { + list_del(&rmrru->list); + kfree(rmrru); + } + return ret; +} + +static LIST_HEAD(dmar_atsr_units); + +int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) +{ + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + atsr = container_of(hdr, struct acpi_dmar_atsr, header); + atsru = kzalloc(sizeof(*atsru), GFP_KERNEL); + if (!atsru) + return -ENOMEM; + + atsru->hdr = hdr; + atsru->include_all = atsr->flags & 0x1; + + list_add(&atsru->list, &dmar_atsr_units); + + return 0; +} + +static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) +{ + int rc; + struct acpi_dmar_atsr *atsr; + + if (atsru->include_all) + return 0; + + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + rc = dmar_parse_dev_scope((void *)(atsr + 1), + (void *)atsr + atsr->header.length, + &atsru->devices_cnt, &atsru->devices, + atsr->segment); + if (rc || !atsru->devices_cnt) { + list_del(&atsru->list); + kfree(atsru); + } + + return rc; +} + +int dmar_find_matched_atsr_unit(struct pci_dev *dev) +{ + int i; + struct pci_bus *bus; + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + dev = pci_physfn(dev); + + list_for_each_entry(atsru, &dmar_atsr_units, list) { + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, 
header); + if (atsr->segment == pci_domain_nr(dev->bus)) + goto found; + } + + return 0; + +found: + for (bus = dev->bus; bus; bus = bus->parent) { + struct pci_dev *bridge = bus->self; + + if (!bridge || !pci_is_pcie(bridge) || + bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) + return 0; + + if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) { + for (i = 0; i < atsru->devices_cnt; i++) + if (atsru->devices[i] == bridge) + return 1; + break; + } + } + + if (atsru->include_all) + return 1; + + return 0; +} + +int __init dmar_parse_rmrr_atsr_dev(void) +{ + struct dmar_rmrr_unit *rmrr, *rmrr_n; + struct dmar_atsr_unit *atsr, *atsr_n; + int ret = 0; + + list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { + ret = rmrr_parse_dev(rmrr); + if (ret) + return ret; + } + + list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) { + ret = atsr_parse_dev(atsr); + if (ret) + return ret; + } + + return ret; +} + /* * Here we only respond to action of unbound device from driver. * @@ -3448,16 +3615,12 @@ int __init intel_iommu_init(void) return -ENODEV; } - if (dmar_dev_scope_init()) { + if (dmar_dev_scope_init() < 0) { if (force_on) panic("tboot: Failed to initialize DMAR device scope\n"); return -ENODEV; } - /* - * Check the need for DMA-remapping initialization now. - * Above initialization will also be used by Interrupt-remapping. - */ if (no_iommu || dmar_disabled) return -ENODEV; @@ -3467,6 +3630,12 @@ int __init intel_iommu_init(void) return -ENODEV; } + if (list_empty(&dmar_rmrr_units)) + printk(KERN_INFO "DMAR: No RMRR found\n"); + + if (list_empty(&dmar_atsr_units)) + printk(KERN_INFO "DMAR: No ATSR found\n"); + if (dmar_init_reserved_ranges()) { if (force_on) panic("tboot: Failed to reserve iommu ranges\n"); @@ -3495,10 +3664,12 @@ int __init intel_iommu_init(void) init_iommu_pm_ops(); - register_iommu(&intel_iommu_ops); + bus_set_iommu(&pci_bus_type, &intel_iommu_ops); bus_register_notifier(&pci_bus_type, &device_nb); + intel_iommu_enabled = 1; + return 0; } @@ -3831,12 +4002,11 @@ static void intel_iommu_detach_device(struct iommu_domain *domain, static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, - int gfp_order, int iommu_prot) + size_t size, int iommu_prot) { struct dmar_domain *dmar_domain = domain->priv; u64 max_addr; int prot = 0; - size_t size; int ret; if (iommu_prot & IOMMU_READ) @@ -3846,7 +4016,6 @@ static int intel_iommu_map(struct iommu_domain *domain, if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) prot |= DMA_PTE_SNP; - size = PAGE_SIZE << gfp_order; max_addr = iova + size; if (dmar_domain->max_addr < max_addr) { u64 end; @@ -3869,11 +4038,10 @@ static int intel_iommu_map(struct iommu_domain *domain, return ret; } -static int intel_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, int gfp_order) +static size_t intel_iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = domain->priv; - size_t size = PAGE_SIZE << gfp_order; int order; order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, @@ -3882,7 +4050,7 @@ static int intel_iommu_unmap(struct iommu_domain *domain, if (dmar_domain->max_addr == iova + size) dmar_domain->max_addr = iova; - return order; + return PAGE_SIZE << order; } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, @@ -3912,6 +4080,54 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } +/* + * Group numbers are arbitrary. 
Device with the same group number + * indicate the iommu cannot differentiate between them. To avoid + * tracking used groups we just use the seg|bus|devfn of the lowest + * level we're able to differentiate devices + */ +static int intel_iommu_device_group(struct device *dev, unsigned int *groupid) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev *bridge; + union { + struct { + u8 devfn; + u8 bus; + u16 segment; + } pci; + u32 group; + } id; + + if (iommu_no_mapping(dev)) + return -ENODEV; + + id.pci.segment = pci_domain_nr(pdev->bus); + id.pci.bus = pdev->bus->number; + id.pci.devfn = pdev->devfn; + + if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn)) + return -ENODEV; + + bridge = pci_find_upstream_pcie_bridge(pdev); + if (bridge) { + if (pci_is_pcie(bridge)) { + id.pci.bus = bridge->subordinate->number; + id.pci.devfn = 0; + } else { + id.pci.bus = bridge->bus->number; + id.pci.devfn = bridge->devfn; + } + } + + if (!pdev->is_virtfn && iommu_group_mf) + id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0); + + *groupid = id.group; + + return 0; +} + static struct iommu_ops intel_iommu_ops = { .domain_init = intel_iommu_domain_init, .domain_destroy = intel_iommu_domain_destroy, @@ -3921,6 +4137,8 @@ static struct iommu_ops intel_iommu_ops = { .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, .domain_has_cap = intel_iommu_domain_has_cap, + .device_group = intel_iommu_device_group, + .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) diff --git a/drivers/iommu/intr_remapping.c b/drivers/iommu/intr_remapping.c index 1a89d4a2cadf..6777ca049471 100644 --- a/drivers/iommu/intr_remapping.c +++ b/drivers/iommu/intr_remapping.c @@ -21,6 +21,7 @@ int intr_remapping_enabled; static int disable_intremap; static int disable_sourceid_checking; +static int no_x2apic_optout; static __init int setup_nointremap(char *str) { @@ -34,18 +35,26 @@ static __init int setup_intremap(char *str) if (!str) return -EINVAL; - if (!strncmp(str, "on", 2)) - disable_intremap = 0; - else if (!strncmp(str, "off", 3)) - disable_intremap = 1; - else if (!strncmp(str, "nosid", 5)) - disable_sourceid_checking = 1; + while (*str) { + if (!strncmp(str, "on", 2)) + disable_intremap = 0; + else if (!strncmp(str, "off", 3)) + disable_intremap = 1; + else if (!strncmp(str, "nosid", 5)) + disable_sourceid_checking = 1; + else if (!strncmp(str, "no_x2apic_optout", 16)) + no_x2apic_optout = 1; + + str += strcspn(str, ","); + while (*str == ',') + str++; + } return 0; } early_param("intremap", setup_intremap); -static DEFINE_SPINLOCK(irq_2_ir_lock); +static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); static struct irq_2_iommu *irq_2_iommu(unsigned int irq) { @@ -62,12 +71,12 @@ int get_irte(int irq, struct irte *entry) if (!entry || !irq_iommu) return -1; - spin_lock_irqsave(&irq_2_ir_lock, flags); + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); index = irq_iommu->irte_index + irq_iommu->sub_handle; *entry = *(irq_iommu->iommu->ir_table->base + index); - spin_unlock_irqrestore(&irq_2_ir_lock, flags); + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return 0; } @@ -101,7 +110,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) return -1; } - spin_lock_irqsave(&irq_2_ir_lock, flags); + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); do { for (i = index; i < index + count; i++) if (table->base[i].present) @@ -113,7 +122,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) index = (index + count) % 
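intel_iommu_device_group() above derives a stable group number by packing segment, bus and devfn into one 32-bit value and, when iommu_group_mf is set, folding every function of a multifunction device onto function 0. A standalone sketch of that packing; SLOT() and DEVFN() are stand-ins for the kernel's PCI_SLOT()/PCI_DEVFN() macros:

#include <stdint.h>
#include <stdio.h>

#define SLOT(devfn)        (((devfn) >> 3) & 0x1f)
#define DEVFN(slot, func)  ((((slot) & 0x1f) << 3) | ((func) & 0x07))

static uint32_t group_id(uint16_t segment, uint8_t bus, uint8_t devfn,
                         int collapse_multifunction)
{
        union {
                struct {
                        uint8_t  devfn;
                        uint8_t  bus;
                        uint16_t segment;
                } pci;
                uint32_t group;
        } id;

        if (collapse_multifunction)
                devfn = DEVFN(SLOT(devfn), 0);  /* all functions share one group */

        id.pci.devfn   = devfn;
        id.pci.bus     = bus;
        id.pci.segment = segment;
        return id.group;
}

int main(void)
{
        /* 0000:01:00.0 and 0000:01:00.1 land in the same group when collapsed */
        printf("%#x\n", group_id(0, 1, DEVFN(0, 0), 1));
        printf("%#x\n", group_id(0, 1, DEVFN(0, 1), 1));
        return 0;
}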
INTR_REMAP_TABLE_ENTRIES; if (index == start_index) { - spin_unlock_irqrestore(&irq_2_ir_lock, flags); + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); printk(KERN_ERR "can't allocate an IRTE\n"); return -1; } @@ -127,7 +136,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) irq_iommu->sub_handle = 0; irq_iommu->irte_mask = mask; - spin_unlock_irqrestore(&irq_2_ir_lock, flags); + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return index; } @@ -152,10 +161,10 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle) if (!irq_iommu) return -1; - spin_lock_irqsave(&irq_2_ir_lock, flags); + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); *sub_handle = irq_iommu->sub_handle; index = irq_iommu->irte_index; - spin_unlock_irqrestore(&irq_2_ir_lock, flags); + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return index; } @@ -167,14 +176,14 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) if (!irq_iommu) return -1; - spin_lock_irqsave(&irq_2_ir_lock, flags); + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = subhandle; irq_iommu->irte_mask = 0; - spin_unlock_irqrestore(&irq_2_ir_lock, flags); + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return 0; } @@ -190,7 +199,7 @@ int modify_irte(int irq, struct irte *irte_modified) if (!irq_iommu) return -1; - spin_lock_irqsave(&irq_2_ir_lock, flags); + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); iommu = irq_iommu->iommu; @@ -202,7 +211,7 @@ int modify_irte(int irq, struct irte *irte_modified) __iommu_flush_cache(iommu, irte, sizeof(*irte)); rc = qi_flush_iec(iommu, index, 0); - spin_unlock_irqrestore(&irq_2_ir_lock, flags); + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return rc; } @@ -270,7 +279,7 @@ int free_irte(int irq) if (!irq_iommu) return -1; - spin_lock_irqsave(&irq_2_ir_lock, flags); + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); rc = clear_entries(irq_iommu); @@ -279,7 +288,7 @@ int free_irte(int irq) irq_iommu->sub_handle = 0; irq_iommu->irte_mask = 0; - spin_unlock_irqrestore(&irq_2_ir_lock, flags); + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return rc; } @@ -409,7 +418,7 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) addr = virt_to_phys((void *)iommu->ir_table->base); - spin_lock_irqsave(&iommu->register_lock, flags); + raw_spin_lock_irqsave(&iommu->register_lock, flags); dmar_writeq(iommu->reg + DMAR_IRTA_REG, (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); @@ -420,7 +429,7 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRTPS), sts); - spin_unlock_irqrestore(&iommu->register_lock, flags); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); /* * global invalidation of interrupt entry cache before enabling @@ -428,7 +437,7 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) */ qi_global_iec(iommu); - spin_lock_irqsave(&iommu->register_lock, flags); + raw_spin_lock_irqsave(&iommu->register_lock, flags); /* Enable interrupt-remapping */ iommu->gcmd |= DMA_GCMD_IRE; @@ -437,7 +446,7 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRES), sts); - spin_unlock_irqrestore(&iommu->register_lock, flags); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); } @@ -485,7 +494,7 @@ static void iommu_disable_intr_remapping(struct intel_iommu 
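setup_intremap() in the hunk above now walks a comma-separated option string (for example intremap=on,nosid,no_x2apic_optout) instead of matching a single keyword. The same strcspn()-based loop as a standalone program:

#include <stdio.h>
#include <string.h>

static void parse_opts(const char *arg)
{
        const char *str = arg;

        while (*str) {
                if (!strncmp(str, "on", 2))
                        printf("interrupt remapping on\n");
                else if (!strncmp(str, "off", 3))
                        printf("interrupt remapping off\n");
                else if (!strncmp(str, "nosid", 5))
                        printf("source-id checking off\n");
                else if (!strncmp(str, "no_x2apic_optout", 16))
                        printf("ignore BIOS x2apic opt-out\n");

                str += strcspn(str, ",");       /* skip to the next separator */
                while (*str == ',')
                        str++;
        }
}

int main(void)
{
        parse_opts("on,nosid,no_x2apic_optout");
        return 0;
}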
*iommu) */ qi_global_iec(iommu); - spin_lock_irqsave(&iommu->register_lock, flags); + raw_spin_lock_irqsave(&iommu->register_lock, flags); sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); if (!(sts & DMA_GSTS_IRES)) @@ -498,7 +507,16 @@ static void iommu_disable_intr_remapping(struct intel_iommu *iommu) readl, !(sts & DMA_GSTS_IRES), sts); end: - spin_unlock_irqrestore(&iommu->register_lock, flags); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); +} + +static int __init dmar_x2apic_optout(void) +{ + struct acpi_table_dmar *dmar; + dmar = (struct acpi_table_dmar *)dmar_tbl; + if (!dmar || no_x2apic_optout) + return 0; + return dmar->flags & DMAR_X2APIC_OPT_OUT; } int __init intr_remapping_supported(void) @@ -521,16 +539,25 @@ int __init intr_remapping_supported(void) return 1; } -int __init enable_intr_remapping(int eim) +int __init enable_intr_remapping(void) { struct dmar_drhd_unit *drhd; int setup = 0; + int eim = 0; if (parse_ioapics_under_ir() != 1) { printk(KERN_INFO "Not enable interrupt remapping\n"); return -1; } + if (x2apic_supported()) { + eim = !dmar_x2apic_optout(); + WARN(!eim, KERN_WARNING + "Your BIOS is broken and requested that x2apic be disabled\n" + "This will leave your machine vulnerable to irq-injection attacks\n" + "Use 'intremap=no_x2apic_optout' to override BIOS request\n"); + } + for_each_drhd_unit(drhd) { struct intel_iommu *iommu = drhd->iommu; @@ -606,8 +633,9 @@ int __init enable_intr_remapping(int eim) goto error; intr_remapping_enabled = 1; + pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); - return 0; + return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; error: /* @@ -745,6 +773,15 @@ int __init parse_ioapics_under_ir(void) return ir_supported; } +int __init ir_dev_scope_init(void) +{ + if (!intr_remapping_enabled) + return 0; + + return dmar_dev_scope_init(); +} +rootfs_initcall(ir_dev_scope_init); + void disable_intr_remapping(void) { struct dmar_drhd_unit *drhd; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6e6b6a11b3ce..2198b2dbbcd3 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -16,6 +16,10 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include <linux/device.h> +#include <linux/kernel.h> #include <linux/bug.h> #include <linux/types.h> #include <linux/module.h> @@ -23,32 +27,129 @@ #include <linux/errno.h> #include <linux/iommu.h> -static struct iommu_ops *iommu_ops; +static ssize_t show_iommu_group(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned int groupid; + + if (iommu_device_group(dev, &groupid)) + return 0; + + return sprintf(buf, "%u", groupid); +} +static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL); + +static int add_iommu_group(struct device *dev, void *data) +{ + unsigned int groupid; + + if (iommu_device_group(dev, &groupid) == 0) + return device_create_file(dev, &dev_attr_iommu_group); + + return 0; +} + +static int remove_iommu_group(struct device *dev) +{ + unsigned int groupid; + + if (iommu_device_group(dev, &groupid) == 0) + device_remove_file(dev, &dev_attr_iommu_group); + + return 0; +} + +static int iommu_device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE) + return add_iommu_group(dev, NULL); + else if (action == BUS_NOTIFY_DEL_DEVICE) + return remove_iommu_group(dev); + + return 0; +} -void register_iommu(struct iommu_ops *ops) +static 
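dmar_x2apic_optout() and the new eim logic in enable_intr_remapping() boil down to: run in x2apic (eim) mode when the CPU supports it and the BIOS has not opted out, unless the user overrides the opt-out with intremap=no_x2apic_optout. A compact userspace model of that decision; the flag value mirrors DMAR_X2APIC_OPT_OUT, but the function and its diagnostics are illustrative only:

#include <stdio.h>

#define DMAR_X2APIC_OPT_OUT     0x2     /* bit 1 of the ACPI DMAR table flags */

static int want_x2apic_mode(int x2apic_supported, unsigned int dmar_flags,
                            int no_x2apic_optout)
{
        int opt_out = !no_x2apic_optout && (dmar_flags & DMAR_X2APIC_OPT_OUT);

        if (!x2apic_supported)
                return 0;                       /* xapic mode */
        if (opt_out)
                fprintf(stderr, "BIOS requested that x2apic be disabled\n");
        return !opt_out;                        /* 1 = x2apic (eim), 0 = xapic */
}

int main(void)
{
        printf("%d\n", want_x2apic_mode(1, DMAR_X2APIC_OPT_OUT, 0));  /* 0: honour opt-out */
        printf("%d\n", want_x2apic_mode(1, DMAR_X2APIC_OPT_OUT, 1));  /* 1: user override */
        return 0;
}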
struct notifier_block iommu_device_nb = { + .notifier_call = iommu_device_notifier, +}; + +static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) +{ + bus_register_notifier(bus, &iommu_device_nb); + bus_for_each_dev(bus, NULL, NULL, add_iommu_group); +} + +/** + * bus_set_iommu - set iommu-callbacks for the bus + * @bus: bus. + * @ops: the callbacks provided by the iommu-driver + * + * This function is called by an iommu driver to set the iommu methods + * used for a particular bus. Drivers for devices on that bus can use + * the iommu-api after these ops are registered. + * This special function is needed because IOMMUs are usually devices on + * the bus itself, so the iommu drivers are not initialized when the bus + * is set up. With this function the iommu-driver can set the iommu-ops + * afterwards. + */ +int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops) { - if (iommu_ops) - BUG(); + if (bus->iommu_ops != NULL) + return -EBUSY; - iommu_ops = ops; + bus->iommu_ops = ops; + + /* Do IOMMU specific setup for this bus-type */ + iommu_bus_init(bus, ops); + + return 0; +} +EXPORT_SYMBOL_GPL(bus_set_iommu); + +bool iommu_present(struct bus_type *bus) +{ + return bus->iommu_ops != NULL; } +EXPORT_SYMBOL_GPL(iommu_present); -bool iommu_found(void) +/** + * iommu_set_fault_handler() - set a fault handler for an iommu domain + * @domain: iommu domain + * @handler: fault handler + * + * This function should be used by IOMMU users which want to be notified + * whenever an IOMMU fault happens. + * + * The fault handler itself should return 0 on success, and an appropriate + * error code otherwise. + */ +void iommu_set_fault_handler(struct iommu_domain *domain, + iommu_fault_handler_t handler) { - return iommu_ops != NULL; + BUG_ON(!domain); + + domain->handler = handler; } -EXPORT_SYMBOL_GPL(iommu_found); +EXPORT_SYMBOL_GPL(iommu_set_fault_handler); -struct iommu_domain *iommu_domain_alloc(void) +struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) { struct iommu_domain *domain; int ret; - domain = kmalloc(sizeof(*domain), GFP_KERNEL); + if (bus == NULL || bus->iommu_ops == NULL) + return NULL; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; - ret = iommu_ops->domain_init(domain); + domain->ops = bus->iommu_ops; + + ret = domain->ops->domain_init(domain); if (ret) goto out_free; @@ -63,62 +164,180 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc); void iommu_domain_free(struct iommu_domain *domain) { - iommu_ops->domain_destroy(domain); + if (likely(domain->ops->domain_destroy != NULL)) + domain->ops->domain_destroy(domain); + kfree(domain); } EXPORT_SYMBOL_GPL(iommu_domain_free); int iommu_attach_device(struct iommu_domain *domain, struct device *dev) { - return iommu_ops->attach_dev(domain, dev); + if (unlikely(domain->ops->attach_dev == NULL)) + return -ENODEV; + + return domain->ops->attach_dev(domain, dev); } EXPORT_SYMBOL_GPL(iommu_attach_device); void iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - iommu_ops->detach_dev(domain, dev); + if (unlikely(domain->ops->detach_dev == NULL)) + return; + + domain->ops->detach_dev(domain, dev); } EXPORT_SYMBOL_GPL(iommu_detach_device); phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, unsigned long iova) { - return iommu_ops->iova_to_phys(domain, iova); + if (unlikely(domain->ops->iova_to_phys == NULL)) + return 0; + + return domain->ops->iova_to_phys(domain, iova); } EXPORT_SYMBOL_GPL(iommu_iova_to_phys); int iommu_domain_has_cap(struct iommu_domain *domain, 
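bus_set_iommu() above replaces the old single global iommu_ops with one ops pointer per bus_type, refusing a second registration with -EBUSY, and iommu_present() becomes a plain pointer check on the bus. A miniature userspace model of that ownership rule; the my_-prefixed types are stand-ins, not kernel structures:

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

struct my_iommu_ops { int (*attach)(void *dev); };
struct my_bus_type  { const char *name; const struct my_iommu_ops *iommu_ops; };

static int my_bus_set_iommu(struct my_bus_type *bus, const struct my_iommu_ops *ops)
{
        if (bus->iommu_ops != NULL)
                return -EBUSY;          /* only one IOMMU driver per bus */
        bus->iommu_ops = ops;
        return 0;
}

static int my_iommu_present(const struct my_bus_type *bus)
{
        return bus->iommu_ops != NULL;
}

static const struct my_iommu_ops dummy_ops;

int main(void)
{
        struct my_bus_type pci = { "pci", NULL };

        printf("set:     %d\n", my_bus_set_iommu(&pci, &dummy_ops));  /* 0 */
        printf("again:   %d\n", my_bus_set_iommu(&pci, &dummy_ops));  /* -EBUSY */
        printf("present: %d\n", my_iommu_present(&pci));              /* 1 */
        return 0;
}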
unsigned long cap) { - return iommu_ops->domain_has_cap(domain, cap); + if (unlikely(domain->ops->domain_has_cap == NULL)) + return 0; + + return domain->ops->domain_has_cap(domain, cap); } EXPORT_SYMBOL_GPL(iommu_domain_has_cap); int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, int gfp_order, int prot) + phys_addr_t paddr, size_t size, int prot) { - unsigned long invalid_mask; - size_t size; + unsigned long orig_iova = iova; + unsigned int min_pagesz; + size_t orig_size = size; + int ret = 0; + + if (unlikely(domain->ops->map == NULL)) + return -ENODEV; + + /* find out the minimum page size supported */ + min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); + + /* + * both the virtual address and the physical one, as well as + * the size of the mapping, must be aligned (at least) to the + * size of the smallest page supported by the hardware + */ + if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { + pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz " + "0x%x\n", iova, (unsigned long)paddr, + (unsigned long)size, min_pagesz); + return -EINVAL; + } + + pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova, + (unsigned long)paddr, (unsigned long)size); + + while (size) { + unsigned long pgsize, addr_merge = iova | paddr; + unsigned int pgsize_idx; + + /* Max page size that still fits into 'size' */ + pgsize_idx = __fls(size); + + /* need to consider alignment requirements ? */ + if (likely(addr_merge)) { + /* Max page size allowed by both iova and paddr */ + unsigned int align_pgsize_idx = __ffs(addr_merge); + + pgsize_idx = min(pgsize_idx, align_pgsize_idx); + } - size = 0x1000UL << gfp_order; - invalid_mask = size - 1; + /* build a mask of acceptable page sizes */ + pgsize = (1UL << (pgsize_idx + 1)) - 1; - BUG_ON((iova | paddr) & invalid_mask); + /* throw away page sizes not supported by the hardware */ + pgsize &= domain->ops->pgsize_bitmap; - return iommu_ops->map(domain, iova, paddr, gfp_order, prot); + /* make sure we're still sane */ + BUG_ON(!pgsize); + + /* pick the biggest page */ + pgsize_idx = __fls(pgsize); + pgsize = 1UL << pgsize_idx; + + pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova, + (unsigned long)paddr, pgsize); + + ret = domain->ops->map(domain, iova, paddr, pgsize, prot); + if (ret) + break; + + iova += pgsize; + paddr += pgsize; + size -= pgsize; + } + + /* unroll mapping in case something went wrong */ + if (ret) + iommu_unmap(domain, orig_iova, orig_size - size); + + return ret; } EXPORT_SYMBOL_GPL(iommu_map); -int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order) +size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - unsigned long invalid_mask; - size_t size; + size_t unmapped_page, unmapped = 0; + unsigned int min_pagesz; + + if (unlikely(domain->ops->unmap == NULL)) + return -ENODEV; + + /* find out the minimum page size supported */ + min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); - size = 0x1000UL << gfp_order; - invalid_mask = size - 1; + /* + * The virtual address, as well as the size of the mapping, must be + * aligned (at least) to the size of the smallest page supported + * by the hardware + */ + if (!IS_ALIGNED(iova | size, min_pagesz)) { + pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n", + iova, (unsigned long)size, min_pagesz); + return -EINVAL; + } - BUG_ON(iova & invalid_mask); + pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova, + (unsigned long)size); - return iommu_ops->unmap(domain, iova, gfp_order); + /* + * 
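The new iommu_map() core splits an arbitrary, aligned region into the largest pages the driver advertises in pgsize_bitmap: __fls(size) bounds the page by the remaining length, __ffs(iova | paddr) bounds it by alignment, and the bitmap filters out unsupported sizes. Intel advertises every 4 KiB order (INTEL_IOMMU_PGSIZES), so the split is effectively a no-op there, while MSM and OMAP advertise only 4K/64K/1M/16M. A standalone model of the selection loop, using GCC builtins in place of __fls()/__ffs():

#include <stdio.h>
#include <stdint.h>

/* bit n of 'bitmap' set means the hardware supports 2^n byte pages */
static unsigned long pick_pgsize(unsigned long bitmap, uint64_t iova,
                                 uint64_t paddr, size_t size)
{
        uint64_t addr_merge = iova | paddr;
        unsigned int pgsize_idx;
        unsigned long pgsize;

        /* largest page that still fits into 'size' (kernel: __fls(size)) */
        pgsize_idx = 63 - __builtin_clzll((unsigned long long)size);

        /* ...and that both addresses are aligned to (kernel: __ffs(addr_merge)) */
        if (addr_merge) {
                unsigned int align_idx = __builtin_ctzll(addr_merge);
                if (align_idx < pgsize_idx)
                        pgsize_idx = align_idx;
        }

        /* keep only sizes the hardware supports, then pick the biggest */
        pgsize = ((1UL << (pgsize_idx + 1)) - 1) & bitmap;
        return pgsize ? 1UL << (63 - __builtin_clzll(pgsize)) : 0;
}

int main(void)
{
        unsigned long bitmap = 0x1000 | 0x10000 | 0x100000;   /* 4K, 64K, 1M */
        uint64_t iova = 0x100000, paddr = 0x40000000;
        size_t size = 0x90000;                                /* 576 KiB */

        while (size) {
                unsigned long pg = pick_pgsize(bitmap, iova, paddr, size);

                if (!pg)        /* unaligned input; the kernel returns -EINVAL */
                        break;
                printf("map iova 0x%llx -> pa 0x%llx, 0x%lx bytes\n",
                       (unsigned long long)iova, (unsigned long long)paddr, pg);
                iova += pg;
                paddr += pg;
                size -= pg;
        }
        return 0;
}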
Keep iterating until we either unmap 'size' bytes (or more) + * or we hit an area that isn't mapped. + */ + while (unmapped < size) { + size_t left = size - unmapped; + + unmapped_page = domain->ops->unmap(domain, iova, left); + if (!unmapped_page) + break; + + pr_debug("unmapped: iova 0x%lx size %lx\n", iova, + (unsigned long)unmapped_page); + + iova += unmapped_page; + unmapped += unmapped_page; + } + + return unmapped; } EXPORT_SYMBOL_GPL(iommu_unmap); + +int iommu_device_group(struct device *dev, unsigned int *groupid) +{ + if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group) + return dev->bus->iommu_ops->device_group(dev, groupid); + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(iommu_device_group); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 1a584e077c61..08a90b88e40d 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -42,6 +42,9 @@ __asm__ __volatile__ ( \ #define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0) #define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1) +/* bitmap of the page sizes currently supported */ +#define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) + static int msm_iommu_tex_class[4]; DEFINE_SPINLOCK(msm_iommu_lock); @@ -352,7 +355,7 @@ fail: } static int msm_iommu_map(struct iommu_domain *domain, unsigned long va, - phys_addr_t pa, int order, int prot) + phys_addr_t pa, size_t len, int prot) { struct msm_priv *priv; unsigned long flags; @@ -363,7 +366,6 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va, unsigned long *sl_pte; unsigned long sl_offset; unsigned int pgprot; - size_t len = 0x1000UL << order; int ret = 0, tex, sh; spin_lock_irqsave(&msm_iommu_lock, flags); @@ -463,8 +465,8 @@ fail: return ret; } -static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, - int order) +static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, + size_t len) { struct msm_priv *priv; unsigned long flags; @@ -474,7 +476,6 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, unsigned long *sl_table; unsigned long *sl_pte; unsigned long sl_offset; - size_t len = 0x1000UL << order; int i, ret = 0; spin_lock_irqsave(&msm_iommu_lock, flags); @@ -543,9 +544,13 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, } ret = __flush_iotlb(domain); + fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); - return ret; + + /* the IOMMU API requires us to return how many bytes were unmapped */ + len = ret ? 0 : len; + return len; } static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, @@ -677,7 +682,8 @@ static struct iommu_ops msm_iommu_ops = { .map = msm_iommu_map, .unmap = msm_iommu_unmap, .iova_to_phys = msm_iommu_iova_to_phys, - .domain_has_cap = msm_iommu_domain_has_cap + .domain_has_cap = msm_iommu_domain_has_cap, + .pgsize_bitmap = MSM_IOMMU_PGSIZES, }; static int __init get_tex_class(int icp, int ocp, int mt, int nos) @@ -721,7 +727,7 @@ static void __init setup_iommu_tex_classes(void) static int __init msm_iommu_init(void) { setup_iommu_tex_classes(); - register_iommu(&msm_iommu_ops); + bus_set_iommu(&platform_bus_type, &msm_iommu_ops); return 0; } diff --git a/arch/arm/plat-omap/iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index f07cf2f08e09..288da5c1499d 100644 --- a/arch/arm/plat-omap/iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -10,6 +10,7 @@ * published by the Free Software Foundation. 
*/ +#include <linux/module.h> #include <linux/err.h> #include <linux/clk.h> #include <linux/io.h> @@ -21,7 +22,7 @@ #include <plat/iommu.h> #include <plat/iovmm.h> -#include "iopgtable.h" +#include <plat/iopgtable.h> #define MAXCOLUMN 100 /* for short messages */ @@ -32,7 +33,7 @@ static struct dentry *iommu_debug_root; static ssize_t debug_read_ver(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - u32 ver = iommu_arch_version(); + u32 ver = omap_iommu_arch_version(); char buf[MAXCOLUMN], *p = buf; p += sprintf(p, "H/W version: %d.%d\n", (ver >> 4) & 0xf , ver & 0xf); @@ -43,7 +44,7 @@ static ssize_t debug_read_ver(struct file *file, char __user *userbuf, static ssize_t debug_read_regs(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - struct iommu *obj = file->private_data; + struct omap_iommu *obj = file->private_data; char *p, *buf; ssize_t bytes; @@ -54,7 +55,7 @@ static ssize_t debug_read_regs(struct file *file, char __user *userbuf, mutex_lock(&iommu_debug_lock); - bytes = iommu_dump_ctx(obj, p, count); + bytes = omap_iommu_dump_ctx(obj, p, count); bytes = simple_read_from_buffer(userbuf, count, ppos, buf, bytes); mutex_unlock(&iommu_debug_lock); @@ -66,7 +67,7 @@ static ssize_t debug_read_regs(struct file *file, char __user *userbuf, static ssize_t debug_read_tlb(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - struct iommu *obj = file->private_data; + struct omap_iommu *obj = file->private_data; char *p, *buf; ssize_t bytes, rest; @@ -80,7 +81,7 @@ static ssize_t debug_read_tlb(struct file *file, char __user *userbuf, p += sprintf(p, "%8s %8s\n", "cam:", "ram:"); p += sprintf(p, "-----------------------------------------\n"); rest = count - (p - buf); - p += dump_tlb_entries(obj, p, rest); + p += omap_dump_tlb_entries(obj, p, rest); bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); @@ -96,7 +97,7 @@ static ssize_t debug_write_pagetable(struct file *file, struct iotlb_entry e; struct cr_regs cr; int err; - struct iommu *obj = file->private_data; + struct omap_iommu *obj = file->private_data; char buf[MAXCOLUMN], *p = buf; count = min(count, sizeof(buf)); @@ -113,8 +114,8 @@ static ssize_t debug_write_pagetable(struct file *file, return -EINVAL; } - iotlb_cr_to_e(&cr, &e); - err = iopgtable_store_entry(obj, &e); + omap_iotlb_cr_to_e(&cr, &e); + err = omap_iopgtable_store_entry(obj, &e); if (err) dev_err(obj->dev, "%s: fail to store cr\n", __func__); @@ -136,7 +137,7 @@ static ssize_t debug_write_pagetable(struct file *file, __err; \ }) -static ssize_t dump_ioptable(struct iommu *obj, char *buf, ssize_t len) +static ssize_t dump_ioptable(struct omap_iommu *obj, char *buf, ssize_t len) { int i; u32 *iopgd; @@ -183,7 +184,7 @@ out: static ssize_t debug_read_pagetable(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - struct iommu *obj = file->private_data; + struct omap_iommu *obj = file->private_data; char *p, *buf; size_t bytes; @@ -211,7 +212,7 @@ static ssize_t debug_read_pagetable(struct file *file, char __user *userbuf, static ssize_t debug_read_mmap(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - struct iommu *obj = file->private_data; + struct omap_iommu *obj = file->private_data; char *p, *buf; struct iovm_struct *tmp; int uninitialized_var(i); @@ -253,7 +254,7 @@ static ssize_t debug_read_mmap(struct file *file, char __user *userbuf, static ssize_t debug_read_mem(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - struct 
iommu *obj = file->private_data; + struct omap_iommu *obj = file->private_data; char *p, *buf; struct iovm_struct *area; ssize_t bytes; @@ -267,7 +268,7 @@ static ssize_t debug_read_mem(struct file *file, char __user *userbuf, mutex_lock(&iommu_debug_lock); - area = find_iovm_area(obj, (u32)ppos); + area = omap_find_iovm_area(obj, (u32)ppos); if (IS_ERR(area)) { bytes = -EINVAL; goto err_out; @@ -286,7 +287,7 @@ err_out: static ssize_t debug_write_mem(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos) { - struct iommu *obj = file->private_data; + struct omap_iommu *obj = file->private_data; struct iovm_struct *area; char *p, *buf; @@ -304,7 +305,7 @@ static ssize_t debug_write_mem(struct file *file, const char __user *userbuf, goto err_out; } - area = find_iovm_area(obj, (u32)ppos); + area = omap_find_iovm_area(obj, (u32)ppos); if (IS_ERR(area)) { count = -EINVAL; goto err_out; @@ -360,7 +361,7 @@ DEBUG_FOPS(mem); static int iommu_debug_register(struct device *dev, void *data) { struct platform_device *pdev = to_platform_device(dev); - struct iommu *obj = platform_get_drvdata(pdev); + struct omap_iommu *obj = platform_get_drvdata(pdev); struct dentry *d, *parent; if (!obj || !obj->dev) @@ -396,7 +397,7 @@ static int __init iommu_debug_init(void) return -ENOMEM; iommu_debug_root = d; - err = foreach_iommu_device(d, iommu_debug_register); + err = omap_foreach_iommu_device(d, iommu_debug_register); if (err) goto err_out; return 0; diff --git a/arch/arm/plat-omap/iommu.c b/drivers/iommu/omap-iommu.c index 34fc31ee9081..d8edd979d01b 100644 --- a/arch/arm/plat-omap/iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -18,18 +18,37 @@ #include <linux/ioport.h> #include <linux/clk.h> #include <linux/platform_device.h> +#include <linux/iommu.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> #include <asm/cacheflush.h> #include <plat/iommu.h> -#include "iopgtable.h" +#include <plat/iopgtable.h> #define for_each_iotlb_cr(obj, n, __i, cr) \ for (__i = 0; \ (__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true); \ __i++) +/* bitmap of the page sizes currently supported */ +#define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) + +/** + * struct omap_iommu_domain - omap iommu domain + * @pgtable: the page table + * @iommu_dev: an omap iommu device attached to this domain. only a single + * iommu device can be attached for now. + * @lock: domain lock, should be taken when attaching/detaching + */ +struct omap_iommu_domain { + u32 *pgtable; + struct omap_iommu *iommu_dev; + spinlock_t lock; +}; + /* accommodate the difference between omap1 and omap2/3 */ static const struct iommu_functions *arch_iommu; @@ -37,13 +56,13 @@ static struct platform_driver omap_iommu_driver; static struct kmem_cache *iopte_cachep; /** - * install_iommu_arch - Install archtecure specific iommu functions + * omap_install_iommu_arch - Install archtecure specific iommu functions * @ops: a pointer to architecture specific iommu functions * * There are several kind of iommu algorithm(tlb, pagetable) among * omap series. This interface installs such an iommu algorighm. 
**/ -int install_iommu_arch(const struct iommu_functions *ops) +int omap_install_iommu_arch(const struct iommu_functions *ops) { if (arch_iommu) return -EBUSY; @@ -51,53 +70,57 @@ int install_iommu_arch(const struct iommu_functions *ops) arch_iommu = ops; return 0; } -EXPORT_SYMBOL_GPL(install_iommu_arch); +EXPORT_SYMBOL_GPL(omap_install_iommu_arch); /** - * uninstall_iommu_arch - Uninstall archtecure specific iommu functions + * omap_uninstall_iommu_arch - Uninstall archtecure specific iommu functions * @ops: a pointer to architecture specific iommu functions * * This interface uninstalls the iommu algorighm installed previously. **/ -void uninstall_iommu_arch(const struct iommu_functions *ops) +void omap_uninstall_iommu_arch(const struct iommu_functions *ops) { if (arch_iommu != ops) pr_err("%s: not your arch\n", __func__); arch_iommu = NULL; } -EXPORT_SYMBOL_GPL(uninstall_iommu_arch); +EXPORT_SYMBOL_GPL(omap_uninstall_iommu_arch); /** - * iommu_save_ctx - Save registers for pm off-mode support - * @obj: target iommu + * omap_iommu_save_ctx - Save registers for pm off-mode support + * @dev: client device **/ -void iommu_save_ctx(struct iommu *obj) +void omap_iommu_save_ctx(struct device *dev) { + struct omap_iommu *obj = dev_to_omap_iommu(dev); + arch_iommu->save_ctx(obj); } -EXPORT_SYMBOL_GPL(iommu_save_ctx); +EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); /** - * iommu_restore_ctx - Restore registers for pm off-mode support - * @obj: target iommu + * omap_iommu_restore_ctx - Restore registers for pm off-mode support + * @dev: client device **/ -void iommu_restore_ctx(struct iommu *obj) +void omap_iommu_restore_ctx(struct device *dev) { + struct omap_iommu *obj = dev_to_omap_iommu(dev); + arch_iommu->restore_ctx(obj); } -EXPORT_SYMBOL_GPL(iommu_restore_ctx); +EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); /** - * iommu_arch_version - Return running iommu arch version + * omap_iommu_arch_version - Return running iommu arch version **/ -u32 iommu_arch_version(void) +u32 omap_iommu_arch_version(void) { return arch_iommu->version; } -EXPORT_SYMBOL_GPL(iommu_arch_version); +EXPORT_SYMBOL_GPL(omap_iommu_arch_version); -static int iommu_enable(struct iommu *obj) +static int iommu_enable(struct omap_iommu *obj) { int err; @@ -115,7 +138,7 @@ static int iommu_enable(struct iommu *obj) return err; } -static void iommu_disable(struct iommu *obj) +static void iommu_disable(struct omap_iommu *obj) { if (!obj) return; @@ -130,13 +153,13 @@ static void iommu_disable(struct iommu *obj) /* * TLB operations */ -void iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e) +void omap_iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e) { BUG_ON(!cr || !e); arch_iommu->cr_to_e(cr, e); } -EXPORT_SYMBOL_GPL(iotlb_cr_to_e); +EXPORT_SYMBOL_GPL(omap_iotlb_cr_to_e); static inline int iotlb_cr_valid(struct cr_regs *cr) { @@ -146,7 +169,7 @@ static inline int iotlb_cr_valid(struct cr_regs *cr) return arch_iommu->cr_valid(cr); } -static inline struct cr_regs *iotlb_alloc_cr(struct iommu *obj, +static inline struct cr_regs *iotlb_alloc_cr(struct omap_iommu *obj, struct iotlb_entry *e) { if (!e) @@ -155,23 +178,22 @@ static inline struct cr_regs *iotlb_alloc_cr(struct iommu *obj, return arch_iommu->alloc_cr(obj, e); } -u32 iotlb_cr_to_virt(struct cr_regs *cr) +static u32 iotlb_cr_to_virt(struct cr_regs *cr) { return arch_iommu->cr_to_virt(cr); } -EXPORT_SYMBOL_GPL(iotlb_cr_to_virt); static u32 get_iopte_attr(struct iotlb_entry *e) { return arch_iommu->get_pte_attr(e); } -static u32 iommu_report_fault(struct iommu *obj, u32 
*da) +static u32 iommu_report_fault(struct omap_iommu *obj, u32 *da) { return arch_iommu->fault_isr(obj, da); } -static void iotlb_lock_get(struct iommu *obj, struct iotlb_lock *l) +static void iotlb_lock_get(struct omap_iommu *obj, struct iotlb_lock *l) { u32 val; @@ -182,7 +204,7 @@ static void iotlb_lock_get(struct iommu *obj, struct iotlb_lock *l) } -static void iotlb_lock_set(struct iommu *obj, struct iotlb_lock *l) +static void iotlb_lock_set(struct omap_iommu *obj, struct iotlb_lock *l) { u32 val; @@ -192,12 +214,12 @@ static void iotlb_lock_set(struct iommu *obj, struct iotlb_lock *l) iommu_write_reg(obj, val, MMU_LOCK); } -static void iotlb_read_cr(struct iommu *obj, struct cr_regs *cr) +static void iotlb_read_cr(struct omap_iommu *obj, struct cr_regs *cr) { arch_iommu->tlb_read_cr(obj, cr); } -static void iotlb_load_cr(struct iommu *obj, struct cr_regs *cr) +static void iotlb_load_cr(struct omap_iommu *obj, struct cr_regs *cr) { arch_iommu->tlb_load_cr(obj, cr); @@ -211,7 +233,7 @@ static void iotlb_load_cr(struct iommu *obj, struct cr_regs *cr) * @cr: contents of cam and ram register * @buf: output buffer **/ -static inline ssize_t iotlb_dump_cr(struct iommu *obj, struct cr_regs *cr, +static inline ssize_t iotlb_dump_cr(struct omap_iommu *obj, struct cr_regs *cr, char *buf) { BUG_ON(!cr || !buf); @@ -220,7 +242,7 @@ static inline ssize_t iotlb_dump_cr(struct iommu *obj, struct cr_regs *cr, } /* only used in iotlb iteration for-loop */ -static struct cr_regs __iotlb_read_cr(struct iommu *obj, int n) +static struct cr_regs __iotlb_read_cr(struct omap_iommu *obj, int n) { struct cr_regs cr; struct iotlb_lock l; @@ -238,7 +260,8 @@ static struct cr_regs __iotlb_read_cr(struct iommu *obj, int n) * @obj: target iommu * @e: an iommu tlb entry info **/ -int load_iotlb_entry(struct iommu *obj, struct iotlb_entry *e) +#ifdef PREFETCH_IOTLB +static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) { int err = 0; struct iotlb_lock l; @@ -294,7 +317,20 @@ out: clk_disable(obj->clk); return err; } -EXPORT_SYMBOL_GPL(load_iotlb_entry); + +#else /* !PREFETCH_IOTLB */ + +static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) +{ + return 0; +} + +#endif /* !PREFETCH_IOTLB */ + +static int prefetch_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) +{ + return load_iotlb_entry(obj, e); +} /** * flush_iotlb_page - Clear an iommu tlb entry @@ -303,7 +339,7 @@ EXPORT_SYMBOL_GPL(load_iotlb_entry); * * Clear an iommu tlb entry which includes 'da' address. **/ -void flush_iotlb_page(struct iommu *obj, u32 da) +static void flush_iotlb_page(struct omap_iommu *obj, u32 da) { int i; struct cr_regs cr; @@ -332,33 +368,12 @@ void flush_iotlb_page(struct iommu *obj, u32 da) if (i == obj->nr_tlb_entries) dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da); } -EXPORT_SYMBOL_GPL(flush_iotlb_page); - -/** - * flush_iotlb_range - Clear an iommu tlb entries - * @obj: target iommu - * @start: iommu device virtual address(start) - * @end: iommu device virtual address(end) - * - * Clear an iommu tlb entry which includes 'da' address. 
- **/ -void flush_iotlb_range(struct iommu *obj, u32 start, u32 end) -{ - u32 da = start; - - while (da < end) { - flush_iotlb_page(obj, da); - /* FIXME: Optimize for multiple page size */ - da += IOPTE_SIZE; - } -} -EXPORT_SYMBOL_GPL(flush_iotlb_range); /** * flush_iotlb_all - Clear all iommu tlb entries * @obj: target iommu **/ -void flush_iotlb_all(struct iommu *obj) +static void flush_iotlb_all(struct omap_iommu *obj) { struct iotlb_lock l; @@ -372,28 +387,10 @@ void flush_iotlb_all(struct iommu *obj) clk_disable(obj->clk); } -EXPORT_SYMBOL_GPL(flush_iotlb_all); - -/** - * iommu_set_twl - enable/disable table walking logic - * @obj: target iommu - * @on: enable/disable - * - * Function used to enable/disable TWL. If one wants to work - * exclusively with locked TLB entries and receive notifications - * for TLB miss then call this function to disable TWL. - */ -void iommu_set_twl(struct iommu *obj, bool on) -{ - clk_enable(obj->clk); - arch_iommu->set_twl(obj, on); - clk_disable(obj->clk); -} -EXPORT_SYMBOL_GPL(iommu_set_twl); -#if defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE) +#if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE) -ssize_t iommu_dump_ctx(struct iommu *obj, char *buf, ssize_t bytes) +ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes) { if (!obj || !buf) return -EINVAL; @@ -406,9 +403,10 @@ ssize_t iommu_dump_ctx(struct iommu *obj, char *buf, ssize_t bytes) return bytes; } -EXPORT_SYMBOL_GPL(iommu_dump_ctx); +EXPORT_SYMBOL_GPL(omap_iommu_dump_ctx); -static int __dump_tlb_entries(struct iommu *obj, struct cr_regs *crs, int num) +static int +__dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) { int i; struct iotlb_lock saved; @@ -431,11 +429,11 @@ static int __dump_tlb_entries(struct iommu *obj, struct cr_regs *crs, int num) } /** - * dump_tlb_entries - dump cr arrays to given buffer + * omap_dump_tlb_entries - dump cr arrays to given buffer * @obj: target iommu * @buf: output buffer **/ -size_t dump_tlb_entries(struct iommu *obj, char *buf, ssize_t bytes) +size_t omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t bytes) { int i, num; struct cr_regs *cr; @@ -455,14 +453,14 @@ size_t dump_tlb_entries(struct iommu *obj, char *buf, ssize_t bytes) return p - buf; } -EXPORT_SYMBOL_GPL(dump_tlb_entries); +EXPORT_SYMBOL_GPL(omap_dump_tlb_entries); -int foreach_iommu_device(void *data, int (*fn)(struct device *, void *)) +int omap_foreach_iommu_device(void *data, int (*fn)(struct device *, void *)) { return driver_for_each_device(&omap_iommu_driver.driver, NULL, data, fn); } -EXPORT_SYMBOL_GPL(foreach_iommu_device); +EXPORT_SYMBOL_GPL(omap_foreach_iommu_device); #endif /* CONFIG_OMAP_IOMMU_DEBUG_MODULE */ @@ -495,7 +493,7 @@ static void iopte_free(u32 *iopte) kmem_cache_free(iopte_cachep, iopte); } -static u32 *iopte_alloc(struct iommu *obj, u32 *iopgd, u32 da) +static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) { u32 *iopte; @@ -533,7 +531,7 @@ pte_ready: return iopte; } -static int iopgd_alloc_section(struct iommu *obj, u32 da, u32 pa, u32 prot) +static int iopgd_alloc_section(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); @@ -548,7 +546,7 @@ static int iopgd_alloc_section(struct iommu *obj, u32 da, u32 pa, u32 prot) return 0; } -static int iopgd_alloc_super(struct iommu *obj, u32 da, u32 pa, u32 prot) +static int iopgd_alloc_super(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); int i; @@ -565,7 +563,7 
@@ static int iopgd_alloc_super(struct iommu *obj, u32 da, u32 pa, u32 prot) return 0; } -static int iopte_alloc_page(struct iommu *obj, u32 da, u32 pa, u32 prot) +static int iopte_alloc_page(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); u32 *iopte = iopte_alloc(obj, iopgd, da); @@ -582,7 +580,7 @@ static int iopte_alloc_page(struct iommu *obj, u32 da, u32 pa, u32 prot) return 0; } -static int iopte_alloc_large(struct iommu *obj, u32 da, u32 pa, u32 prot) +static int iopte_alloc_large(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); u32 *iopte = iopte_alloc(obj, iopgd, da); @@ -603,9 +601,10 @@ static int iopte_alloc_large(struct iommu *obj, u32 da, u32 pa, u32 prot) return 0; } -static int iopgtable_store_entry_core(struct iommu *obj, struct iotlb_entry *e) +static int +iopgtable_store_entry_core(struct omap_iommu *obj, struct iotlb_entry *e) { - int (*fn)(struct iommu *, u32, u32, u32); + int (*fn)(struct omap_iommu *, u32, u32, u32); u32 prot; int err; @@ -641,23 +640,21 @@ static int iopgtable_store_entry_core(struct iommu *obj, struct iotlb_entry *e) } /** - * iopgtable_store_entry - Make an iommu pte entry + * omap_iopgtable_store_entry - Make an iommu pte entry * @obj: target iommu * @e: an iommu tlb entry info **/ -int iopgtable_store_entry(struct iommu *obj, struct iotlb_entry *e) +int omap_iopgtable_store_entry(struct omap_iommu *obj, struct iotlb_entry *e) { int err; flush_iotlb_page(obj, e->da); err = iopgtable_store_entry_core(obj, e); -#ifdef PREFETCH_IOTLB if (!err) - load_iotlb_entry(obj, e); -#endif + prefetch_iotlb_entry(obj, e); return err; } -EXPORT_SYMBOL_GPL(iopgtable_store_entry); +EXPORT_SYMBOL_GPL(omap_iopgtable_store_entry); /** * iopgtable_lookup_entry - Lookup an iommu pte entry @@ -666,7 +663,8 @@ EXPORT_SYMBOL_GPL(iopgtable_store_entry); * @ppgd: iommu pgd entry pointer to be returned * @ppte: iommu pte entry pointer to be returned **/ -void iopgtable_lookup_entry(struct iommu *obj, u32 da, u32 **ppgd, u32 **ppte) +static void +iopgtable_lookup_entry(struct omap_iommu *obj, u32 da, u32 **ppgd, u32 **ppte) { u32 *iopgd, *iopte = NULL; @@ -680,9 +678,8 @@ out: *ppgd = iopgd; *ppte = iopte; } -EXPORT_SYMBOL_GPL(iopgtable_lookup_entry); -static size_t iopgtable_clear_entry_core(struct iommu *obj, u32 da) +static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) { size_t bytes; u32 *iopgd = iopgd_offset(obj, da); @@ -735,7 +732,7 @@ out: * @obj: target iommu * @da: iommu device virtual address **/ -size_t iopgtable_clear_entry(struct iommu *obj, u32 da) +static size_t iopgtable_clear_entry(struct omap_iommu *obj, u32 da) { size_t bytes; @@ -748,9 +745,8 @@ size_t iopgtable_clear_entry(struct iommu *obj, u32 da) return bytes; } -EXPORT_SYMBOL_GPL(iopgtable_clear_entry); -static void iopgtable_clear_entry_all(struct iommu *obj) +static void iopgtable_clear_entry_all(struct omap_iommu *obj) { int i; @@ -785,7 +781,8 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) { u32 da, errs; u32 *iopgd, *iopte; - struct iommu *obj = data; + struct omap_iommu *obj = data; + struct iommu_domain *domain = obj->domain; if (!obj->refcount) return IRQ_NONE; @@ -797,7 +794,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) return IRQ_HANDLED; /* Fault callback or TLB/PTE Dynamic loading */ - if (obj->isr && !obj->isr(obj, da, errs, obj->isr_priv)) + if (!report_iommu_fault(domain, obj->dev, da, 0)) return IRQ_HANDLED; iommu_disable(obj); @@ -821,7 +818,7 @@ static 
irqreturn_t iommu_fault_handler(int irq, void *data) static int device_match_by_alias(struct device *dev, void *data) { - struct iommu *obj = to_iommu(dev); + struct omap_iommu *obj = to_iommu(dev); const char *name = data; pr_debug("%s: %s %s\n", __func__, obj->name, name); @@ -830,57 +827,43 @@ static int device_match_by_alias(struct device *dev, void *data) } /** - * iommu_set_da_range - Set a valid device address range - * @obj: target iommu - * @start Start of valid range - * @end End of valid range + * omap_iommu_attach() - attach iommu device to an iommu domain + * @name: name of target omap iommu device + * @iopgd: page table **/ -int iommu_set_da_range(struct iommu *obj, u32 start, u32 end) -{ - - if (!obj) - return -EFAULT; - - if (end < start || !PAGE_ALIGN(start | end)) - return -EINVAL; - - obj->da_start = start; - obj->da_end = end; - - return 0; -} -EXPORT_SYMBOL_GPL(iommu_set_da_range); - -/** - * iommu_get - Get iommu handler - * @name: target iommu name - **/ -struct iommu *iommu_get(const char *name) +static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) { int err = -ENOMEM; struct device *dev; - struct iommu *obj; + struct omap_iommu *obj; - dev = driver_find_device(&omap_iommu_driver.driver, NULL, (void *)name, - device_match_by_alias); + dev = driver_find_device(&omap_iommu_driver.driver, NULL, + (void *)name, + device_match_by_alias); if (!dev) - return ERR_PTR(-ENODEV); + return NULL; obj = to_iommu(dev); - mutex_lock(&obj->iommu_lock); + spin_lock(&obj->iommu_lock); - if (obj->refcount++ == 0) { - err = iommu_enable(obj); - if (err) - goto err_enable; - flush_iotlb_all(obj); + /* an iommu device can only be attached once */ + if (++obj->refcount > 1) { + dev_err(dev, "%s: already attached!\n", obj->name); + err = -EBUSY; + goto err_enable; } + obj->iopgd = iopgd; + err = iommu_enable(obj); + if (err) + goto err_enable; + flush_iotlb_all(obj); + if (!try_module_get(obj->owner)) goto err_module; - mutex_unlock(&obj->iommu_lock); + spin_unlock(&obj->iommu_lock); dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); return obj; @@ -890,59 +873,32 @@ err_module: iommu_disable(obj); err_enable: obj->refcount--; - mutex_unlock(&obj->iommu_lock); + spin_unlock(&obj->iommu_lock); return ERR_PTR(err); } -EXPORT_SYMBOL_GPL(iommu_get); /** - * iommu_put - Put back iommu handler + * omap_iommu_detach - release iommu device * @obj: target iommu **/ -void iommu_put(struct iommu *obj) +static void omap_iommu_detach(struct omap_iommu *obj) { if (!obj || IS_ERR(obj)) return; - mutex_lock(&obj->iommu_lock); + spin_lock(&obj->iommu_lock); if (--obj->refcount == 0) iommu_disable(obj); module_put(obj->owner); - mutex_unlock(&obj->iommu_lock); + obj->iopgd = NULL; - dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); -} -EXPORT_SYMBOL_GPL(iommu_put); + spin_unlock(&obj->iommu_lock); -int iommu_set_isr(const char *name, - int (*isr)(struct iommu *obj, u32 da, u32 iommu_errs, - void *priv), - void *isr_priv) -{ - struct device *dev; - struct iommu *obj; - - dev = driver_find_device(&omap_iommu_driver.driver, NULL, (void *)name, - device_match_by_alias); - if (!dev) - return -ENODEV; - - obj = to_iommu(dev); - mutex_lock(&obj->iommu_lock); - if (obj->refcount != 0) { - mutex_unlock(&obj->iommu_lock); - return -EBUSY; - } - obj->isr = isr; - obj->isr_priv = isr_priv; - mutex_unlock(&obj->iommu_lock); - - return 0; + dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); } -EXPORT_SYMBOL_GPL(iommu_set_isr); /* * OMAP Device MMU(IOMMU) detection @@ -950,9 +906,8 @@ 
EXPORT_SYMBOL_GPL(iommu_set_isr); static int __devinit omap_iommu_probe(struct platform_device *pdev) { int err = -ENODEV; - void *p; int irq; - struct iommu *obj; + struct omap_iommu *obj; struct resource *res; struct iommu_platform_data *pdata = pdev->dev.platform_data; @@ -974,7 +929,7 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev) obj->da_start = pdata->da_start; obj->da_end = pdata->da_end; - mutex_init(&obj->iommu_lock); + spin_lock_init(&obj->iommu_lock); mutex_init(&obj->mmap_lock); spin_lock_init(&obj->page_table_lock); INIT_LIST_HEAD(&obj->mmap); @@ -1009,22 +964,9 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev) goto err_irq; platform_set_drvdata(pdev, obj); - p = (void *)__get_free_pages(GFP_KERNEL, get_order(IOPGD_TABLE_SIZE)); - if (!p) { - err = -ENOMEM; - goto err_pgd; - } - memset(p, 0, IOPGD_TABLE_SIZE); - clean_dcache_area(p, IOPGD_TABLE_SIZE); - obj->iopgd = p; - - BUG_ON(!IS_ALIGNED((unsigned long)obj->iopgd, IOPGD_TABLE_SIZE)); - dev_info(&pdev->dev, "%s registered\n", obj->name); return 0; -err_pgd: - free_irq(irq, obj); err_irq: iounmap(obj->regbase); err_ioremap: @@ -1040,12 +982,11 @@ static int __devexit omap_iommu_remove(struct platform_device *pdev) { int irq; struct resource *res; - struct iommu *obj = platform_get_drvdata(pdev); + struct omap_iommu *obj = platform_get_drvdata(pdev); platform_set_drvdata(pdev, NULL); iopgtable_clear_entry_all(obj); - free_pages((unsigned long)obj->iopgd, get_order(IOPGD_TABLE_SIZE)); irq = platform_get_irq(pdev, 0); free_irq(irq, obj); @@ -1072,6 +1013,200 @@ static void iopte_cachep_ctor(void *iopte) clean_dcache_area(iopte, IOPTE_TABLE_SIZE); } +static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, + phys_addr_t pa, size_t bytes, int prot) +{ + struct omap_iommu_domain *omap_domain = domain->priv; + struct omap_iommu *oiommu = omap_domain->iommu_dev; + struct device *dev = oiommu->dev; + struct iotlb_entry e; + int omap_pgsz; + u32 ret, flags; + + /* we only support mapping a single iommu page for now */ + omap_pgsz = bytes_to_iopgsz(bytes); + if (omap_pgsz < 0) { + dev_err(dev, "invalid size to map: %d\n", bytes); + return -EINVAL; + } + + dev_dbg(dev, "mapping da 0x%lx to pa 0x%x size 0x%x\n", da, pa, bytes); + + flags = omap_pgsz | prot; + + iotlb_init_entry(&e, da, pa, flags); + + ret = omap_iopgtable_store_entry(oiommu, &e); + if (ret) + dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret); + + return ret; +} + +static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, + size_t size) +{ + struct omap_iommu_domain *omap_domain = domain->priv; + struct omap_iommu *oiommu = omap_domain->iommu_dev; + struct device *dev = oiommu->dev; + + dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size); + + return iopgtable_clear_entry(oiommu, da); +} + +static int +omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct omap_iommu_domain *omap_domain = domain->priv; + struct omap_iommu *oiommu; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + int ret = 0; + + spin_lock(&omap_domain->lock); + + /* only a single device is supported per domain for now */ + if (omap_domain->iommu_dev) { + dev_err(dev, "iommu domain is already attached\n"); + ret = -EBUSY; + goto out; + } + + /* get a handle to and enable the omap iommu */ + oiommu = omap_iommu_attach(arch_data->name, omap_domain->pgtable); + if (IS_ERR(oiommu)) { + ret = PTR_ERR(oiommu); + dev_err(dev, "can't get omap iommu: %d\n", ret); + goto out; + } 
+ + omap_domain->iommu_dev = arch_data->iommu_dev = oiommu; + oiommu->domain = domain; + +out: + spin_unlock(&omap_domain->lock); + return ret; +} + +static void omap_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct omap_iommu_domain *omap_domain = domain->priv; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *oiommu = dev_to_omap_iommu(dev); + + spin_lock(&omap_domain->lock); + + /* only a single device is supported per domain for now */ + if (omap_domain->iommu_dev != oiommu) { + dev_err(dev, "invalid iommu device\n"); + goto out; + } + + iopgtable_clear_entry_all(oiommu); + + omap_iommu_detach(oiommu); + + omap_domain->iommu_dev = arch_data->iommu_dev = NULL; + +out: + spin_unlock(&omap_domain->lock); +} + +static int omap_iommu_domain_init(struct iommu_domain *domain) +{ + struct omap_iommu_domain *omap_domain; + + omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); + if (!omap_domain) { + pr_err("kzalloc failed\n"); + goto out; + } + + omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL); + if (!omap_domain->pgtable) { + pr_err("kzalloc failed\n"); + goto fail_nomem; + } + + /* + * should never fail, but please keep this around to ensure + * we keep the hardware happy + */ + BUG_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE)); + + clean_dcache_area(omap_domain->pgtable, IOPGD_TABLE_SIZE); + spin_lock_init(&omap_domain->lock); + + domain->priv = omap_domain; + + return 0; + +fail_nomem: + kfree(omap_domain); +out: + return -ENOMEM; +} + +/* assume device was already detached */ +static void omap_iommu_domain_destroy(struct iommu_domain *domain) +{ + struct omap_iommu_domain *omap_domain = domain->priv; + + domain->priv = NULL; + + kfree(omap_domain->pgtable); + kfree(omap_domain); +} + +static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, + unsigned long da) +{ + struct omap_iommu_domain *omap_domain = domain->priv; + struct omap_iommu *oiommu = omap_domain->iommu_dev; + struct device *dev = oiommu->dev; + u32 *pgd, *pte; + phys_addr_t ret = 0; + + iopgtable_lookup_entry(oiommu, da, &pgd, &pte); + + if (pte) { + if (iopte_is_small(*pte)) + ret = omap_iommu_translate(*pte, da, IOPTE_MASK); + else if (iopte_is_large(*pte)) + ret = omap_iommu_translate(*pte, da, IOLARGE_MASK); + else + dev_err(dev, "bogus pte 0x%x, da 0x%lx", *pte, da); + } else { + if (iopgd_is_section(*pgd)) + ret = omap_iommu_translate(*pgd, da, IOSECTION_MASK); + else if (iopgd_is_super(*pgd)) + ret = omap_iommu_translate(*pgd, da, IOSUPER_MASK); + else + dev_err(dev, "bogus pgd 0x%x, da 0x%lx", *pgd, da); + } + + return ret; +} + +static int omap_iommu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + return 0; +} + +static struct iommu_ops omap_iommu_ops = { + .domain_init = omap_iommu_domain_init, + .domain_destroy = omap_iommu_domain_destroy, + .attach_dev = omap_iommu_attach_dev, + .detach_dev = omap_iommu_detach_dev, + .map = omap_iommu_map, + .unmap = omap_iommu_unmap, + .iova_to_phys = omap_iommu_iova_to_phys, + .domain_has_cap = omap_iommu_domain_has_cap, + .pgsize_bitmap = OMAP_IOMMU_PGSIZES, +}; + static int __init omap_iommu_init(void) { struct kmem_cache *p; @@ -1084,6 +1219,8 @@ static int __init omap_iommu_init(void) return -ENOMEM; iopte_cachep = p; + bus_set_iommu(&platform_bus_type, &omap_iommu_ops); + return platform_driver_register(&omap_iommu_driver); } module_init(omap_iommu_init); diff --git a/arch/arm/plat-omap/iovmm.c b/drivers/iommu/omap-iovmm.c index 
79e7fedb8602..2e10c3e0a7ae 100644 --- a/arch/arm/plat-omap/iovmm.c +++ b/drivers/iommu/omap-iovmm.c @@ -10,11 +10,13 @@ * published by the Free Software Foundation. */ +#include <linux/module.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/device.h> #include <linux/scatterlist.h> +#include <linux/iommu.h> #include <asm/cacheflush.h> #include <asm/mach/map.h> @@ -22,44 +24,19 @@ #include <plat/iommu.h> #include <plat/iovmm.h> -#include "iopgtable.h" - -/* - * A device driver needs to create address mappings between: - * - * - iommu/device address - * - physical address - * - mpu virtual address - * - * There are 4 possible patterns for them: - * - * |iova/ mapping iommu_ page - * | da pa va (d)-(p)-(v) function type - * --------------------------------------------------------------------------- - * 1 | c c c 1 - 1 - 1 _kmap() / _kunmap() s - * 2 | c c,a c 1 - 1 - 1 _kmalloc()/ _kfree() s - * 3 | c d c 1 - n - 1 _vmap() / _vunmap() s - * 4 | c d,a c 1 - n - 1 _vmalloc()/ _vfree() n* - * - * - * 'iova': device iommu virtual address - * 'da': alias of 'iova' - * 'pa': physical address - * 'va': mpu virtual address - * - * 'c': contiguous memory area - * 'd': discontiguous memory area - * 'a': anonymous memory allocation - * '()': optional feature - * - * 'n': a normal page(4KB) size is used. - * 's': multiple iommu superpage(16MB, 1MB, 64KB, 4KB) size is used. - * - * '*': not yet, but feasible. - */ +#include <plat/iopgtable.h> static struct kmem_cache *iovm_area_cachep; +/* return the offset of the first scatterlist entry in a sg table */ +static unsigned int sgtable_offset(const struct sg_table *sgt) +{ + if (!sgt || !sgt->nents) + return 0; + + return sgt->sgl->offset; +} + /* return total bytes of sg buffers */ static size_t sgtable_len(const struct sg_table *sgt) { @@ -72,11 +49,17 @@ static size_t sgtable_len(const struct sg_table *sgt) for_each_sg(sgt->sgl, sg, sgt->nents, i) { size_t bytes; - bytes = sg->length; + bytes = sg->length + sg->offset; if (!iopgsz_ok(bytes)) { - pr_err("%s: sg[%d] not iommu pagesize(%x)\n", - __func__, i, bytes); + pr_err("%s: sg[%d] not iommu pagesize(%u %u)\n", + __func__, i, bytes, sg->offset); + return 0; + } + + if (i && sg->offset) { + pr_err("%s: sg[%d] offset not allowed in internal " + "entries\n", __func__, i); return 0; } @@ -197,8 +180,8 @@ static void *vmap_sg(const struct sg_table *sgt) u32 pa; int err; - pa = sg_phys(sg); - bytes = sg->length; + pa = sg_phys(sg) - sg->offset; + bytes = sg->length + sg->offset; BUG_ON(bytes != PAGE_SIZE); @@ -224,7 +207,8 @@ static inline void vunmap_sg(const void *va) vunmap(va); } -static struct iovm_struct *__find_iovm_area(struct iommu *obj, const u32 da) +static struct iovm_struct *__find_iovm_area(struct omap_iommu *obj, + const u32 da) { struct iovm_struct *tmp; @@ -246,13 +230,15 @@ static struct iovm_struct *__find_iovm_area(struct iommu *obj, const u32 da) } /** - * find_iovm_area - find iovma which includes @da + * omap_find_iovm_area - find iovma which includes @da + * @dev: client device * @da: iommu device virtual address * * Find the existing iovma starting at @da */ -struct iovm_struct *find_iovm_area(struct iommu *obj, u32 da) +struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da) { + struct omap_iommu *obj = dev_to_omap_iommu(dev); struct iovm_struct *area; mutex_lock(&obj->mmap_lock); @@ -261,13 +247,13 @@ struct iovm_struct *find_iovm_area(struct iommu *obj, u32 da) return area; } -EXPORT_SYMBOL_GPL(find_iovm_area); 
+EXPORT_SYMBOL_GPL(omap_find_iovm_area); /* * This finds the hole(area) which fits the requested address and len * in iovmas mmap, and returns the new allocated iovma. */ -static struct iovm_struct *alloc_iovm_area(struct iommu *obj, u32 da, +static struct iovm_struct *alloc_iovm_area(struct omap_iommu *obj, u32 da, size_t bytes, u32 flags) { struct iovm_struct *new, *tmp; @@ -342,7 +328,7 @@ found: return new; } -static void free_iovm_area(struct iommu *obj, struct iovm_struct *area) +static void free_iovm_area(struct omap_iommu *obj, struct iovm_struct *area) { size_t bytes; @@ -358,15 +344,16 @@ static void free_iovm_area(struct iommu *obj, struct iovm_struct *area) } /** - * da_to_va - convert (d) to (v) - * @obj: objective iommu + * omap_da_to_va - convert (d) to (v) + * @dev: client device * @da: iommu device virtual address * @va: mpu virtual address * * Returns mpu virtual addr which corresponds to a given device virtual addr */ -void *da_to_va(struct iommu *obj, u32 da) +void *omap_da_to_va(struct device *dev, u32 da) { + struct omap_iommu *obj = dev_to_omap_iommu(dev); void *va = NULL; struct iovm_struct *area; @@ -383,7 +370,7 @@ out: return va; } -EXPORT_SYMBOL_GPL(da_to_va); +EXPORT_SYMBOL_GPL(omap_da_to_va); static void sgtable_fill_vmalloc(struct sg_table *sgt, void *_va) { @@ -397,7 +384,7 @@ static void sgtable_fill_vmalloc(struct sg_table *sgt, void *_va) const size_t bytes = PAGE_SIZE; /* - * iommu 'superpage' isn't supported with 'iommu_vmalloc()' + * iommu 'superpage' isn't supported with 'omap_iommu_vmalloc()' */ pg = vmalloc_to_page(va); BUG_ON(!pg); @@ -418,74 +405,36 @@ static inline void sgtable_drain_vmalloc(struct sg_table *sgt) BUG_ON(!sgt); } -static void sgtable_fill_kmalloc(struct sg_table *sgt, u32 pa, u32 da, - size_t len) -{ - unsigned int i; - struct scatterlist *sg; - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - unsigned bytes; - - bytes = max_alignment(da | pa); - bytes = min_t(unsigned, bytes, iopgsz_max(len)); - - BUG_ON(!iopgsz_ok(bytes)); - - sg_set_buf(sg, phys_to_virt(pa), bytes); - /* - * 'pa' is cotinuous(linear). 
- */ - pa += bytes; - da += bytes; - len -= bytes; - } - BUG_ON(len); -} - -static inline void sgtable_drain_kmalloc(struct sg_table *sgt) -{ - /* - * Actually this is not necessary at all, just exists for - * consistency of the code readability - */ - BUG_ON(!sgt); -} - /* create 'da' <-> 'pa' mapping from 'sgt' */ -static int map_iovm_area(struct iommu *obj, struct iovm_struct *new, - const struct sg_table *sgt, u32 flags) +static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new, + const struct sg_table *sgt, u32 flags) { int err; unsigned int i, j; struct scatterlist *sg; u32 da = new->da_start; - if (!obj || !sgt) + if (!domain || !sgt) return -EINVAL; BUG_ON(!sgtable_ok(sgt)); for_each_sg(sgt->sgl, sg, sgt->nents, i) { u32 pa; - int pgsz; size_t bytes; - struct iotlb_entry e; - pa = sg_phys(sg); - bytes = sg->length; + pa = sg_phys(sg) - sg->offset; + bytes = sg->length + sg->offset; flags &= ~IOVMF_PGSZ_MASK; - pgsz = bytes_to_iopgsz(bytes); - if (pgsz < 0) + + if (bytes_to_iopgsz(bytes) < 0) goto err_out; - flags |= pgsz; pr_debug("%s: [%d] %08x %08x(%x)\n", __func__, i, da, pa, bytes); - iotlb_init_entry(&e, da, pa, flags); - err = iopgtable_store_entry(obj, &e); + err = iommu_map(domain, da, pa, bytes, flags); if (err) goto err_out; @@ -499,9 +448,10 @@ err_out: for_each_sg(sgt->sgl, sg, i, j) { size_t bytes; - bytes = iopgtable_clear_entry(obj, da); + bytes = sg->length + sg->offset; - BUG_ON(!iopgsz_ok(bytes)); + /* ignore failures.. we're already handling one */ + iommu_unmap(domain, da, bytes); da += bytes; } @@ -509,22 +459,30 @@ err_out: } /* release 'da' <-> 'pa' mapping */ -static void unmap_iovm_area(struct iommu *obj, struct iovm_struct *area) +static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj, + struct iovm_struct *area) { u32 start; size_t total = area->da_end - area->da_start; + const struct sg_table *sgt = area->sgt; + struct scatterlist *sg; + int i; + size_t unmapped; + BUG_ON(!sgtable_ok(sgt)); BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE)); start = area->da_start; - while (total > 0) { + for_each_sg(sgt->sgl, sg, sgt->nents, i) { size_t bytes; - bytes = iopgtable_clear_entry(obj, start); - if (bytes == 0) - bytes = PAGE_SIZE; - else - dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n", + bytes = sg->length + sg->offset; + + unmapped = iommu_unmap(domain, start, bytes); + if (unmapped < bytes) + break; + + dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n", __func__, start, bytes, area->flags); BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE)); @@ -536,7 +494,8 @@ static void unmap_iovm_area(struct iommu *obj, struct iovm_struct *area) } /* template function for all unmapping */ -static struct sg_table *unmap_vm_area(struct iommu *obj, const u32 da, +static struct sg_table *unmap_vm_area(struct iommu_domain *domain, + struct omap_iommu *obj, const u32 da, void (*fn)(const void *), u32 flags) { struct sg_table *sgt = NULL; @@ -562,7 +521,7 @@ static struct sg_table *unmap_vm_area(struct iommu *obj, const u32 da, } sgt = (struct sg_table *)area->sgt; - unmap_iovm_area(obj, area); + unmap_iovm_area(domain, obj, area); fn(area->va); @@ -577,8 +536,9 @@ out: return sgt; } -static u32 map_iommu_region(struct iommu *obj, u32 da, - const struct sg_table *sgt, void *va, size_t bytes, u32 flags) +static u32 map_iommu_region(struct iommu_domain *domain, struct omap_iommu *obj, + u32 da, const struct sg_table *sgt, void *va, + size_t bytes, u32 flags) { int err = -ENOMEM; struct iovm_struct *new; @@ -593,7 +553,7 @@ static u32 
map_iommu_region(struct iommu *obj, u32 da, new->va = va; new->sgt = sgt; - if (map_iovm_area(obj, new, sgt, new->flags)) + if (map_iovm_area(domain, new, sgt, new->flags)) goto err_map; mutex_unlock(&obj->mmap_lock); @@ -610,24 +570,28 @@ err_alloc_iovma: return err; } -static inline u32 __iommu_vmap(struct iommu *obj, u32 da, - const struct sg_table *sgt, void *va, size_t bytes, u32 flags) +static inline u32 +__iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, + u32 da, const struct sg_table *sgt, + void *va, size_t bytes, u32 flags) { - return map_iommu_region(obj, da, sgt, va, bytes, flags); + return map_iommu_region(domain, obj, da, sgt, va, bytes, flags); } /** - * iommu_vmap - (d)-(p)-(v) address mapper - * @obj: objective iommu + * omap_iommu_vmap - (d)-(p)-(v) address mapper + * @domain: iommu domain + * @dev: client device * @sgt: address of scatter gather table * @flags: iovma and page property * * Creates 1-n-1 mapping with given @sgt and returns @da. * All @sgt element must be io page size aligned. */ -u32 iommu_vmap(struct iommu *obj, u32 da, const struct sg_table *sgt, - u32 flags) +u32 omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da, + const struct sg_table *sgt, u32 flags) { + struct omap_iommu *obj = dev_to_omap_iommu(dev); size_t bytes; void *va = NULL; @@ -648,39 +612,44 @@ u32 iommu_vmap(struct iommu *obj, u32 da, const struct sg_table *sgt, flags |= IOVMF_DISCONT; flags |= IOVMF_MMIO; - da = __iommu_vmap(obj, da, sgt, va, bytes, flags); + da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags); if (IS_ERR_VALUE(da)) vunmap_sg(va); - return da; + return da + sgtable_offset(sgt); } -EXPORT_SYMBOL_GPL(iommu_vmap); +EXPORT_SYMBOL_GPL(omap_iommu_vmap); /** - * iommu_vunmap - release virtual mapping obtained by 'iommu_vmap()' - * @obj: objective iommu + * omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()' + * @domain: iommu domain + * @dev: client device * @da: iommu device virtual address * * Free the iommu virtually contiguous memory area starting at - * @da, which was returned by 'iommu_vmap()'. + * @da, which was returned by 'omap_iommu_vmap()'. */ -struct sg_table *iommu_vunmap(struct iommu *obj, u32 da) +struct sg_table * +omap_iommu_vunmap(struct iommu_domain *domain, struct device *dev, u32 da) { + struct omap_iommu *obj = dev_to_omap_iommu(dev); struct sg_table *sgt; /* - * 'sgt' is allocated before 'iommu_vmalloc()' is called. + * 'sgt' is allocated before 'omap_iommu_vmalloc()' is called. * Just returns 'sgt' to the caller to free */ - sgt = unmap_vm_area(obj, da, vunmap_sg, IOVMF_DISCONT | IOVMF_MMIO); + da &= PAGE_MASK; + sgt = unmap_vm_area(domain, obj, da, vunmap_sg, + IOVMF_DISCONT | IOVMF_MMIO); if (!sgt) dev_dbg(obj->dev, "%s: No sgt\n", __func__); return sgt; } -EXPORT_SYMBOL_GPL(iommu_vunmap); +EXPORT_SYMBOL_GPL(omap_iommu_vunmap); /** - * iommu_vmalloc - (d)-(p)-(v) address allocator and mapper - * @obj: objective iommu + * omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper + * @dev: client device * @da: contiguous iommu virtual memory * @bytes: allocation size * @flags: iovma and page property @@ -688,8 +657,11 @@ EXPORT_SYMBOL_GPL(iommu_vunmap); * Allocate @bytes linearly and creates 1-n-1 mapping and returns * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set. 
*/ -u32 iommu_vmalloc(struct iommu *obj, u32 da, size_t bytes, u32 flags) +u32 +omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, u32 da, + size_t bytes, u32 flags) { + struct omap_iommu *obj = dev_to_omap_iommu(dev); void *va; struct sg_table *sgt; @@ -712,7 +684,7 @@ u32 iommu_vmalloc(struct iommu *obj, u32 da, size_t bytes, u32 flags) } sgtable_fill_vmalloc(sgt, va); - da = __iommu_vmap(obj, da, sgt, va, bytes, flags); + da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags); if (IS_ERR_VALUE(da)) goto err_iommu_vmap; @@ -725,158 +697,29 @@ err_sgt_alloc: vfree(va); return da; } -EXPORT_SYMBOL_GPL(iommu_vmalloc); +EXPORT_SYMBOL_GPL(omap_iommu_vmalloc); /** - * iommu_vfree - release memory allocated by 'iommu_vmalloc()' - * @obj: objective iommu + * omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()' + * @dev: client device * @da: iommu device virtual address * * Frees the iommu virtually continuous memory area starting at - * @da, as obtained from 'iommu_vmalloc()'. + * @da, as obtained from 'omap_iommu_vmalloc()'. */ -void iommu_vfree(struct iommu *obj, const u32 da) +void omap_iommu_vfree(struct iommu_domain *domain, struct device *dev, + const u32 da) { + struct omap_iommu *obj = dev_to_omap_iommu(dev); struct sg_table *sgt; - sgt = unmap_vm_area(obj, da, vfree, IOVMF_DISCONT | IOVMF_ALLOC); + sgt = unmap_vm_area(domain, obj, da, vfree, + IOVMF_DISCONT | IOVMF_ALLOC); if (!sgt) dev_dbg(obj->dev, "%s: No sgt\n", __func__); sgtable_free(sgt); } -EXPORT_SYMBOL_GPL(iommu_vfree); - -static u32 __iommu_kmap(struct iommu *obj, u32 da, u32 pa, void *va, - size_t bytes, u32 flags) -{ - struct sg_table *sgt; - - sgt = sgtable_alloc(bytes, flags, da, pa); - if (IS_ERR(sgt)) - return PTR_ERR(sgt); - - sgtable_fill_kmalloc(sgt, pa, da, bytes); - - da = map_iommu_region(obj, da, sgt, va, bytes, flags); - if (IS_ERR_VALUE(da)) { - sgtable_drain_kmalloc(sgt); - sgtable_free(sgt); - } - - return da; -} - -/** - * iommu_kmap - (d)-(p)-(v) address mapper - * @obj: objective iommu - * @da: contiguous iommu virtual memory - * @pa: contiguous physical memory - * @flags: iovma and page property - * - * Creates 1-1-1 mapping and returns @da again, which can be - * adjusted if 'IOVMF_DA_FIXED' is not set. - */ -u32 iommu_kmap(struct iommu *obj, u32 da, u32 pa, size_t bytes, - u32 flags) -{ - void *va; - - if (!obj || !obj->dev || !bytes) - return -EINVAL; - - bytes = PAGE_ALIGN(bytes); - - va = ioremap(pa, bytes); - if (!va) - return -ENOMEM; - - flags |= IOVMF_LINEAR; - flags |= IOVMF_MMIO; - - da = __iommu_kmap(obj, da, pa, va, bytes, flags); - if (IS_ERR_VALUE(da)) - iounmap(va); - - return da; -} -EXPORT_SYMBOL_GPL(iommu_kmap); - -/** - * iommu_kunmap - release virtual mapping obtained by 'iommu_kmap()' - * @obj: objective iommu - * @da: iommu device virtual address - * - * Frees the iommu virtually contiguous memory area starting at - * @da, which was passed to and was returned by'iommu_kmap()'. 
- */ -void iommu_kunmap(struct iommu *obj, u32 da) -{ - struct sg_table *sgt; - typedef void (*func_t)(const void *); - - sgt = unmap_vm_area(obj, da, (func_t)iounmap, - IOVMF_LINEAR | IOVMF_MMIO); - if (!sgt) - dev_dbg(obj->dev, "%s: No sgt\n", __func__); - sgtable_free(sgt); -} -EXPORT_SYMBOL_GPL(iommu_kunmap); - -/** - * iommu_kmalloc - (d)-(p)-(v) address allocator and mapper - * @obj: objective iommu - * @da: contiguous iommu virtual memory - * @bytes: bytes for allocation - * @flags: iovma and page property - * - * Allocate @bytes linearly and creates 1-1-1 mapping and returns - * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set. - */ -u32 iommu_kmalloc(struct iommu *obj, u32 da, size_t bytes, u32 flags) -{ - void *va; - u32 pa; - - if (!obj || !obj->dev || !bytes) - return -EINVAL; - - bytes = PAGE_ALIGN(bytes); - - va = kmalloc(bytes, GFP_KERNEL | GFP_DMA); - if (!va) - return -ENOMEM; - pa = virt_to_phys(va); - - flags |= IOVMF_LINEAR; - flags |= IOVMF_ALLOC; - - da = __iommu_kmap(obj, da, pa, va, bytes, flags); - if (IS_ERR_VALUE(da)) - kfree(va); - - return da; -} -EXPORT_SYMBOL_GPL(iommu_kmalloc); - -/** - * iommu_kfree - release virtual mapping obtained by 'iommu_kmalloc()' - * @obj: objective iommu - * @da: iommu device virtual address - * - * Frees the iommu virtually contiguous memory area starting at - * @da, which was passed to and was returned by'iommu_kmalloc()'. - */ -void iommu_kfree(struct iommu *obj, u32 da) -{ - struct sg_table *sgt; - - sgt = unmap_vm_area(obj, da, kfree, IOVMF_LINEAR | IOVMF_ALLOC); - if (!sgt) - dev_dbg(obj->dev, "%s: No sgt\n", __func__); - sgtable_free(sgt); -} -EXPORT_SYMBOL_GPL(iommu_kfree); - +EXPORT_SYMBOL_GPL(omap_iommu_vfree); static int __init iovmm_init(void) { diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index 1b4b4214e3b0..38719599a476 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -764,8 +764,7 @@ source "drivers/media/video/m5mols/Kconfig" config VIDEO_OMAP3 tristate "OMAP 3 Camera support (EXPERIMENTAL)" - select OMAP_IOMMU - depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API && ARCH_OMAP3 && EXPERIMENTAL + depends on OMAP_IOVMM && VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API && ARCH_OMAP3 && EXPERIMENTAL ---help--- Driver for an OMAP 3 camera controller. 
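A minimal sketch of the calling sequence the reworked iovmm API above expects from a client driver. This is not taken from the patch: example_map_buffer() and EXAMPLE_IOVMF_FLAGS are illustrative placeholders, error handling is abbreviated, and it assumes pdev is a device wired to an OMAP IOMMU through its archdata (as the omap3isp device handled below is). The mappers now take an (iommu_domain, device) pair instead of the old struct iommu handle, and attachment goes through the generic IOMMU API.

#include <linux/err.h>
#include <linux/iommu.h>
#include <linux/platform_device.h>
#include <plat/iovmm.h>

/* Placeholder for the IOVMF_ and IOMMU_ attribute flags a real driver passes. */
#define EXAMPLE_IOVMF_FLAGS	(IOMMU_READ | IOMMU_WRITE)

static int example_map_buffer(struct platform_device *pdev, size_t len)
{
	struct iommu_domain *domain;
	u32 da;
	int ret;

	/* Domains are allocated per bus and attached per device. */
	domain = iommu_domain_alloc(pdev->dev.bus);
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, &pdev->dev);
	if (ret)
		goto free_domain;

	/* The iovmm helpers take the (domain, device) pair, not a struct iommu. */
	da = omap_iommu_vmalloc(domain, &pdev->dev, 0, len, EXAMPLE_IOVMF_FLAGS);
	if (IS_ERR_VALUE(da)) {
		ret = (int)da;
		goto detach;
	}

	/* ... omap_da_to_va(&pdev->dev, da) yields the CPU view ... */

	omap_iommu_vfree(domain, &pdev->dev, da);
	ret = 0;

detach:
	iommu_detach_device(domain, &pdev->dev);
free_domain:
	iommu_domain_free(domain);
	return ret;
}

The omap3isp probe/remove changes below implement exactly this pairing of iommu_attach_device() with the renamed omap_iommu_* helpers.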
diff --git a/drivers/media/video/omap3isp/isp.c b/drivers/media/video/omap3isp/isp.c index 5cea2bbd7014..abc26854e29c 100644 --- a/drivers/media/video/omap3isp/isp.c +++ b/drivers/media/video/omap3isp/isp.c @@ -1107,8 +1107,7 @@ isp_restore_context(struct isp_device *isp, struct isp_reg *reg_list) static void isp_save_ctx(struct isp_device *isp) { isp_save_context(isp, isp_reg_list); - if (isp->iommu) - iommu_save_ctx(isp->iommu); + omap_iommu_save_ctx(isp->dev); } /* @@ -1121,8 +1120,7 @@ static void isp_save_ctx(struct isp_device *isp) static void isp_restore_ctx(struct isp_device *isp) { isp_restore_context(isp, isp_reg_list); - if (isp->iommu) - iommu_restore_ctx(isp->iommu); + omap_iommu_restore_ctx(isp->dev); omap3isp_ccdc_restore_context(isp); omap3isp_preview_restore_context(isp); } @@ -1975,7 +1973,8 @@ static int isp_remove(struct platform_device *pdev) isp_cleanup_modules(isp); omap3isp_get(isp); - iommu_put(isp->iommu); + iommu_detach_device(isp->domain, &pdev->dev); + iommu_domain_free(isp->domain); omap3isp_put(isp); free_irq(isp->irq_num, isp); @@ -2122,26 +2121,31 @@ static int isp_probe(struct platform_device *pdev) } } - /* IOMMU */ - isp->iommu = iommu_get("isp"); - if (IS_ERR_OR_NULL(isp->iommu)) { - isp->iommu = NULL; - ret = -ENODEV; + isp->domain = iommu_domain_alloc(pdev->dev.bus); + if (!isp->domain) { + dev_err(isp->dev, "can't alloc iommu domain\n"); + ret = -ENOMEM; goto error_isp; } + ret = iommu_attach_device(isp->domain, &pdev->dev); + if (ret) { + dev_err(&pdev->dev, "can't attach iommu device: %d\n", ret); + goto free_domain; + } + /* Interrupt */ isp->irq_num = platform_get_irq(pdev, 0); if (isp->irq_num <= 0) { dev_err(isp->dev, "No IRQ resource\n"); ret = -ENODEV; - goto error_isp; + goto detach_dev; } if (request_irq(isp->irq_num, isp_isr, IRQF_SHARED, "OMAP3 ISP", isp)) { dev_err(isp->dev, "Unable to request IRQ\n"); ret = -EINVAL; - goto error_isp; + goto detach_dev; } /* Entities */ @@ -2162,8 +2166,11 @@ error_modules: isp_cleanup_modules(isp); error_irq: free_irq(isp->irq_num, isp); +detach_dev: + iommu_detach_device(isp->domain, &pdev->dev); +free_domain: + iommu_domain_free(isp->domain); error_isp: - iommu_put(isp->iommu); omap3isp_put(isp); error: isp_put_clocks(isp); diff --git a/drivers/media/video/omap3isp/isp.h b/drivers/media/video/omap3isp/isp.h index 529e582ef948..60e0e29e3edc 100644 --- a/drivers/media/video/omap3isp/isp.h +++ b/drivers/media/video/omap3isp/isp.h @@ -32,6 +32,7 @@ #include <linux/io.h> #include <linux/platform_device.h> #include <linux/wait.h> +#include <linux/iommu.h> #include <plat/iommu.h> #include <plat/iovmm.h> @@ -294,7 +295,7 @@ struct isp_device { unsigned int sbl_resources; unsigned int subclk_resources; - struct iommu *iommu; + struct iommu_domain *domain; struct isp_platform_callback platform_cb; }; diff --git a/drivers/media/video/omap3isp/ispccdc.c b/drivers/media/video/omap3isp/ispccdc.c index 80796eb0c53e..8748e0855c61 100644 --- a/drivers/media/video/omap3isp/ispccdc.c +++ b/drivers/media/video/omap3isp/ispccdc.c @@ -366,7 +366,7 @@ static void ccdc_lsc_free_request(struct isp_ccdc_device *ccdc, dma_unmap_sg(isp->dev, req->iovm->sgt->sgl, req->iovm->sgt->nents, DMA_TO_DEVICE); if (req->table) - iommu_vfree(isp->iommu, req->table); + omap_iommu_vfree(isp->domain, isp->dev, req->table); kfree(req); } @@ -438,15 +438,15 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc, req->enable = 1; - req->table = iommu_vmalloc(isp->iommu, 0, req->config.size, - IOMMU_FLAG); + req->table = 
omap_iommu_vmalloc(isp->domain, isp->dev, 0, + req->config.size, IOMMU_FLAG); if (IS_ERR_VALUE(req->table)) { req->table = 0; ret = -ENOMEM; goto done; } - req->iovm = find_iovm_area(isp->iommu, req->table); + req->iovm = omap_find_iovm_area(isp->dev, req->table); if (req->iovm == NULL) { ret = -ENOMEM; goto done; @@ -462,7 +462,7 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc, dma_sync_sg_for_cpu(isp->dev, req->iovm->sgt->sgl, req->iovm->sgt->nents, DMA_TO_DEVICE); - table = da_to_va(isp->iommu, req->table); + table = omap_da_to_va(isp->dev, req->table); if (copy_from_user(table, config->lsc, req->config.size)) { ret = -EFAULT; goto done; @@ -731,18 +731,19 @@ static int ccdc_config(struct isp_ccdc_device *ccdc, /* * table_new must be 64-bytes aligned, but it's - * already done by iommu_vmalloc(). + * already done by omap_iommu_vmalloc(). */ size = ccdc->fpc.fpnum * 4; - table_new = iommu_vmalloc(isp->iommu, 0, size, - IOMMU_FLAG); + table_new = omap_iommu_vmalloc(isp->domain, isp->dev, + 0, size, IOMMU_FLAG); if (IS_ERR_VALUE(table_new)) return -ENOMEM; - if (copy_from_user(da_to_va(isp->iommu, table_new), + if (copy_from_user(omap_da_to_va(isp->dev, table_new), (__force void __user *) ccdc->fpc.fpcaddr, size)) { - iommu_vfree(isp->iommu, table_new); + omap_iommu_vfree(isp->domain, isp->dev, + table_new); return -EFAULT; } @@ -752,7 +753,7 @@ static int ccdc_config(struct isp_ccdc_device *ccdc, ccdc_configure_fpc(ccdc); if (table_old != 0) - iommu_vfree(isp->iommu, table_old); + omap_iommu_vfree(isp->domain, isp->dev, table_old); } return ccdc_lsc_config(ccdc, ccdc_struct); @@ -1405,11 +1406,14 @@ static int __ccdc_handle_stopping(struct isp_ccdc_device *ccdc, u32 event) static void ccdc_hs_vs_isr(struct isp_ccdc_device *ccdc) { + struct isp_pipeline *pipe = + to_isp_pipeline(&ccdc->video_out.video.entity); struct video_device *vdev = &ccdc->subdev.devnode; struct v4l2_event event; memset(&event, 0, sizeof(event)); - event.type = V4L2_EVENT_OMAP3ISP_HS_VS; + event.type = V4L2_EVENT_FRAME_SYNC; + event.u.frame_sync.frame_sequence = atomic_read(&pipe->frame_number); v4l2_event_queue(vdev, &event); } @@ -1691,7 +1695,11 @@ static long ccdc_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg) static int ccdc_subscribe_event(struct v4l2_subdev *sd, struct v4l2_fh *fh, struct v4l2_event_subscription *sub) { - if (sub->type != V4L2_EVENT_OMAP3ISP_HS_VS) + if (sub->type != V4L2_EVENT_FRAME_SYNC) + return -EINVAL; + + /* line number is zero at frame start */ + if (sub->id != 0) return -EINVAL; return v4l2_event_subscribe(fh, sub, OMAP3ISP_CCDC_NEVENTS); @@ -1828,7 +1836,7 @@ ccdc_try_format(struct isp_ccdc_device *ccdc, struct v4l2_subdev_fh *fh, * callers to request an output size bigger than the input size * up to the nearest multiple of 16. */ - fmt->width = clamp_t(u32, width, 32, (fmt->width + 15) & ~15); + fmt->width = clamp_t(u32, width, 32, fmt->width + 15); fmt->width &= ~15; fmt->height = clamp_t(u32, height, 32, fmt->height); break; @@ -2144,6 +2152,37 @@ static const struct media_entity_operations ccdc_media_ops = { .link_setup = ccdc_link_setup, }; +void omap3isp_ccdc_unregister_entities(struct isp_ccdc_device *ccdc) +{ + v4l2_device_unregister_subdev(&ccdc->subdev); + omap3isp_video_unregister(&ccdc->video_out); +} + +int omap3isp_ccdc_register_entities(struct isp_ccdc_device *ccdc, + struct v4l2_device *vdev) +{ + int ret; + + /* Register the subdev and video node. 
*/ + ret = v4l2_device_register_subdev(vdev, &ccdc->subdev); + if (ret < 0) + goto error; + + ret = omap3isp_video_register(&ccdc->video_out, vdev); + if (ret < 0) + goto error; + + return 0; + +error: + omap3isp_ccdc_unregister_entities(ccdc); + return ret; +} + +/* ----------------------------------------------------------------------------- + * ISP CCDC initialisation and cleanup + */ + /* * ccdc_init_entities - Initialize V4L2 subdev and media entity * @ccdc: ISP CCDC module @@ -2185,50 +2224,23 @@ static int ccdc_init_entities(struct isp_ccdc_device *ccdc) ret = omap3isp_video_init(&ccdc->video_out, "CCDC"); if (ret < 0) - return ret; + goto error_video; /* Connect the CCDC subdev to the video node. */ ret = media_entity_create_link(&ccdc->subdev.entity, CCDC_PAD_SOURCE_OF, &ccdc->video_out.video.entity, 0, 0); if (ret < 0) - return ret; - - return 0; -} - -void omap3isp_ccdc_unregister_entities(struct isp_ccdc_device *ccdc) -{ - media_entity_cleanup(&ccdc->subdev.entity); - - v4l2_device_unregister_subdev(&ccdc->subdev); - omap3isp_video_unregister(&ccdc->video_out); -} - -int omap3isp_ccdc_register_entities(struct isp_ccdc_device *ccdc, - struct v4l2_device *vdev) -{ - int ret; - - /* Register the subdev and video node. */ - ret = v4l2_device_register_subdev(vdev, &ccdc->subdev); - if (ret < 0) - goto error; - - ret = omap3isp_video_register(&ccdc->video_out, vdev); - if (ret < 0) - goto error; + goto error_link; return 0; -error: - omap3isp_ccdc_unregister_entities(ccdc); +error_link: + omap3isp_video_cleanup(&ccdc->video_out); +error_video: + media_entity_cleanup(me); return ret; } -/* ----------------------------------------------------------------------------- - * ISP CCDC initialisation and cleanup - */ - /* * omap3isp_ccdc_init - CCDC module initialization. * @dev: Device pointer specific to the OMAP3 ISP. @@ -2240,6 +2252,7 @@ error: int omap3isp_ccdc_init(struct isp_device *isp) { struct isp_ccdc_device *ccdc = &isp->isp_ccdc; + int ret; spin_lock_init(&ccdc->lock); init_waitqueue_head(&ccdc->wait); @@ -2268,7 +2281,13 @@ int omap3isp_ccdc_init(struct isp_device *isp) ccdc->update = OMAP3ISP_CCDC_BLCLAMP; ccdc_apply_controls(ccdc); - return ccdc_init_entities(ccdc); + ret = ccdc_init_entities(ccdc); + if (ret < 0) { + mutex_destroy(&ccdc->ioctl_lock); + return ret; + } + + return 0; } /* @@ -2279,6 +2298,9 @@ void omap3isp_ccdc_cleanup(struct isp_device *isp) { struct isp_ccdc_device *ccdc = &isp->isp_ccdc; + omap3isp_video_cleanup(&ccdc->video_out); + media_entity_cleanup(&ccdc->subdev.entity); + /* Free LSC requests. As the CCDC is stopped there's no active request, * so only the pending request and the free queue need to be handled. 
*/ @@ -2287,5 +2309,7 @@ void omap3isp_ccdc_cleanup(struct isp_device *isp) ccdc_lsc_free_queue(ccdc, &ccdc->lsc.free_queue); if (ccdc->fpc.fpcaddr != 0) - iommu_vfree(isp->iommu, ccdc->fpc.fpcaddr); + omap_iommu_vfree(isp->domain, isp->dev, ccdc->fpc.fpcaddr); + + mutex_destroy(&ccdc->ioctl_lock); } diff --git a/drivers/media/video/omap3isp/ispstat.c b/drivers/media/video/omap3isp/ispstat.c index 808065948ac1..036fc938bbb5 100644 --- a/drivers/media/video/omap3isp/ispstat.c +++ b/drivers/media/video/omap3isp/ispstat.c @@ -366,7 +366,8 @@ static void isp_stat_bufs_free(struct ispstat *stat) dma_unmap_sg(isp->dev, buf->iovm->sgt->sgl, buf->iovm->sgt->nents, DMA_FROM_DEVICE); - iommu_vfree(isp->iommu, buf->iommu_addr); + omap_iommu_vfree(isp->domain, isp->dev, + buf->iommu_addr); } else { if (!buf->virt_addr) continue; @@ -399,8 +400,8 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size) struct iovm_struct *iovm; WARN_ON(buf->dma_addr); - buf->iommu_addr = iommu_vmalloc(isp->iommu, 0, size, - IOMMU_FLAG); + buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->dev, 0, + size, IOMMU_FLAG); if (IS_ERR((void *)buf->iommu_addr)) { dev_err(stat->isp->dev, "%s: Can't acquire memory for " @@ -409,7 +410,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size) return -ENOMEM; } - iovm = find_iovm_area(isp->iommu, buf->iommu_addr); + iovm = omap_find_iovm_area(isp->dev, buf->iommu_addr); if (!iovm || !dma_map_sg(isp->dev, iovm->sgt->sgl, iovm->sgt->nents, DMA_FROM_DEVICE)) { @@ -418,7 +419,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size) } buf->iovm = iovm; - buf->virt_addr = da_to_va(stat->isp->iommu, + buf->virt_addr = omap_da_to_va(stat->isp->dev, (u32)buf->iommu_addr); buf->empty = 1; dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated." 
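The ispstat changes above follow a recurring pattern for statistics buffers: allocate with omap_iommu_vmalloc(), look up the backing sg_table with omap_find_iovm_area() for DMA cache maintenance, then resolve the CPU virtual address with omap_da_to_va(). A condensed sketch of that pattern follows; the helper name is hypothetical, error handling is abbreviated, and dev is assumed to be already attached to domain.

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/iommu.h>
#include <plat/iovmm.h>

/* Hypothetical helper mirroring isp_stat_bufs_alloc_iommu() above. */
static void *example_alloc_stat_buf(struct iommu_domain *domain,
				    struct device *dev, size_t size,
				    u32 flags, u32 *da_out)
{
	struct iovm_struct *iovm;
	u32 da;

	da = omap_iommu_vmalloc(domain, dev, 0, size, flags);
	if (IS_ERR_VALUE(da))
		return NULL;

	/* The iovm area carries the sg_table used for cache maintenance. */
	iovm = omap_find_iovm_area(dev, da);
	if (!iovm || !dma_map_sg(dev, iovm->sgt->sgl, iovm->sgt->nents,
				 DMA_FROM_DEVICE)) {
		omap_iommu_vfree(domain, dev, da);
		return NULL;
	}

	*da_out = da;
	return omap_da_to_va(dev, da);	/* CPU view of the same pages */
}

Teardown reverses the order: dma_unmap_sg() on the saved sg_table, then omap_iommu_vfree(), as isp_stat_bufs_free() above does.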
diff --git a/drivers/media/video/omap3isp/ispvideo.c b/drivers/media/video/omap3isp/ispvideo.c index fd965adfd597..6d1d55b7c72c 100644 --- a/drivers/media/video/omap3isp/ispvideo.c +++ b/drivers/media/video/omap3isp/ispvideo.c @@ -446,7 +446,7 @@ ispmmu_vmap(struct isp_device *isp, const struct scatterlist *sglist, int sglen) sgt->nents = sglen; sgt->orig_nents = sglen; - da = iommu_vmap(isp->iommu, 0, sgt, IOMMU_FLAG); + da = omap_iommu_vmap(isp->domain, isp->dev, 0, sgt, IOMMU_FLAG); if (IS_ERR_VALUE(da)) kfree(sgt); @@ -462,7 +462,7 @@ static void ispmmu_vunmap(struct isp_device *isp, dma_addr_t da) { struct sg_table *sgt; - sgt = iommu_vunmap(isp->iommu, (u32)da); + sgt = omap_iommu_vunmap(isp->domain, isp->dev, (u32)da); kfree(sgt); } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index cec462927317..37d35c938b5a 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2788,7 +2788,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832); #endif /*CONFIG_MMC_RICOH_MMC*/ -#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) +#ifdef CONFIG_DMAR_TABLE #define VTUNCERRMSK_REG 0x1ac #define VTD_MSK_SPEC_ERRORS (1 << 31) /* diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index a6863a2dec1f..ef00610837d4 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -20,12 +20,148 @@ #ifndef _ASM_X86_AMD_IOMMU_H #define _ASM_X86_AMD_IOMMU_H -#include <linux/irqreturn.h> +#include <linux/types.h> #ifdef CONFIG_AMD_IOMMU +struct task_struct; +struct pci_dev; + extern int amd_iommu_detect(void); + +/** + * amd_iommu_enable_device_erratum() - Enable erratum workaround for device + * in the IOMMUv2 driver + * @pdev: The PCI device the workaround is necessary for + * @erratum: The erratum workaround to enable + * + * The function needs to be called before amd_iommu_init_device(). + * Possible values for the erratum number are for now: + * - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI + * is enabled + * - AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE - Limit number of outstanding PRI + * requests to one + */ +#define AMD_PRI_DEV_ERRATUM_ENABLE_RESET 0 +#define AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE 1 + +extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum); + +/** + * amd_iommu_init_device() - Init device for use with IOMMUv2 driver + * @pdev: The PCI device to initialize + * @pasids: Number of PASIDs to support for this device + * + * This function does all setup for the device pdev so that it can be + * used with IOMMUv2. + * Returns 0 on success or negative value on error. + */ +extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids); + +/** + * amd_iommu_free_device() - Free all IOMMUv2 related device resources + * and disable IOMMUv2 usage for this device + * @pdev: The PCI device to disable IOMMUv2 usage for' + */ +extern void amd_iommu_free_device(struct pci_dev *pdev); + +/** + * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device + * @pdev: The PCI device to bind the task to + * @pasid: The PASID on the device the task should be bound to + * @task: the task to bind + * + * The function returns 0 on success or a negative value on error. 
+ */ +extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, + struct task_struct *task); + +/** + * amd_iommu_unbind_pasid() - Unbind a PASID from its task on + * a device + * @pdev: The device of the PASID + * @pasid: The PASID to unbind + * + * When this function returns the device is no longer using the PASID + * and the PASID is no longer bound to its task. + */ +extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid); + +/** + * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed + * PRI requests + * @pdev: The PCI device the call-back should be registered for + * @cb: The call-back function + * + * The IOMMUv2 driver invokes this call-back when it is unable to + * successfully handle a PRI request. The device driver can then decide + * which PRI response the device should see. Possible return values for + * the call-back are: + * + * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device + * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device + * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device, + * the device is required to disable + * PRI when it receives this response + * + * The function returns 0 on success or negative value on error. + */ +#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0 +#define AMD_IOMMU_INV_PRI_RSP_INVALID 1 +#define AMD_IOMMU_INV_PRI_RSP_FAIL 2 + +typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, + int pasid, + unsigned long address, + u16); + +extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, + amd_iommu_invalid_ppr_cb cb); + +/** + * amd_iommu_device_info() - Get information about IOMMUv2 support of a + * PCI device + * @pdev: PCI device to query information from + * @info: A pointer to an amd_iommu_device_info structure which will contain + * the information about the PCI device + * + * Returns 0 on success, negative value on error + */ + +#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ +#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ +#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ +#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution + on memory pages */ +#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request + super-user privileges */ + +struct amd_iommu_device_info { + int max_pasids; + u32 flags; +}; + +extern int amd_iommu_device_info(struct pci_dev *pdev, + struct amd_iommu_device_info *info); + +/** + * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating + * a pasid context. This call-back is + * invoked when the IOMMUv2 driver needs to + * invalidate a PASID context, for example + * because the task that is bound to that + * context is about to exit. 
+ * + * @pdev: The PCI device the call-back should be registered for + * @cb: The call-back function + */ + +typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, int pasid); + +extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, + amd_iommu_invalidate_ctx cb); + #else static inline int amd_iommu_detect(void) { return -ENODEV; } diff --git a/include/linux/device.h b/include/linux/device.h index c20dfbfc49b4..e838e143baa7 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -33,6 +33,7 @@ struct class; struct subsys_private; struct bus_type; struct device_node; +struct iommu_ops; struct bus_attribute { struct attribute attr; @@ -67,6 +68,9 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * @resume: Called to bring a device on this bus out of sleep mode. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. + * @iommu_ops IOMMU specific operations for this bus, used to attach IOMMU + * driver implementations to a bus and allow the driver to do + * bus-specific setup * @p: The private data of the driver core, only the driver core can * touch this. * @@ -96,6 +100,8 @@ struct bus_type { const struct dev_pm_ops *pm; + struct iommu_ops *iommu_ops; + struct subsys_private *p; }; diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index bbd8661b3473..57c9a8ae4f2d 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -25,11 +25,13 @@ struct intel_iommu; struct dmar_domain; struct root_entry; -extern void free_dmar_iommu(struct intel_iommu *iommu); -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU +extern void free_dmar_iommu(struct intel_iommu *iommu); extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); +extern int dmar_disabled; +extern int intel_iommu_enabled; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { @@ -39,8 +41,12 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) { return 0; } +static inline void free_dmar_iommu(struct intel_iommu *iommu) +{ +} +#define dmar_disabled (1) +#define intel_iommu_enabled (0) #endif -extern int dmar_disabled; #endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 7b776d71d36d..a8b1a847c103 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,8 +26,13 @@ #include <linux/msi.h> #include <linux/irqreturn.h> +/* DMAR Flags */ +#define DMAR_INTR_REMAP 0x1 +#define DMAR_X2APIC_OPT_OUT 0x2 + struct intel_iommu; -#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) +#ifdef CONFIG_DMAR_TABLE +extern struct acpi_table_header *dmar_tbl; struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -76,7 +81,7 @@ static inline int enable_drhd_fault_handling(void) { return -1; } -#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ +#endif /* !CONFIG_DMAR_TABLE */ struct irte { union { @@ -107,10 +112,10 @@ struct irte { }; }; -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP extern int intr_remapping_enabled; extern int intr_remapping_supported(void); -extern int enable_intr_remapping(int); +extern int enable_intr_remapping(void); extern void disable_intr_remapping(void); extern int reenable_intr_remapping(int); @@ -177,7 +182,7 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev) #define intr_remapping_enabled (0) -static inline int enable_intr_remapping(int eim) +static inline int 
enable_intr_remapping(void) { return -1; } @@ -192,6 +197,11 @@ static inline int reenable_intr_remapping(int eim) } #endif +enum { + IRQ_REMAP_XAPIC_MODE, + IRQ_REMAP_X2APIC_MODE, +}; + /* Can't use the common MSI interrupt functions * since DMAR is not a pci device */ @@ -204,7 +214,7 @@ extern int dmar_set_interrupt(struct intel_iommu *iommu); extern irqreturn_t dmar_fault(int irq, void *dev_id); extern int arch_setup_dmar_msi(unsigned int irq); -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU extern int iommu_detected, no_iommu; extern struct list_head dmar_rmrr_units; struct dmar_rmrr_unit { @@ -227,9 +237,26 @@ struct dmar_atsr_unit { u8 include_all:1; /* include all ports */ }; +int dmar_parse_rmrr_atsr_dev(void); +extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); +extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); +extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, + struct pci_dev ***devices, u16 segment); extern int intel_iommu_init(void); -#else /* !CONFIG_DMAR: */ +#else /* !CONFIG_INTEL_IOMMU: */ static inline int intel_iommu_init(void) { return -ENODEV; } -#endif /* CONFIG_DMAR */ +static inline int dmar_parse_one_rmrr(struct acpi_dmar_header *header) +{ + return 0; +} +static inline int dmar_parse_one_atsr(struct acpi_dmar_header *header) +{ + return 0; +} +static inline int dmar_parse_rmrr_atsr_dev(void) +{ + return 0; +} +#endif /* CONFIG_INTEL_IOMMU */ #endif /* __DMAR_H__ */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 9310c699a37d..e6ca56de9936 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -271,7 +271,7 @@ struct qi_desc { }; struct q_inval { - spinlock_t q_lock; + raw_spinlock_t q_lock; struct qi_desc *desc; /* invalidation queue */ int *desc_status; /* desc status */ int free_head; /* first free entry */ @@ -279,7 +279,7 @@ struct q_inval { int free_cnt; }; -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP /* 1MB - maximum possible interrupt remapping table size */ #define INTR_REMAP_PAGE_ORDER 8 #define INTR_REMAP_TABLE_REG_SIZE 0xf @@ -311,14 +311,14 @@ struct intel_iommu { u64 cap; u64 ecap; u32 gcmd; /* Holds TE, EAFL. 
Don't need SRTP, SFL, WBF */ - spinlock_t register_lock; /* protect register handling */ + raw_spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ int msagaw; /* max sagaw of this iommu */ unsigned int irq; unsigned char name[13]; /* Device Name */ -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU unsigned long *domain_ids; /* bitmap of domains */ struct dmar_domain **domains; /* ptr to domains */ spinlock_t lock; /* protect context, domain ids */ @@ -329,7 +329,7 @@ struct intel_iommu { struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ #endif int node; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9940319d6f9d..d937580417ba 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -25,61 +25,128 @@ #define IOMMU_WRITE (2) #define IOMMU_CACHE (4) /* DMA cache coherency */ +struct iommu_ops; +struct bus_type; struct device; +struct iommu_domain; + +/* iommu fault flags */ +#define IOMMU_FAULT_READ 0x0 +#define IOMMU_FAULT_WRITE 0x1 + +typedef int (*iommu_fault_handler_t)(struct iommu_domain *, + struct device *, unsigned long, int); struct iommu_domain { + struct iommu_ops *ops; void *priv; + iommu_fault_handler_t handler; }; #define IOMMU_CAP_CACHE_COHERENCY 0x1 #define IOMMU_CAP_INTR_REMAP 0x2 /* isolates device intrs */ +#ifdef CONFIG_IOMMU_API + +/** + * struct iommu_ops - iommu ops and capabilities + * @domain_init: init iommu domain + * @domain_destroy: destroy iommu domain + * @attach_dev: attach device to an iommu domain + * @detach_dev: detach device from an iommu domain + * @map: map a physically contiguous memory region to an iommu domain + * @unmap: unmap a physically contiguous memory region from an iommu domain + * @iova_to_phys: translate iova to physical address + * @domain_has_cap: domain capabilities query + * @commit: commit iommu domain + * @pgsize_bitmap: bitmap of supported page sizes + */ struct iommu_ops { int (*domain_init)(struct iommu_domain *domain); void (*domain_destroy)(struct iommu_domain *domain); int (*attach_dev)(struct iommu_domain *domain, struct device *dev); void (*detach_dev)(struct iommu_domain *domain, struct device *dev); int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, int gfp_order, int prot); - int (*unmap)(struct iommu_domain *domain, unsigned long iova, - int gfp_order); + phys_addr_t paddr, size_t size, int prot); + size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, + size_t size); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, unsigned long iova); int (*domain_has_cap)(struct iommu_domain *domain, unsigned long cap); + int (*device_group)(struct device *dev, unsigned int *groupid); + unsigned long pgsize_bitmap; }; -#ifdef CONFIG_IOMMU_API - -extern void register_iommu(struct iommu_ops *ops); -extern bool iommu_found(void); -extern struct iommu_domain *iommu_domain_alloc(void); +extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops); +extern bool iommu_present(struct bus_type *bus); +extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); extern int 
iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, int gfp_order, int prot); -extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova, - int gfp_order); + phys_addr_t paddr, size_t size, int prot); +extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, unsigned long iova); extern int iommu_domain_has_cap(struct iommu_domain *domain, unsigned long cap); +extern void iommu_set_fault_handler(struct iommu_domain *domain, + iommu_fault_handler_t handler); +extern int iommu_device_group(struct device *dev, unsigned int *groupid); + +/** + * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework + * @domain: the iommu domain where the fault has happened + * @dev: the device where the fault has happened + * @iova: the faulting address + * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) + * + * This function should be called by the low-level IOMMU implementations + * whenever IOMMU faults happen, to allow high-level users, that are + * interested in such events, to know about them. + * + * This event may be useful for several possible use cases: + * - mere logging of the event + * - dynamic TLB/PTE loading + * - if restarting of the faulting device is required + * + * Returns 0 on success and an appropriate error code otherwise (if dynamic + * PTE/TLB loading will one day be supported, implementations will be able + * to tell whether it succeeded or not according to this return value). + * + * Specifically, -ENOSYS is returned if a fault handler isn't installed + * (though fault handlers can also return -ENOSYS, in case they want to + * elicit the default behavior of the IOMMU drivers). + */ +static inline int report_iommu_fault(struct iommu_domain *domain, + struct device *dev, unsigned long iova, int flags) +{ + int ret = -ENOSYS; -#else /* CONFIG_IOMMU_API */ + /* + * if upper layers showed interest and installed a fault handler, + * invoke it. 
+ */ + if (domain->handler) + ret = domain->handler(domain, dev, iova, flags); -static inline void register_iommu(struct iommu_ops *ops) -{ + return ret; } -static inline bool iommu_found(void) +#else /* CONFIG_IOMMU_API */ + +struct iommu_ops {}; + +static inline bool iommu_present(struct bus_type *bus) { return false; } -static inline struct iommu_domain *iommu_domain_alloc(void) +static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) { return NULL; } @@ -123,6 +190,16 @@ static inline int domain_has_cap(struct iommu_domain *domain, return 0; } +static inline void iommu_set_fault_handler(struct iommu_domain *domain, + iommu_fault_handler_t handler) +{ +} + +static inline int iommu_device_group(struct device *dev, unsigned int *groupid) +{ + return -ENODEV; +} + #endif /* CONFIG_IOMMU_API */ #endif /* __LINUX_IOMMU_H */ diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 78c80f67f535..511e160f7062 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -111,7 +111,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) /* Map into IO address space */ r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), - get_order(page_size), flags); + page_size, flags); if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%llx\n", pfn); @@ -228,12 +228,12 @@ int kvm_iommu_map_guest(struct kvm *kvm) { int r; - if (!iommu_found()) { + if (!iommu_present(&pci_bus_type)) { printk(KERN_ERR "%s: iommu not found\n", __func__); return -ENODEV; } - kvm->arch.iommu_domain = iommu_domain_alloc(); + kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type); if (!kvm->arch.iommu_domain) return -ENOMEM; @@ -286,15 +286,15 @@ static void kvm_iommu_put_pages(struct kvm *kvm, while (gfn < end_gfn) { unsigned long unmap_pages; - int order; + size_t size; /* Get physical address */ phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); pfn = phys >> PAGE_SHIFT; /* Unmap address from IO address space */ - order = iommu_unmap(domain, gfn_to_gpa(gfn), 0); - unmap_pages = 1ULL << order; + size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE); + unmap_pages = 1ULL << get_order(size); /* Unpin all pages we just unmapped to not leak any memory */ kvm_unpin_pages(kvm, pfn, unmap_pages); |
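The KVM hunk above exercises the reworked core API: iommu_present() and iommu_domain_alloc() are now bus-scoped, iommu_map() takes a size in bytes instead of a gfp order, and iommu_unmap() returns the number of bytes actually unmapped, which is why kvm_iommu_put_pages() converts the result back to a page count with get_order(). A minimal sketch of that calling convention, with an illustrative function name and abbreviated error handling:

#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/mm.h>

static int example_map_one_page(struct bus_type *bus, struct device *dev,
				unsigned long iova, phys_addr_t paddr)
{
	struct iommu_domain *domain;
	size_t unmapped;
	int ret;

	if (!iommu_present(bus))		/* replaces the old iommu_found() */
		return -ENODEV;

	domain = iommu_domain_alloc(bus);	/* now takes the bus */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);
	if (ret)
		goto out_free;

	/* map/unmap operate on a size in bytes, not a gfp order */
	ret = iommu_map(domain, iova, paddr, PAGE_SIZE,
			IOMMU_READ | IOMMU_WRITE);
	if (!ret) {
		unmapped = iommu_unmap(domain, iova, PAGE_SIZE);
		if (unmapped != PAGE_SIZE)
			ret = -EFAULT;	/* partial unmap; illustrative only */
	}

	iommu_detach_device(domain, dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}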