summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/amd_iommu.h1
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h12
-rw-r--r--arch/x86/kernel/amd_iommu.c261
-rw-r--r--arch/x86/kernel/amd_iommu_init.c42
-rw-r--r--arch/x86/kernel/pci-dma.c9
6 files changed, 266 insertions, 60 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 13ffa5df37d7..1d9c18aa17eb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -586,7 +586,6 @@ config GART_IOMMU
bool "GART IOMMU support" if EMBEDDED
default y
select SWIOTLB
- select AGP
depends on X86_64 && PCI
---help---
Support for full DMA access of devices with 32bit memory access only
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index bdf96f119f06..ac95995b7bad 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -25,6 +25,7 @@
#ifdef CONFIG_AMD_IOMMU
extern int amd_iommu_init(void);
extern int amd_iommu_init_dma_ops(void);
+extern int amd_iommu_init_passthrough(void);
extern void amd_iommu_detect(void);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_flush_all_domains(void);
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 9e8bb9746dcf..2a2cc7a78a81 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -143,6 +143,7 @@
#define EVT_BUFFER_SIZE 8192 /* 512 entries */
#define EVT_LEN_MASK (0x9ULL << 56)
+#define PAGE_MODE_NONE 0x00
#define PAGE_MODE_1_LEVEL 0x01
#define PAGE_MODE_2_LEVEL 0x02
#define PAGE_MODE_3_LEVEL 0x03
@@ -195,11 +196,14 @@
#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
domain for an IOMMU */
+#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
+ translation */
+
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
do { \
if (amd_iommu_dump) \
- printk(KERN_INFO "AMD IOMMU: " format, ## arg); \
+ printk(KERN_INFO "AMD-Vi: " format, ## arg); \
} while(0);
/*
@@ -339,6 +343,9 @@ struct amd_iommu {
/* if one, we need to send a completion wait command */
bool need_sync;
+ /* becomes true if a command buffer reset is running */
+ bool reset_in_progress;
+
/* default dma_ops domain for that IOMMU */
struct dma_ops_domain *default_dom;
};
@@ -459,4 +466,7 @@ static inline void amd_iommu_stats_init(void) { }
#endif /* CONFIG_AMD_IOMMU_STATS */
+/* some function prototypes */
+extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
+
#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 45be9499c973..98f230f6a28d 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -41,9 +41,13 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
static LIST_HEAD(iommu_pd_list);
static DEFINE_SPINLOCK(iommu_pd_list_lock);
-#ifdef CONFIG_IOMMU_API
+/*
+ * Domain for untranslated devices - only allocated
+ * if iommu=pt passed on kernel cmd line.
+ */
+static struct protection_domain *pt_domain;
+
static struct iommu_ops amd_iommu_ops;
-#endif
/*
* general struct to manage commands send to an IOMMU
@@ -61,14 +65,11 @@ static u64 *alloc_pte(struct protection_domain *domain,
static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
unsigned long start_page,
unsigned int pages);
+static void reset_iommu_command_buffer(struct amd_iommu *iommu);
static u64 *fetch_pte(struct protection_domain *domain,
unsigned long address, int map_size);
static void update_domain(struct protection_domain *domain);
-#ifndef BUS_NOTIFY_UNBOUND_DRIVER
-#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
-#endif
-
#ifdef CONFIG_AMD_IOMMU_STATS
/*
@@ -141,7 +142,25 @@ static int iommu_has_npcache(struct amd_iommu *iommu)
*
****************************************************************************/
-static void iommu_print_event(void *__evt)
+static void dump_dte_entry(u16 devid)
+{
+ int i;
+
+ for (i = 0; i < 8; ++i)
+ pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
+ amd_iommu_dev_table[devid].data[i]);
+}
+
+static void dump_command(unsigned long phys_addr)
+{
+ struct iommu_cmd *cmd = phys_to_virt(phys_addr);
+ int i;
+
+ for (i = 0; i < 4; ++i)
+ pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
+}
+
+static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
{
u32 *event = __evt;
int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
@@ -150,7 +169,7 @@ static void iommu_print_event(void *__evt)
int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
u64 address = (u64)(((u64)event[3]) << 32) | event[2];
- printk(KERN_ERR "AMD IOMMU: Event logged [");
+ printk(KERN_ERR "AMD-Vi: Event logged [");
switch (type) {
case EVENT_TYPE_ILL_DEV:
@@ -158,6 +177,7 @@ static void iommu_print_event(void *__evt)
"address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
+ dump_dte_entry(devid);
break;
case EVENT_TYPE_IO_FAULT:
printk("IO_PAGE_FAULT device=%02x:%02x.%x "
@@ -179,6 +199,8 @@ static void iommu_print_event(void *__evt)
break;
case EVENT_TYPE_ILL_CMD:
printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+ reset_iommu_command_buffer(iommu);
+ dump_command(address);
break;
case EVENT_TYPE_CMD_HARD_ERR:
printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
@@ -212,7 +234,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
while (head != tail) {
- iommu_print_event(iommu->evt_buf + head);
+ iommu_print_event(iommu, iommu->evt_buf + head);
head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
}
@@ -299,8 +321,11 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu)
status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
- if (unlikely(i == EXIT_LOOP_COUNT))
- panic("AMD IOMMU: Completion wait loop failed\n");
+ if (unlikely(i == EXIT_LOOP_COUNT)) {
+ spin_unlock(&iommu->lock);
+ reset_iommu_command_buffer(iommu);
+ spin_lock(&iommu->lock);
+ }
}
/*
@@ -448,37 +473,67 @@ static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
}
/*
+ * This function flushes one domain on one IOMMU
+ */
+static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid)
+{
+ struct iommu_cmd cmd;
+ unsigned long flags;
+
+ __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+ domid, 1, 1);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ __iommu_queue_command(iommu, &cmd);
+ __iommu_completion_wait(iommu);
+ __iommu_wait_for_completion(iommu);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void flush_all_domains_on_iommu(struct amd_iommu *iommu)
+{
+ int i;
+
+ for (i = 1; i < MAX_DOMAIN_ID; ++i) {
+ if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
+ continue;
+ flush_domain_on_iommu(iommu, i);
+ }
+
+}
+
+/*
* This function is used to flush the IO/TLB for a given protection domain
* on every IOMMU in the system
*/
static void iommu_flush_domain(u16 domid)
{
- unsigned long flags;
struct amd_iommu *iommu;
- struct iommu_cmd cmd;
INC_STATS_COUNTER(domain_flush_all);
- __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
- domid, 1, 1);
-
- for_each_iommu(iommu) {
- spin_lock_irqsave(&iommu->lock, flags);
- __iommu_queue_command(iommu, &cmd);
- __iommu_completion_wait(iommu);
- __iommu_wait_for_completion(iommu);
- spin_unlock_irqrestore(&iommu->lock, flags);
- }
+ for_each_iommu(iommu)
+ flush_domain_on_iommu(iommu, domid);
}
void amd_iommu_flush_all_domains(void)
{
+ struct amd_iommu *iommu;
+
+ for_each_iommu(iommu)
+ flush_all_domains_on_iommu(iommu);
+}
+
+static void flush_all_devices_for_iommu(struct amd_iommu *iommu)
+{
int i;
- for (i = 1; i < MAX_DOMAIN_ID; ++i) {
- if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
+ for (i = 0; i <= amd_iommu_last_bdf; ++i) {
+ if (iommu != amd_iommu_rlookup_table[i])
continue;
- iommu_flush_domain(i);
+
+ iommu_queue_inv_dev_entry(iommu, i);
+ iommu_completion_wait(iommu);
}
}
@@ -501,6 +556,22 @@ static void flush_devices_by_domain(struct protection_domain *domain)
}
}
+static void reset_iommu_command_buffer(struct amd_iommu *iommu)
+{
+ pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
+
+ if (iommu->reset_in_progress)
+ panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
+
+ iommu->reset_in_progress = true;
+
+ amd_iommu_reset_cmd_buffer(iommu);
+ flush_all_devices_for_iommu(iommu);
+ flush_all_domains_on_iommu(iommu);
+
+ iommu->reset_in_progress = false;
+}
+
void amd_iommu_flush_all_devices(void)
{
flush_devices_by_domain(NULL);
@@ -1077,19 +1148,36 @@ static struct protection_domain *domain_for_device(u16 devid)
static void set_dte_entry(u16 devid, struct protection_domain *domain)
{
u64 pte_root = virt_to_phys(domain->pt_root);
- unsigned long flags;
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
- amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
- amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
amd_iommu_dev_table[devid].data[2] = domain->id;
+ amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
+ amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
amd_iommu_pd_table[devid] = domain;
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+/*
+ * If a device is not yet associated with a domain, this function
+ * assigns it to the domain and makes it visible to the hardware
+ */
+static void __attach_device(struct amd_iommu *iommu,
+ struct protection_domain *domain,
+ u16 devid)
+{
+ /* lock domain */
+ spin_lock(&domain->lock);
+
+ /* update DTE entry */
+ set_dte_entry(devid, domain);
+
+ domain->dev_cnt += 1;
+
+ /* ready */
+ spin_unlock(&domain->lock);
}
/*
@@ -1100,17 +1188,17 @@ static void attach_device(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
{
- /* set the DTE entry */
- set_dte_entry(devid, domain);
+ unsigned long flags;
- /* increase reference counter */
- domain->dev_cnt += 1;
+ write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ __attach_device(iommu, domain, devid);
+ write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
- /*
- * We might boot into a crash-kernel here. The crashed kernel
- * left the caches in the IOMMU dirty. So we have to flush
- * here to evict all dirty stuff.
- */
+ /*
+ * We might boot into a crash-kernel here. The crashed kernel
+ * left the caches in the IOMMU dirty. So we have to flush
+ * here to evict all dirty stuff.
+ */
iommu_queue_inv_dev_entry(iommu, devid);
iommu_flush_tlb_pde(iommu, domain->id);
}
@@ -1137,6 +1225,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid)
/* ready */
spin_unlock(&domain->lock);
+
+ /*
+ * If we run in passthrough mode the device must be assigned to the
+ * passthrough domain if it is detached from any other domain
+ */
+ if (iommu_pass_through) {
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+ __attach_device(iommu, pt_domain, devid);
+ }
}
/*
@@ -1182,6 +1279,8 @@ static int device_change_notifier(struct notifier_block *nb,
case BUS_NOTIFY_UNBOUND_DRIVER:
if (!domain)
goto out;
+ if (iommu_pass_through)
+ break;
detach_device(domain, devid);
break;
case BUS_NOTIFY_ADD_DEVICE:
@@ -1312,12 +1411,15 @@ static int get_device_resources(struct device *dev,
static void update_device_table(struct protection_domain *domain)
{
+ unsigned long flags;
int i;
for (i = 0; i <= amd_iommu_last_bdf; ++i) {
if (amd_iommu_pd_table[i] != domain)
continue;
+ write_lock_irqsave(&amd_iommu_devtable_lock, flags);
set_dte_entry(i, domain);
+ write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
}
@@ -2058,19 +2160,47 @@ static void cleanup_domain(struct protection_domain *domain)
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
-static int amd_iommu_domain_init(struct iommu_domain *dom)
+static void protection_domain_free(struct protection_domain *domain)
+{
+ if (!domain)
+ return;
+
+ if (domain->id)
+ domain_id_free(domain->id);
+
+ kfree(domain);
+}
+
+static struct protection_domain *protection_domain_alloc(void)
{
struct protection_domain *domain;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
- return -ENOMEM;
+ return NULL;
spin_lock_init(&domain->lock);
- domain->mode = PAGE_MODE_3_LEVEL;
domain->id = domain_id_alloc();
if (!domain->id)
+ goto out_err;
+
+ return domain;
+
+out_err:
+ kfree(domain);
+
+ return NULL;
+}
+
+static int amd_iommu_domain_init(struct iommu_domain *dom)
+{
+ struct protection_domain *domain;
+
+ domain = protection_domain_alloc();
+ if (!domain)
goto out_free;
+
+ domain->mode = PAGE_MODE_3_LEVEL;
domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
if (!domain->pt_root)
goto out_free;
@@ -2080,7 +2210,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom)
return 0;
out_free:
- kfree(domain);
+ protection_domain_free(domain);
return -ENOMEM;
}
@@ -2249,3 +2379,46 @@ static struct iommu_ops amd_iommu_ops = {
.domain_has_cap = amd_iommu_domain_has_cap,
};
+/*****************************************************************************
+ *
+ * The next functions do a basic initialization of IOMMU for pass through
+ * mode
+ *
+ * In passthrough mode the IOMMU is initialized and enabled but not used for
+ * DMA-API translation.
+ *
+ *****************************************************************************/
+
+int __init amd_iommu_init_passthrough(void)
+{
+ struct pci_dev *dev = NULL;
+ u16 devid, devid2;
+
+ /* allocate passthrough domain */
+ pt_domain = protection_domain_alloc();
+ if (!pt_domain)
+ return -ENOMEM;
+
+ pt_domain->mode |= PAGE_MODE_NONE;
+
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ struct amd_iommu *iommu;
+
+ devid = calc_devid(dev->bus->number, dev->devfn);
+ if (devid > amd_iommu_last_bdf)
+ continue;
+
+ devid2 = amd_iommu_alias_table[devid];
+
+ iommu = amd_iommu_rlookup_table[devid2];
+ if (!iommu)
+ continue;
+
+ __attach_device(iommu, pt_domain, devid);
+ __attach_device(iommu, pt_domain, devid2);
+ }
+
+ pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
+
+ return 0;
+}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c1b17e97252e..b4b61d462dcc 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -252,7 +252,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
/* Function to enable the hardware */
static void iommu_enable(struct amd_iommu *iommu)
{
- printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
+ printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n",
dev_name(&iommu->dev->dev), iommu->cap_ptr);
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
@@ -435,6 +435,20 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
}
/*
+ * This function resets the command buffer if the IOMMU stopped fetching
+ * commands from it.
+ */
+void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
+{
+ iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+
+ writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+ writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+
+ iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
+}
+
+/*
* This function writes the command buffer address to the hardware and
* enables it.
*/
@@ -450,11 +464,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
&entry, sizeof(entry));
- /* set head and tail to zero manually */
- writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
- writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
-
- iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
+ amd_iommu_reset_cmd_buffer(iommu);
}
static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -858,7 +868,7 @@ static int __init init_iommu_all(struct acpi_table_header *table)
switch (*p) {
case ACPI_IVHD_TYPE:
- DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
+ DUMP_printk("device: %02x:%02x.%01x cap: %04x "
"seg: %d flags: %01x info %04x\n",
PCI_BUS(h->devid), PCI_SLOT(h->devid),
PCI_FUNC(h->devid), h->cap_ptr,
@@ -902,7 +912,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
r = request_irq(iommu->dev->irq, amd_iommu_int_handler,
IRQF_SAMPLE_RANDOM,
- "AMD IOMMU",
+ "AMD-Vi",
NULL);
if (r) {
@@ -1150,7 +1160,7 @@ int __init amd_iommu_init(void)
if (no_iommu) {
- printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
+ printk(KERN_INFO "AMD-Vi disabled by kernel command line\n");
return 0;
}
@@ -1242,22 +1252,28 @@ int __init amd_iommu_init(void)
if (ret)
goto free;
- ret = amd_iommu_init_dma_ops();
+ if (iommu_pass_through)
+ ret = amd_iommu_init_passthrough();
+ else
+ ret = amd_iommu_init_dma_ops();
if (ret)
goto free;
enable_iommus();
- printk(KERN_INFO "AMD IOMMU: device isolation ");
+ if (iommu_pass_through)
+ goto out;
+
+ printk(KERN_INFO "AMD-Vi: device isolation ");
if (amd_iommu_isolate)
printk("enabled\n");
else
printk("disabled\n");
if (amd_iommu_unmap_flush)
- printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n");
+ printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
else
- printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n");
+ printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
out:
return ret;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1a041bcf506b..873aa079d166 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -32,7 +32,14 @@ int no_iommu __read_mostly;
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly = 0;
-int iommu_pass_through;
+/*
+ * This variable becomes 1 if iommu=pt is passed on the kernel command line.
+ * If this variable is 1, IOMMU implementations do no DMA translation for
+ * devices and allow every device to access the whole physical memory. This is
+ * useful if a user wants to use an IOMMU only for KVM device assignment to
+ * guests and not for driver DMA translation.
+ */
+int iommu_pass_through __read_mostly;
dma_addr_t bad_dma_address __read_mostly = 0;
EXPORT_SYMBOL(bad_dma_address);