276 files changed, 12294 insertions, 3861 deletions
diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h
index bc533dde16c4..f895a244ca7e 100644
--- a/drivers/acpi/acpica/acconfig.h
+++ b/drivers/acpi/acpica/acconfig.h
@@ -121,7 +121,7 @@
 
 /* Maximum sleep allowed via Sleep() operator */
 
-#define ACPI_MAX_SLEEP                  20000	/* Two seconds */
+#define ACPI_MAX_SLEEP                  2000	/* Two seconds */
 
 /******************************************************************************
  *
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index c34aa51af4ee..e3f47872ec22 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -13,6 +13,7 @@ config ACPI_APEI_GHES
 	bool "APEI Generic Hardware Error Source"
 	depends on ACPI_APEI && X86
 	select ACPI_HED
+	select IRQ_WORK
 	select LLIST
 	select GENERIC_ALLOCATOR
 	help
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 8041248fce9b..61540360d5ce 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -618,7 +618,7 @@ int apei_osc_setup(void)
 	};
 
 	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
-	capbuf[OSC_SUPPORT_TYPE] = 0;
+	capbuf[OSC_SUPPORT_TYPE] = 1;
 	capbuf[OSC_CONTROL_TYPE] = 0;
 
 	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 2c18d584066d..b97294e2d95b 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -42,6 +42,22 @@ static struct pm_clk_data *__to_pcd(struct device *dev)
 }
 
 /**
+ * pm_clk_acquire - Acquire a device clock.
+ * @dev: Device whose clock is to be acquired.
+ * @ce: PM clock entry corresponding to the clock.
+ */
+static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce)
+{
+	ce->clk = clk_get(dev, ce->con_id);
+	if (IS_ERR(ce->clk)) {
+		ce->status = PCE_STATUS_ERROR;
+	} else {
+		ce->status = PCE_STATUS_ACQUIRED;
+		dev_dbg(dev, "Clock %s managed by runtime PM.\n", ce->con_id);
+	}
+}
+
+/**
  * pm_clk_add - Start using a device clock for power management.
  * @dev: Device whose clock is going to be used for power management.
  * @con_id: Connection ID of the clock.
@@ -73,6 +89,8 @@ int pm_clk_add(struct device *dev, const char *con_id)
 		}
 	}
 
+	pm_clk_acquire(dev, ce);
+
 	spin_lock_irq(&pcd->lock);
 	list_add_tail(&ce->node, &pcd->clock_list);
 	spin_unlock_irq(&pcd->lock);
@@ -82,17 +100,12 @@ int pm_clk_add(struct device *dev, const char *con_id)
 /**
  * __pm_clk_remove - Destroy PM clock entry.
  * @ce: PM clock entry to destroy.
- *
- * This routine must be called under the spinlock protecting the PM list of
- * clocks corresponding the the @ce's device.
  */
 static void __pm_clk_remove(struct pm_clock_entry *ce)
 {
 	if (!ce)
 		return;
 
-	list_del(&ce->node);
-
 	if (ce->status < PCE_STATUS_ERROR) {
 		if (ce->status == PCE_STATUS_ENABLED)
 			clk_disable(ce->clk);
@@ -126,18 +139,22 @@ void pm_clk_remove(struct device *dev, const char *con_id)
 	spin_lock_irq(&pcd->lock);
 
 	list_for_each_entry(ce, &pcd->clock_list, node) {
-		if (!con_id && !ce->con_id) {
-			__pm_clk_remove(ce);
-			break;
-		} else if (!con_id || !ce->con_id) {
+		if (!con_id && !ce->con_id)
+			goto remove;
+		else if (!con_id || !ce->con_id)
 			continue;
-		} else if (!strcmp(con_id, ce->con_id)) {
-			__pm_clk_remove(ce);
-			break;
-		}
+		else if (!strcmp(con_id, ce->con_id))
+			goto remove;
 	}
 
 	spin_unlock_irq(&pcd->lock);
+	return;
+
+ remove:
+	list_del(&ce->node);
+	spin_unlock_irq(&pcd->lock);
+
+	__pm_clk_remove(ce);
 }
 
 /**
@@ -175,20 +192,27 @@ void pm_clk_destroy(struct device *dev)
 {
 	struct pm_clk_data *pcd = __to_pcd(dev);
 	struct pm_clock_entry *ce, *c;
+	struct list_head list;
 
 	if (!pcd)
 		return;
 
 	dev->power.subsys_data = NULL;
+	INIT_LIST_HEAD(&list);
 
 	spin_lock_irq(&pcd->lock);
 
 	list_for_each_entry_safe_reverse(ce, c, &pcd->clock_list, node)
-		__pm_clk_remove(ce);
+		list_move(&ce->node, &list);
 
 	spin_unlock_irq(&pcd->lock);
 
 	kfree(pcd);
+
+	list_for_each_entry_safe_reverse(ce, c, &list, node) {
+		list_del(&ce->node);
+		__pm_clk_remove(ce);
+	}
 }
 
 #endif /* CONFIG_PM */
@@ -196,23 +220,6 @@ void pm_clk_destroy(struct device *dev)
 #ifdef CONFIG_PM_RUNTIME
 
 /**
- * pm_clk_acquire - Acquire a device clock.
- * @dev: Device whose clock is to be acquired.
- * @con_id: Connection ID of the clock.
- */
-static void pm_clk_acquire(struct device *dev,
-				    struct pm_clock_entry *ce)
-{
-	ce->clk = clk_get(dev, ce->con_id);
-	if (IS_ERR(ce->clk)) {
-		ce->status = PCE_STATUS_ERROR;
-	} else {
-		ce->status = PCE_STATUS_ACQUIRED;
-		dev_dbg(dev, "Clock %s managed by runtime PM.\n", ce->con_id);
-	}
-}
-
-/**
  * pm_clk_suspend - Disable clocks in a device's PM clock list.
  * @dev: Device to disable the clocks for.
  */
@@ -230,9 +237,6 @@ int pm_clk_suspend(struct device *dev)
 	spin_lock_irqsave(&pcd->lock, flags);
 
 	list_for_each_entry_reverse(ce, &pcd->clock_list, node) {
-		if (ce->status == PCE_STATUS_NONE)
-			pm_clk_acquire(dev, ce);
-
 		if (ce->status < PCE_STATUS_ERROR) {
 			clk_disable(ce->clk);
 			ce->status = PCE_STATUS_ACQUIRED;
@@ -262,9 +266,6 @@ int pm_clk_resume(struct device *dev)
 	spin_lock_irqsave(&pcd->lock, flags);
 
 	list_for_each_entry(ce, &pcd->clock_list, node) {
-		if (ce->status == PCE_STATUS_NONE)
-			pm_clk_acquire(dev, ce);
-
 		if (ce->status < PCE_STATUS_ERROR) {
 			clk_enable(ce->clk);
 			ce->status = PCE_STATUS_ENABLED;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 98de8f418676..9955a53733b2 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4250,7 +4250,7 @@ static int __init floppy_init(void)
 	use_virtual_dma = can_use_virtual_dma & 1;
 	fdc_state[0].address = FDC1;
 	if (fdc_state[0].address == -1) {
-		del_timer(&fd_timeout);
+		del_timer_sync(&fd_timeout);
 		err = -ENODEV;
 		goto out_unreg_region;
 	}
@@ -4261,7 +4261,7 @@ static int __init floppy_init(void)
 	fdc = 0;		/* reset fdc in case of unexpected interrupt */
 	err = floppy_grab_irq_and_dma();
 	if (err) {
-		del_timer(&fd_timeout);
+		del_timer_sync(&fd_timeout);
 		err = -EBUSY;
 		goto out_unreg_region;
 	}
@@ -4318,7 +4318,7 @@ static int __init floppy_init(void)
 		user_reset_fdc(-1, FD_RESET_ALWAYS, false);
 	}
 	fdc = 0;
-	del_timer(&fd_timeout);
+	del_timer_sync(&fd_timeout);
 	current_drive = 0;
 	initialized = true;
 	if (have_no_fdc) {
@@ -4368,7 +4368,7 @@ out_unreg_blkdev:
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 out_put_disk:
 	while (dr--) {
-		del_timer(&motor_off_timer[dr]);
+		del_timer_sync(&motor_off_timer[dr]);
 		if (disks[dr]->queue)
 			blk_cleanup_queue(disks[dr]->queue);
 		put_disk(disks[dr]);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 9e40b283a468..00c57c90e2d6 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -46,7 +46,7 @@
 
 #define DRV_PFX "xen-blkback:"
 #define DPRINTK(fmt, args...)				\
-	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",	\
+	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",		\
 		 __func__, __LINE__, ##args)
 
 
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 3f129b45451a..5fd2010f7d2b 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -590,7 +590,7 @@ static void frontend_changed(struct xenbus_device *dev,
 
 		/*
 		 * Enforce precondition before potential leak point.
-		 * blkif_disconnect() is idempotent.
+		 * xen_blkif_disconnect() is idempotent.
 		 */
 		xen_blkif_disconnect(be->blkif);
 
@@ -601,17 +601,17 @@ static void frontend_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateClosing:
-		xen_blkif_disconnect(be->blkif);
 		xenbus_switch_state(dev, XenbusStateClosing);
 		break;
 
 	case XenbusStateClosed:
+		xen_blkif_disconnect(be->blkif);
 		xenbus_switch_state(dev, XenbusStateClosed);
 		if (xenbus_dev_is_online(dev))
 			break;
 		/* fall through if not online */
 	case XenbusStateUnknown:
-		/* implies blkif_disconnect() via blkback_remove() */
+		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
 		device_unregister(&dev->dev);
 		break;
 
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 3ef476070baf..9cbac6b445e1 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -72,9 +72,15 @@ static struct usb_device_id btusb_table[] = {
 	/* Apple MacBookAir3,1, MacBookAir3,2 */
 	{ USB_DEVICE(0x05ac, 0x821b) },
 
+	/* Apple MacBookAir4,1 */
+	{ USB_DEVICE(0x05ac, 0x821f) },
+
 	/* Apple MacBookPro8,2 */
 	{ USB_DEVICE(0x05ac, 0x821a) },
 
+	/* Apple MacMini5,1 */
+	{ USB_DEVICE(0x05ac, 0x8281) },
+
 	/* AVM BlueFRITZ! USB v2.0 */
 	{ USB_DEVICE(0x057c, 0x3800) },
 
diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c
index 65d27aff553a..04d353f58d71 100644
--- a/drivers/bluetooth/btwilink.c
+++ b/drivers/bluetooth/btwilink.c
@@ -125,6 +125,13 @@ static long st_receive(void *priv_data, struct sk_buff *skb)
 /* protocol structure registered with shared transport */
 static struct st_proto_s ti_st_proto[MAX_BT_CHNL_IDS] = {
 	{
+		.chnl_id = HCI_EVENT_PKT, /* HCI Events */
+		.hdr_len = sizeof(struct hci_event_hdr),
+		.offset_len_in_hdr = offsetof(struct hci_event_hdr, plen),
+		.len_size = 1, /* sizeof(plen) in struct hci_event_hdr */
+		.reserve = 8,
+	},
+	{
 		.chnl_id = HCI_ACLDATA_PKT, /* ACL */
 		.hdr_len = sizeof(struct hci_acl_hdr),
 		.offset_len_in_hdr = offsetof(struct hci_acl_hdr, dlen),
@@ -138,13 +145,6 @@ static struct st_proto_s ti_st_proto[MAX_BT_CHNL_IDS] = {
 		.len_size = 1, /* sizeof(dlen) in struct hci_sco_hdr */
 		.reserve = 8,
 	},
-	{
-		.chnl_id = HCI_EVENT_PKT, /* HCI Events */
-		.hdr_len = sizeof(struct hci_event_hdr),
-		.offset_len_in_hdr = offsetof(struct hci_event_hdr, plen),
-		.len_size = 1, /* sizeof(plen) in struct hci_event_hdr */
-		.reserve = 8,
-	},
 };
 
 /* Called from HCI core to initialize the device */
@@ -240,7 +240,7 @@ static int ti_st_close(struct hci_dev *hdev)
 	if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags))
 		return 0;
 
-	for (i = 0; i < MAX_BT_CHNL_IDS; i++) {
+	for (i = MAX_BT_CHNL_IDS-1; i >= 0; i--) {
 		err = st_unregister(&ti_st_proto[i]);
 		if (err)
 			BT_ERR("st_unregister(%d) failed with error %d",
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index f6595aba4f0f..fa567f1158c2 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -43,6 +43,7 @@ config TCG_NSC
 
 config TCG_ATMEL
 	tristate "Atmel TPM Interface"
+	depends on PPC64 || HAS_IOPORT
 	---help---
 	  If you have a TPM security chip from Atmel say Yes and it 
 	  will be accessible from within Linux.  To compile this driver 
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index caf8012ef47c..9ca5c021d0b6 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -383,6 +383,9 @@ static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
 	u32 count, ordinal;
 	unsigned long stop;
 
+	if (bufsiz > TPM_BUFSIZE)
+		bufsiz = TPM_BUFSIZE;
+
 	count = be32_to_cpu(*((__be32 *) (buf + 2)));
 	ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
 	if (count == 0)
@@ -1102,6 +1105,7 @@ ssize_t tpm_read(struct file *file, char __user *buf,
 {
 	struct tpm_chip *chip = file->private_data;
 	ssize_t ret_size;
+	int rc;
 
 	del_singleshot_timer_sync(&chip->user_read_timer);
 	flush_work_sync(&chip->work);
@@ -1112,8 +1116,11 @@ ssize_t tpm_read(struct file *file, char __user *buf,
 			ret_size = size;
 
 		mutex_lock(&chip->buffer_mutex);
-		if (copy_to_user(buf, chip->data_buffer, ret_size))
+		rc = copy_to_user(buf, chip->data_buffer, ret_size);
+		memset(chip->data_buffer, 0, ret_size);
+		if (rc)
 			ret_size = -EFAULT;
+
 		mutex_unlock(&chip->buffer_mutex);
 	}
 
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index 82facc9104c7..4d2464871ada 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -396,8 +396,6 @@ static void __exit cleanup_nsc(void)
 	if (pdev) {
 		tpm_nsc_remove(&pdev->dev);
 		platform_device_unregister(pdev);
-		kfree(pdev);
-		pdev = NULL;
 	}
 
 	platform_driver_unregister(&nsc_drv);
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 7b0603eb0129..cdc02ac8f41a 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -261,6 +261,9 @@ static int pcc_get_offset(int cpu)
 	pr = per_cpu(processors, cpu);
 	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
 
+	if (!pr)
+		return -ENODEV;
+
 	status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 57cd3a406edf..fd7170a9ad2c 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -290,6 +290,9 @@ static const struct {
 	{PCI_VENDOR_ID_NEC, PCI_ANY_ID, PCI_ANY_ID,
 		QUIRK_CYCLE_TIMER},
 
+	{PCI_VENDOR_ID_O2, PCI_ANY_ID, PCI_ANY_ID,
+		QUIRK_NO_MSI},
+
 	{PCI_VENDOR_ID_RICOH, PCI_ANY_ID, PCI_ANY_ID,
 		QUIRK_CYCLE_TIMER},
 
diff --git a/drivers/gpio/gpio-generic.c b/drivers/gpio/gpio-generic.c
index 231714def4d2..4e24436b0f82 100644
--- a/drivers/gpio/gpio-generic.c
+++ b/drivers/gpio/gpio-generic.c
@@ -351,7 +351,7 @@ static int bgpio_setup_direction(struct bgpio_chip *bgc,
 	return 0;
 }
 
-int __devexit bgpio_remove(struct bgpio_chip *bgc)
+int bgpio_remove(struct bgpio_chip *bgc)
 {
 	int err = gpiochip_remove(&bgc->gc);
 
@@ -361,15 +361,10 @@ int __devexit bgpio_remove(struct bgpio_chip *bgc)
 }
 EXPORT_SYMBOL_GPL(bgpio_remove);
 
-int __devinit bgpio_init(struct bgpio_chip *bgc,
-			 struct device *dev,
-			 unsigned long sz,
-			 void __iomem *dat,
-			 void __iomem *set,
-			 void __iomem *clr,
-			 void __iomem *dirout,
-			 void __iomem *dirin,
-			 bool big_endian)
+int bgpio_init(struct bgpio_chip *bgc, struct device *dev,
+	       unsigned long sz, void __iomem *dat, void __iomem *set,
+	       void __iomem *clr, void __iomem *dirout, void __iomem *dirin,
+	       bool big_endian)
 {
 	int ret;
 
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 0599854e2217..118ec12d2d5f 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -34,8 +34,8 @@ struct gpio_bank {
 	u16 irq;
 	u16 virtual_irq_start;
 	int method;
-#if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS)
 	u32 suspend_wakeup;
+#if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS)
 	u32 saved_wakeup;
 #endif
 	u32 non_wakeup_gpios;
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index c43b8ff626a7..0550dcb85814 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -577,6 +577,7 @@ pca953x_get_alt_pdata(struct i2c_client *client, int *gpio_base, int *invert)
 void
 pca953x_get_alt_pdata(struct i2c_client *client, int *gpio_base, int *invert)
 {
+	*gpio_base = -1;
 }
 #endif
 
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index b493663c7ba7..785127cb281b 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -158,3 +158,7 @@ config DRM_SAVAGE
 	help
 	  Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister
 	  chipset. If M is selected the module will be called savage.
+
+source "drivers/gpu/drm/exynos/Kconfig"
+
+source "drivers/gpu/drm/vmwgfx/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 89cf05a72d1c..c0496f660707 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -35,4 +35,5 @@ obj-$(CONFIG_DRM_SAVAGE)+= savage/
 obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/
 obj-$(CONFIG_DRM_VIA)	+=via/
 obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/
+obj-$(CONFIG_DRM_EXYNOS) +=exynos/
 obj-y			+= i2c/
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index f88a9b2c977b..f2366440b738 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -372,11 +372,13 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 		encoder_funcs = encoder->helper_private;
 		if (!(ret = encoder_funcs->mode_fixup(encoder, mode,
 						      adjusted_mode))) {
+			DRM_DEBUG_KMS("Encoder fixup failed\n");
 			goto done;
 		}
 	}
 
 	if (!(ret = crtc_funcs->mode_fixup(crtc, mode, adjusted_mode))) {
+		DRM_DEBUG_KMS("CRTC fixup failed\n");
 		goto done;
 	}
 	DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id);
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 9d2668a50872..b9dc2629ea9a 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -107,11 +107,8 @@ int drm_debugfs_create_files(struct drm_info_list *files, int count,
 		ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO,
 					  root, tmp, &drm_debugfs_fops);
 		if (!ent) {
-			char name[64];
-			strncpy(name, root->d_name.name,
-						min(root->d_name.len, 64U));
 			DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/%s\n",
-				  name, files[i].name);
+				  root->d_name.name, files[i].name);
 			kfree(tmp);
 			ret = -1;
 			goto fail;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 93a112d45c1a..7a87e0878f30 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -438,6 +438,8 @@ long drm_ioctl(struct file *filp,
 					goto err_i1;
 				}
 			}
+			if (asize > usize)
+				memset(kdata + usize, 0, asize - usize);
 		}
 
 		if (cmd & IOC_IN) {
diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index 9e5b07efebb7..0f3c4e3cafc3 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -95,7 +95,6 @@ int drm_proc_create_files(struct drm_info_list *files, int count,
 	struct drm_device *dev = minor->dev;
 	struct proc_dir_entry *ent;
 	struct drm_info_node *tmp;
-	char name[64];
 	int i, ret;
 
 	for (i = 0; i < count; i++) {
@@ -118,7 +117,7 @@ int drm_proc_create_files(struct drm_info_list *files, int count,
 				       &drm_proc_fops, tmp);
 		if (!ent) {
 			DRM_ERROR("Cannot create /proc/dri/%s/%s\n",
-				  name, files[i].name);
+				  root->name, files[i].name);
 			list_del(&tmp->list);
 			kfree(tmp);
 			ret = -1;
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
new file mode 100644
index 000000000000..847466aab435
--- /dev/null
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -0,0 +1,20 @@
+config DRM_EXYNOS
+	tristate "DRM Support for Samsung SoC EXYNOS Series"
+	depends on DRM && PLAT_SAMSUNG
+	default	n
+	select DRM_KMS_HELPER
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option if you have a Samsung SoC EXYNOS chipset.
+	  If M is selected the module will be called exynosdrm.
+
+config DRM_EXYNOS_FIMD
+	tristate "Exynos DRM FIMD"
+	depends on DRM_EXYNOS
+	default n
+	help
+	  Choose this option if you want to use Exynos FIMD for DRM.
+	  If M is selected, the module will be called exynos_drm_fimd
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile
new file mode 100644
index 000000000000..0496d3ff2683
--- /dev/null
+++ b/drivers/gpu/drm/exynos/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the drm device driver.  This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/exynos
+exynosdrm-y := exynos_drm_drv.o exynos_drm_encoder.o exynos_drm_connector.o \
+		exynos_drm_crtc.o exynos_drm_fbdev.o exynos_drm_fb.o \
+		exynos_drm_buf.o exynos_drm_gem.o exynos_drm_core.o
+
+obj-$(CONFIG_DRM_EXYNOS) += exynosdrm.o
+obj-$(CONFIG_DRM_EXYNOS_FIMD) += exynos_drm_fimd.o
diff --git a/drivers/gpu/drm/exynos/exynos_drm_buf.c b/drivers/gpu/drm/exynos/exynos_drm_buf.c
new file mode 100644
index 000000000000..6f8afea94fc9
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_buf.c
@@ -0,0 +1,110 @@
+/* exynos_drm_buf.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_buf.h"
+
+static DEFINE_MUTEX(exynos_drm_buf_lock);
+
+static int lowlevel_buffer_allocate(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	entry->vaddr = dma_alloc_writecombine(dev->dev, entry->size,
+			(dma_addr_t *)&entry->paddr, GFP_KERNEL);
+	if (!entry->paddr) {
+		DRM_ERROR("failed to allocate buffer.\n");
+		return -ENOMEM;
+	}
+
+	DRM_DEBUG_KMS("allocated : vaddr(0x%x), paddr(0x%x), size(0x%x)\n",
+			(unsigned int)entry->vaddr, entry->paddr, entry->size);
+
+	return 0;
+}
+
+static void lowlevel_buffer_deallocate(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry)
+{
+	DRM_DEBUG_KMS("%s.\n", __FILE__);
+
+	if (entry->paddr && entry->vaddr && entry->size)
+		dma_free_writecombine(dev->dev, entry->size, entry->vaddr,
+				entry->paddr);
+	else
+		DRM_DEBUG_KMS("entry data is null.\n");
+}
+
+struct exynos_drm_buf_entry *exynos_drm_buf_create(struct drm_device *dev,
+		unsigned int size)
+{
+	struct exynos_drm_buf_entry *entry;
+
+	DRM_DEBUG_KMS("%s.\n", __FILE__);
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		DRM_ERROR("failed to allocate exynos_drm_buf_entry.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	entry->size = size;
+
+	/*
+	 * allocate memory region with size and set the memory information
+	 * to vaddr and paddr of a entry object.
+	 */
+	if (lowlevel_buffer_allocate(dev, entry) < 0) {
+		kfree(entry);
+		entry = NULL;
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return entry;
+}
+
+void exynos_drm_buf_destroy(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry)
+{
+	DRM_DEBUG_KMS("%s.\n", __FILE__);
+
+	if (!entry) {
+		DRM_DEBUG_KMS("entry is null.\n");
+		return;
+	}
+
+	lowlevel_buffer_deallocate(dev, entry);
+
+	kfree(entry);
+	entry = NULL;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Buffer Management Module");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_buf.h b/drivers/gpu/drm/exynos/exynos_drm_buf.h
new file mode 100644
index 000000000000..045d59eab01a
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_buf.h
@@ -0,0 +1,53 @@
+/* exynos_drm_buf.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_BUF_H_
+#define _EXYNOS_DRM_BUF_H_
+
+/*
+ * exynos drm buffer entry structure.
+ *
+ * @paddr: physical address of allocated memory.
+ * @vaddr: kernel virtual address of allocated memory.
+ * @size: size of allocated memory.
+ */
+struct exynos_drm_buf_entry {
+	dma_addr_t paddr;
+	void __iomem *vaddr;
+	unsigned int size;
+};
+
+/* allocate physical memory. */
+struct exynos_drm_buf_entry *exynos_drm_buf_create(struct drm_device *dev,
+		unsigned int size);
+
+/* get physical memory information of a drm framebuffer. */
+struct exynos_drm_buf_entry *exynos_drm_fb_get_buf(struct drm_framebuffer *fb);
+
+/* remove allocated physical memory. */
+void exynos_drm_buf_destroy(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.c b/drivers/gpu/drm/exynos/exynos_drm_connector.c
new file mode 100644
index 000000000000..985d9e768728
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_connector.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_encoder.h"
+
+#define MAX_EDID 256
+#define to_exynos_connector(x)	container_of(x, struct exynos_drm_connector,\
+				drm_connector)
+
+struct exynos_drm_connector {
+	struct drm_connector	drm_connector;
+};
+
+/* convert exynos_video_timings to drm_display_mode */
+static inline void
+convert_to_display_mode(struct drm_display_mode *mode,
+			struct fb_videomode *timing)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode->clock = timing->pixclock / 1000;
+
+	mode->hdisplay = timing->xres;
+	mode->hsync_start = mode->hdisplay + timing->left_margin;
+	mode->hsync_end = mode->hsync_start + timing->hsync_len;
+	mode->htotal = mode->hsync_end + timing->right_margin;
+
+	mode->vdisplay = timing->yres;
+	mode->vsync_start = mode->vdisplay + timing->upper_margin;
+	mode->vsync_end = mode->vsync_start + timing->vsync_len;
+	mode->vtotal = mode->vsync_end + timing->lower_margin;
+}
+
+/* convert drm_display_mode to exynos_video_timings */
+static inline void
+convert_to_video_timing(struct fb_videomode *timing,
+			struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	memset(timing, 0, sizeof(*timing));
+
+	timing->pixclock = mode->clock * 1000;
+	timing->refresh = mode->vrefresh;
+
+	timing->xres = mode->hdisplay;
+	timing->left_margin = mode->hsync_start - mode->hdisplay;
+	timing->hsync_len = mode->hsync_end - mode->hsync_start;
+	timing->right_margin = mode->htotal - mode->hsync_end;
+
+	timing->yres = mode->vdisplay;
+	timing->upper_margin = mode->vsync_start - mode->vdisplay;
+	timing->vsync_len = mode->vsync_end - mode->vsync_start;
+	timing->lower_margin = mode->vtotal - mode->vsync_end;
+
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		timing->vmode = FB_VMODE_INTERLACED;
+	else
+		timing->vmode = FB_VMODE_NONINTERLACED;
+
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		timing->vmode |= FB_VMODE_DOUBLE;
+}
+
+static int exynos_drm_connector_get_modes(struct drm_connector *connector)
+{
+	struct exynos_drm_manager *manager =
+				exynos_drm_get_manager(connector->encoder);
+	struct exynos_drm_display *display = manager->display;
+	unsigned int count;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!display) {
+		DRM_DEBUG_KMS("display is null.\n");
+		return 0;
+	}
+
+	/*
+	 * if get_edid() exists then get_edid() callback of hdmi side
+	 * is called to get edid data through i2c interface else
+	 * get timing from the FIMD driver(display controller).
+	 *
+	 * P.S. in case of lcd panel, count is always 1 if success
+	 * because lcd panel has only one mode.
+	 */
+	if (display->get_edid) {
+		int ret;
+		void *edid;
+
+		edid = kzalloc(MAX_EDID, GFP_KERNEL);
+		if (!edid) {
+			DRM_ERROR("failed to allocate edid\n");
+			return 0;
+		}
+
+		ret = display->get_edid(manager->dev, connector,
+						edid, MAX_EDID);
+		if (ret < 0) {
+			DRM_ERROR("failed to get edid data.\n");
+			kfree(edid);
+			edid = NULL;
+			return 0;
+		}
+
+		drm_mode_connector_update_edid_property(connector, edid);
+		count = drm_add_edid_modes(connector, edid);
+
+		kfree(connector->display_info.raw_edid);
+		connector->display_info.raw_edid = edid;
+	} else {
+		struct drm_display_mode *mode = drm_mode_create(connector->dev);
+		struct fb_videomode *timing;
+
+		if (display->get_timing)
+			timing = display->get_timing(manager->dev);
+		else {
+			drm_mode_destroy(connector->dev, mode);
+			return 0;
+		}
+
+		convert_to_display_mode(mode, timing);
+
+		mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+		drm_mode_set_name(mode);
+		drm_mode_probed_add(connector, mode);
+
+		count = 1;
+	}
+
+	return count;
+}
+
+static int exynos_drm_connector_mode_valid(struct drm_connector *connector,
+					    struct drm_display_mode *mode)
+{
+	struct exynos_drm_manager *manager =
+				exynos_drm_get_manager(connector->encoder);
+	struct exynos_drm_display *display = manager->display;
+	struct fb_videomode timing;
+	int ret = MODE_BAD;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	convert_to_video_timing(&timing, mode);
+
+	if (display && display->check_timing)
+		if (!display->check_timing(manager->dev, (void *)&timing))
+			ret = MODE_OK;
+
+	return ret;
+}
+
+struct drm_encoder *exynos_drm_best_encoder(struct drm_connector *connector)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return connector->encoder;
+}
+
+static struct drm_connector_helper_funcs exynos_connector_helper_funcs = {
+	.get_modes	= exynos_drm_connector_get_modes,
+	.mode_valid	= exynos_drm_connector_mode_valid,
+	.best_encoder	= exynos_drm_best_encoder,
+};
+
+/* get detection status of display device. */
+static enum drm_connector_status
+exynos_drm_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct exynos_drm_manager *manager =
+				exynos_drm_get_manager(connector->encoder);
+	struct exynos_drm_display *display = manager->display;
+	enum drm_connector_status status = connector_status_disconnected;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (display && display->is_connected) {
+		if (display->is_connected(manager->dev))
+			status = connector_status_connected;
+		else
+			status = connector_status_disconnected;
+	}
+
+	return status;
+}
+
+static void exynos_drm_connector_destroy(struct drm_connector *connector)
+{
+	struct exynos_drm_connector *exynos_connector =
+		to_exynos_connector(connector);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(exynos_connector);
+}
+
+static struct drm_connector_funcs exynos_connector_funcs = {
+	.dpms		= drm_helper_connector_dpms,
+	.fill_modes	= drm_helper_probe_single_connector_modes,
+	.detect		= exynos_drm_connector_detect,
+	.destroy	= exynos_drm_connector_destroy,
+};
+
+struct drm_connector *exynos_drm_connector_create(struct drm_device *dev,
+						   struct drm_encoder *encoder)
+{
+	struct exynos_drm_connector *exynos_connector;
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+	struct drm_connector *connector;
+	int type;
+	int err;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_connector = kzalloc(sizeof(*exynos_connector), GFP_KERNEL);
+	if (!exynos_connector) {
+		DRM_ERROR("failed to allocate connector\n");
+		return NULL;
+	}
+
+	connector = &exynos_connector->drm_connector;
+
+	switch (manager->display->type) {
+	case EXYNOS_DISPLAY_TYPE_HDMI:
+		type = DRM_MODE_CONNECTOR_HDMIA;
+		break;
+	default:
+		type = DRM_MODE_CONNECTOR_Unknown;
+		break;
+	}
+
+	drm_connector_init(dev, connector, &exynos_connector_funcs, type);
+	drm_connector_helper_add(connector, &exynos_connector_helper_funcs);
+
+	err = drm_sysfs_connector_add(connector);
+	if (err)
+		goto err_connector;
+
+	connector->encoder = encoder;
+	err = drm_mode_connector_attach_encoder(connector, encoder);
+	if (err) {
+		DRM_ERROR("failed to attach a connector to a encoder\n");
+		goto err_sysfs;
+	}
+
+	DRM_DEBUG_KMS("connector has been created\n");
+
+	return connector;
+
+err_sysfs:
+	drm_sysfs_connector_remove(connector);
+err_connector:
+	drm_connector_cleanup(connector);
+	kfree(exynos_connector);
+	return NULL;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Connector Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.h b/drivers/gpu/drm/exynos/exynos_drm_connector.h
new file mode 100644
index 000000000000..1c7b2b5b579c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_connector.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_CONNECTOR_H_
+#define _EXYNOS_DRM_CONNECTOR_H_
+
+struct drm_connector *exynos_drm_connector_create(struct drm_device *dev,
+						   struct drm_encoder *encoder);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c
new file mode 100644
index 000000000000..661a03571d0c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_core.c
@@ -0,0 +1,272 @@
+/* exynos_drm_core.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "exynos_drm_drv.h"
+#include "exynos_drm_encoder.h"
+#include "exynos_drm_connector.h"
+#include "exynos_drm_fbdev.h"
+
+static DEFINE_MUTEX(exynos_drm_mutex);
+static LIST_HEAD(exynos_drm_subdrv_list);
+static struct drm_device *drm_dev;
+
+static int exynos_drm_subdrv_probe(struct drm_device *dev,
+					struct exynos_drm_subdrv *subdrv)
+{
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (subdrv->probe) {
+		int ret;
+
+		/*
+		 * this probe callback would be called by sub driver
+		 * after setting of all resources to this sub driver,
+		 * such as clock, irq and register map are done or by load()
+		 * of exynos drm driver.
+		 *
+		 * P.S. note that this driver is considered for modularization.
+		 */
+		ret = subdrv->probe(dev, subdrv->manager.dev);
+		if (ret)
+			return ret;
+	}
+
+	/* create and initialize a encoder for this sub driver. */
+	encoder = exynos_drm_encoder_create(dev, &subdrv->manager,
+			(1 << MAX_CRTC) - 1);
+	if (!encoder) {
+		DRM_ERROR("failed to create encoder\n");
+		return -EFAULT;
+	}
+
+	/*
+	 * create and initialize a connector for this sub driver and
+	 * attach the encoder created above to the connector.
+	 */
+	connector = exynos_drm_connector_create(dev, encoder);
+	if (!connector) {
+		DRM_ERROR("failed to create connector\n");
+		encoder->funcs->destroy(encoder);
+		return -EFAULT;
+	}
+
+	subdrv->encoder = encoder;
+	subdrv->connector = connector;
+
+	return 0;
+}
+
+static void exynos_drm_subdrv_remove(struct drm_device *dev,
+				      struct exynos_drm_subdrv *subdrv)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (subdrv->remove)
+		subdrv->remove(dev);
+
+	if (subdrv->encoder) {
+		struct drm_encoder *encoder = subdrv->encoder;
+		encoder->funcs->destroy(encoder);
+		subdrv->encoder = NULL;
+	}
+
+	if (subdrv->connector) {
+		struct drm_connector *connector = subdrv->connector;
+		connector->funcs->destroy(connector);
+		subdrv->connector = NULL;
+	}
+}
+
+int exynos_drm_device_register(struct drm_device *dev)
+{
+	struct exynos_drm_subdrv *subdrv, *n;
+	int err;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!dev)
+		return -EINVAL;
+
+	if (drm_dev) {
+		DRM_ERROR("Already drm device were registered\n");
+		return -EBUSY;
+	}
+
+	mutex_lock(&exynos_drm_mutex);
+	list_for_each_entry_safe(subdrv, n, &exynos_drm_subdrv_list, list) {
+		err = exynos_drm_subdrv_probe(dev, subdrv);
+		if (err) {
+			DRM_DEBUG("exynos drm subdrv probe failed.\n");
+			list_del(&subdrv->list);
+		}
+	}
+
+	drm_dev = dev;
+	mutex_unlock(&exynos_drm_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_device_register);
+
+int exynos_drm_device_unregister(struct drm_device *dev)
+{
+	struct exynos_drm_subdrv *subdrv;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!dev || dev != drm_dev) {
+		WARN(1, "Unexpected drm device unregister!\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&exynos_drm_mutex);
+	list_for_each_entry(subdrv, &exynos_drm_subdrv_list, list)
+		exynos_drm_subdrv_remove(dev, subdrv);
+
+	drm_dev = NULL;
+	mutex_unlock(&exynos_drm_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_device_unregister);
+
+static int exynos_drm_mode_group_reinit(struct drm_device *dev)
+{
+	struct drm_mode_group *group = &dev->primary->mode_group;
+	uint32_t *id_list = group->id_list;
+	int ret;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	ret = drm_mode_group_init_legacy_group(dev, group);
+	if (ret < 0)
+		return ret;
+
+	kfree(id_list);
+	return 0;
+}
+
+int exynos_drm_subdrv_register(struct exynos_drm_subdrv *subdrv)
+{
+	int err;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!subdrv)
+		return -EINVAL;
+
+	mutex_lock(&exynos_drm_mutex);
+	if (drm_dev) {
+		err = exynos_drm_subdrv_probe(drm_dev, subdrv);
+		if (err) {
+			DRM_ERROR("failed to probe exynos drm subdrv\n");
+			mutex_unlock(&exynos_drm_mutex);
+			return err;
+		}
+
+		/*
+		 * if any specific driver such as fimd or hdmi driver called
+		 * exynos_drm_subdrv_register() later than drm_load(),
+		 * the fb helper should be re-initialized and re-configured.
+		 */
+		err = exynos_drm_fbdev_reinit(drm_dev);
+		if (err) {
+			DRM_ERROR("failed to reinitialize exynos drm fbdev\n");
+			exynos_drm_subdrv_remove(drm_dev, subdrv);
+			mutex_unlock(&exynos_drm_mutex);
+			return err;
+		}
+
+		err = exynos_drm_mode_group_reinit(drm_dev);
+		if (err) {
+			DRM_ERROR("failed to reinitialize mode group\n");
+			exynos_drm_fbdev_fini(drm_dev);
+			exynos_drm_subdrv_remove(drm_dev, subdrv);
+			mutex_unlock(&exynos_drm_mutex);
+			return err;
+		}
+	}
+
+	subdrv->drm_dev = drm_dev;
+
+	list_add_tail(&subdrv->list, &exynos_drm_subdrv_list);
+	mutex_unlock(&exynos_drm_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_subdrv_register);
+
+int exynos_drm_subdrv_unregister(struct exynos_drm_subdrv *subdrv)
+{
+	int ret = -EFAULT;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!subdrv) {
+		DRM_DEBUG("Unexpected exynos drm subdrv unregister!\n");
+		return ret;
+	}
+
+	mutex_lock(&exynos_drm_mutex);
+	if (drm_dev) {
+		exynos_drm_subdrv_remove(drm_dev, subdrv);
+		list_del(&subdrv->list);
+
+		/*
+		 * fb helper should be updated once a sub driver is released
+		 * to re-configure crtc and connector and also to re-setup
+		 * drm framebuffer.
+		 */
+		ret = exynos_drm_fbdev_reinit(drm_dev);
+		if (ret < 0) {
+			DRM_ERROR("failed fb helper reinit.\n");
+			goto fail;
+		}
+
+		ret = exynos_drm_mode_group_reinit(drm_dev);
+		if (ret < 0) {
+			DRM_ERROR("failed drm mode group reinit.\n");
+			goto fail;
+		}
+	}
+
+fail:
+	mutex_unlock(&exynos_drm_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_subdrv_unregister);
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Core Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
new file mode 100644
index 000000000000..9337e5e2dbb6
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -0,0 +1,381 @@
+/* exynos_drm_crtc.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_encoder.h"
+#include "exynos_drm_buf.h"
+
+#define to_exynos_crtc(x)	container_of(x, struct exynos_drm_crtc,\
+				drm_crtc)
+
+/*
+ * Exynos specific crtc postion structure.
+ *
+ * @fb_x: offset x on a framebuffer to be displyed
+ *	- the unit is screen coordinates.
+ * @fb_y: offset y on a framebuffer to be displayed
+ *	- the unit is screen coordinates.
+ * @crtc_x: offset x on hardware screen.
+ * @crtc_y: offset y on hardware screen.
+ * @crtc_w: width of hardware screen.
+ * @crtc_h: height of hardware screen.
+ */
+struct exynos_drm_crtc_pos {
+	unsigned int fb_x;
+	unsigned int fb_y;
+	unsigned int crtc_x;
+	unsigned int crtc_y;
+	unsigned int crtc_w;
+	unsigned int crtc_h;
+};
+
+/*
+ * Exynos specific crtc structure.
+ *
+ * @drm_crtc: crtc object.
+ * @overlay: contain information common to display controller and hdmi and
+ *	contents of this overlay object would be copied to sub driver size.
+ * @pipe: a crtc index created at load() with a new crtc object creation
+ *	and the crtc object would be set to private->crtc array
+ *	to get a crtc object corresponding to this pipe from private->crtc
+ *	array when irq interrupt occured. the reason of using this pipe is that
+ *	drm framework doesn't support multiple irq yet.
+ *	we can refer to the crtc to current hardware interrupt occured through
+ *	this pipe value.
+ */
+struct exynos_drm_crtc {
+	struct drm_crtc			drm_crtc;
+	struct exynos_drm_overlay	overlay;
+	unsigned int			pipe;
+};
+
+static void exynos_drm_crtc_apply(struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+	struct exynos_drm_overlay *overlay = &exynos_crtc->overlay;
+
+	exynos_drm_fn_encoder(crtc, overlay,
+			exynos_drm_encoder_crtc_mode_set);
+	exynos_drm_fn_encoder(crtc, NULL, exynos_drm_encoder_crtc_commit);
+}
+
+static int exynos_drm_overlay_update(struct exynos_drm_overlay *overlay,
+				       struct drm_framebuffer *fb,
+				       struct drm_display_mode *mode,
+				       struct exynos_drm_crtc_pos *pos)
+{
+	struct exynos_drm_buf_entry *entry;
+	unsigned int actual_w;
+	unsigned int actual_h;
+
+	entry = exynos_drm_fb_get_buf(fb);
+	if (!entry) {
+		DRM_LOG_KMS("entry is null.\n");
+		return -EFAULT;
+	}
+
+	overlay->paddr = entry->paddr;
+	overlay->vaddr = entry->vaddr;
+
+	DRM_DEBUG_KMS("vaddr = 0x%lx, paddr = 0x%lx\n",
+			(unsigned long)overlay->vaddr,
+			(unsigned long)overlay->paddr);
+
+	actual_w = min((mode->hdisplay - pos->crtc_x), pos->crtc_w);
+	actual_h = min((mode->vdisplay - pos->crtc_y), pos->crtc_h);
+
+	/* set drm framebuffer data. */
+	overlay->fb_x = pos->fb_x;
+	overlay->fb_y = pos->fb_y;
+	overlay->fb_width = fb->width;
+	overlay->fb_height = fb->height;
+	overlay->bpp = fb->bits_per_pixel;
+	overlay->pitch = fb->pitch;
+
+	/* set overlay range to be displayed. */
+	overlay->crtc_x = pos->crtc_x;
+	overlay->crtc_y = pos->crtc_y;
+	overlay->crtc_width = actual_w;
+	overlay->crtc_height = actual_h;
+
+	/* set drm mode data. */
+	overlay->mode_width = mode->hdisplay;
+	overlay->mode_height = mode->vdisplay;
+	overlay->refresh = mode->vrefresh;
+	overlay->scan_flag = mode->flags;
+
+	DRM_DEBUG_KMS("overlay : offset_x/y(%d,%d), width/height(%d,%d)",
+			overlay->crtc_x, overlay->crtc_y,
+			overlay->crtc_width, overlay->crtc_height);
+
+	return 0;
+}
+
+static int exynos_drm_crtc_update(struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc;
+	struct exynos_drm_overlay *overlay;
+	struct exynos_drm_crtc_pos pos;
+	struct drm_display_mode *mode = &crtc->mode;
+	struct drm_framebuffer *fb = crtc->fb;
+
+	if (!mode || !fb)
+		return -EINVAL;
+
+	exynos_crtc = to_exynos_crtc(crtc);
+	overlay = &exynos_crtc->overlay;
+
+	memset(&pos, 0, sizeof(struct exynos_drm_crtc_pos));
+
+	/* it means the offset of framebuffer to be displayed. */
+	pos.fb_x = crtc->x;
+	pos.fb_y = crtc->y;
+
+	/* OSD position to be displayed. */
+	pos.crtc_x = 0;
+	pos.crtc_y = 0;
+	pos.crtc_w = fb->width - crtc->x;
+	pos.crtc_h = fb->height - crtc->y;
+
+	return exynos_drm_overlay_update(overlay, crtc->fb, mode, &pos);
+}
+
+static void exynos_drm_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO */
+}
+
+static void exynos_drm_crtc_prepare(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+}
+
+static void exynos_drm_crtc_commit(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+}
+
+static bool
+exynos_drm_crtc_mode_fixup(struct drm_crtc *crtc,
+			    struct drm_display_mode *mode,
+			    struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL */
+	return true;
+}
+
+static int
+exynos_drm_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode,
+			  struct drm_display_mode *adjusted_mode, int x, int y,
+			  struct drm_framebuffer *old_fb)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode = adjusted_mode;
+
+	return exynos_drm_crtc_update(crtc);
+}
+
+static int exynos_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
+					  struct drm_framebuffer *old_fb)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	ret = exynos_drm_crtc_update(crtc);
+	if (ret)
+		return ret;
+
+	exynos_drm_crtc_apply(crtc);
+
+	return ret;
+}
+
+static void exynos_drm_crtc_load_lut(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+	/* drm framework doesn't check NULL */
+}
+
+static struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = {
+	.dpms		= exynos_drm_crtc_dpms,
+	.prepare	= exynos_drm_crtc_prepare,
+	.commit		= exynos_drm_crtc_commit,
+	.mode_fixup	= exynos_drm_crtc_mode_fixup,
+	.mode_set	= exynos_drm_crtc_mode_set,
+	.mode_set_base	= exynos_drm_crtc_mode_set_base,
+	.load_lut	= exynos_drm_crtc_load_lut,
+};
+
+static int exynos_drm_crtc_page_flip(struct drm_crtc *crtc,
+				      struct drm_framebuffer *fb,
+				      struct drm_pending_vblank_event *event)
+{
+	struct drm_device *dev = crtc->dev;
+	struct exynos_drm_private *dev_priv = dev->dev_private;
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+	struct drm_framebuffer *old_fb = crtc->fb;
+	int ret = -EINVAL;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (event) {
+		/*
+		 * the pipe from user always is 0 so we can set pipe number
+		 * of current owner to event.
+		 */
+		event->pipe = exynos_crtc->pipe;
+
+		list_add_tail(&event->base.link,
+				&dev_priv->pageflip_event_list);
+
+		ret = drm_vblank_get(dev, exynos_crtc->pipe);
+		if (ret) {
+			DRM_DEBUG("failed to acquire vblank counter\n");
+			list_del(&event->base.link);
+
+			goto out;
+		}
+
+		crtc->fb = fb;
+		ret = exynos_drm_crtc_update(crtc);
+		if (ret) {
+			crtc->fb = old_fb;
+			drm_vblank_put(dev, exynos_crtc->pipe);
+			list_del(&event->base.link);
+
+			goto out;
+		}
+
+		/*
+		 * the values related to a buffer of the drm framebuffer
+		 * to be applied should be set at here. because these values
+		 * first, are set to shadow registers and then to
+		 * real registers at vsync front porch period.
+		 */
+		exynos_drm_crtc_apply(crtc);
+	}
+out:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
+static void exynos_drm_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+	struct exynos_drm_private *private = crtc->dev->dev_private;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	private->crtc[exynos_crtc->pipe] = NULL;
+
+	drm_crtc_cleanup(crtc);
+	kfree(exynos_crtc);
+}
+
+static struct drm_crtc_funcs exynos_crtc_funcs = {
+	.set_config	= drm_crtc_helper_set_config,
+	.page_flip	= exynos_drm_crtc_page_flip,
+	.destroy	= exynos_drm_crtc_destroy,
+};
+
+struct exynos_drm_overlay *get_exynos_drm_overlay(struct drm_device *dev,
+		struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+
+	return &exynos_crtc->overlay;
+}
+
+int exynos_drm_crtc_create(struct drm_device *dev, unsigned int nr)
+{
+	struct exynos_drm_crtc *exynos_crtc;
+	struct exynos_drm_private *private = dev->dev_private;
+	struct drm_crtc *crtc;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_crtc = kzalloc(sizeof(*exynos_crtc), GFP_KERNEL);
+	if (!exynos_crtc) {
+		DRM_ERROR("failed to allocate exynos crtc\n");
+		return -ENOMEM;
+	}
+
+	exynos_crtc->pipe = nr;
+	crtc = &exynos_crtc->drm_crtc;
+
+	private->crtc[nr] = crtc;
+
+	drm_crtc_init(dev, crtc, &exynos_crtc_funcs);
+	drm_crtc_helper_add(crtc, &exynos_crtc_helper_funcs);
+
+	return 0;
+}
+
+int exynos_drm_crtc_enable_vblank(struct drm_device *dev, int crtc)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_drm_fn_encoder(private->crtc[crtc], &crtc,
+			exynos_drm_enable_vblank);
+
+	return 0;
+}
+
+void exynos_drm_crtc_disable_vblank(struct drm_device *dev, int crtc)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_drm_fn_encoder(private->crtc[crtc], &crtc,
+			exynos_drm_disable_vblank);
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM CRTC Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.h b/drivers/gpu/drm/exynos/exynos_drm_crtc.h
new file mode 100644
index 000000000000..c584042d6d2c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.h
@@ -0,0 +1,38 @@
+/* exynos_drm_crtc.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_CRTC_H_
+#define _EXYNOS_DRM_CRTC_H_
+
+struct exynos_drm_overlay *get_exynos_drm_overlay(struct drm_device *dev,
+		struct drm_crtc *crtc);
+int exynos_drm_crtc_create(struct drm_device *dev, unsigned int nr);
+int exynos_drm_crtc_enable_vblank(struct drm_device *dev, int crtc);
+void exynos_drm_crtc_disable_vblank(struct drm_device *dev, int crtc);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
new file mode 100644
index 000000000000..83810cbe3c17
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+
+#include <drm/exynos_drm.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_crtc.h"
+#include "exynos_drm_fbdev.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_gem.h"
+
+#define DRIVER_NAME	"exynos-drm"
+#define DRIVER_DESC	"Samsung SoC DRM"
+#define DRIVER_DATE	"20110530"
+#define DRIVER_MAJOR	1
+#define DRIVER_MINOR	0
+
+static int exynos_drm_load(struct drm_device *dev, unsigned long flags)
+{
+	struct exynos_drm_private *private;
+	int ret;
+	int nr;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	private = kzalloc(sizeof(struct exynos_drm_private), GFP_KERNEL);
+	if (!private) {
+		DRM_ERROR("failed to allocate private\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&private->pageflip_event_list);
+	dev->dev_private = (void *)private;
+
+	drm_mode_config_init(dev);
+
+	exynos_drm_mode_config_init(dev);
+
+	/*
+	 * EXYNOS4 is enough to have two CRTCs and each crtc would be used
+	 * without dependency of hardware.
+	 */
+	for (nr = 0; nr < MAX_CRTC; nr++) {
+		ret = exynos_drm_crtc_create(dev, nr);
+		if (ret)
+			goto err_crtc;
+	}
+
+	ret = drm_vblank_init(dev, MAX_CRTC);
+	if (ret)
+		goto err_crtc;
+
+	/*
+	 * probe sub drivers such as display controller and hdmi driver,
+	 * that were registered at probe() of platform driver
+	 * to the sub driver and create encoder and connector for them.
+	 */
+	ret = exynos_drm_device_register(dev);
+	if (ret)
+		goto err_vblank;
+
+	/*
+	 * create and configure fb helper and also exynos specific
+	 * fbdev object.
+	 */
+	ret = exynos_drm_fbdev_init(dev);
+	if (ret) {
+		DRM_ERROR("failed to initialize drm fbdev\n");
+		goto err_drm_device;
+	}
+
+	return 0;
+
+err_drm_device:
+	exynos_drm_device_unregister(dev);
+err_vblank:
+	drm_vblank_cleanup(dev);
+err_crtc:
+	drm_mode_config_cleanup(dev);
+	kfree(private);
+
+	return ret;
+}
+
+static int exynos_drm_unload(struct drm_device *dev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	exynos_drm_fbdev_fini(dev);
+	exynos_drm_device_unregister(dev);
+	drm_vblank_cleanup(dev);
+	drm_mode_config_cleanup(dev);
+	kfree(dev->dev_private);
+
+	dev->dev_private = NULL;
+
+	return 0;
+}
+
+static void exynos_drm_preclose(struct drm_device *dev,
+					struct drm_file *file_priv)
+{
+	struct exynos_drm_private *dev_priv = dev->dev_private;
+
+	/*
+	 * drm framework frees all events at release time,
+	 * so private event list should be cleared.
+	 */
+	if (!list_empty(&dev_priv->pageflip_event_list))
+		INIT_LIST_HEAD(&dev_priv->pageflip_event_list);
+}
+
+static void exynos_drm_lastclose(struct drm_device *dev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	exynos_drm_fbdev_restore_mode(dev);
+}
+
+static struct vm_operations_struct exynos_drm_gem_vm_ops = {
+	.fault = exynos_drm_gem_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+static struct drm_ioctl_desc exynos_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_CREATE, exynos_drm_gem_create_ioctl,
+			DRM_UNLOCKED | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MAP_OFFSET,
+			exynos_drm_gem_map_offset_ioctl, DRM_UNLOCKED |
+			DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MMAP,
+			exynos_drm_gem_mmap_ioctl, DRM_UNLOCKED | DRM_AUTH),
+};
+
+static struct drm_driver exynos_drm_driver = {
+	.driver_features	= DRIVER_HAVE_IRQ | DRIVER_BUS_PLATFORM |
+				  DRIVER_MODESET | DRIVER_GEM,
+	.load			= exynos_drm_load,
+	.unload			= exynos_drm_unload,
+	.preclose		= exynos_drm_preclose,
+	.lastclose		= exynos_drm_lastclose,
+	.get_vblank_counter	= drm_vblank_count,
+	.enable_vblank		= exynos_drm_crtc_enable_vblank,
+	.disable_vblank		= exynos_drm_crtc_disable_vblank,
+	.gem_init_object	= exynos_drm_gem_init_object,
+	.gem_free_object	= exynos_drm_gem_free_object,
+	.gem_vm_ops		= &exynos_drm_gem_vm_ops,
+	.dumb_create		= exynos_drm_gem_dumb_create,
+	.dumb_map_offset	= exynos_drm_gem_dumb_map_offset,
+	.dumb_destroy		= exynos_drm_gem_dumb_destroy,
+	.ioctls			= exynos_ioctls,
+	.fops = {
+		.owner		= THIS_MODULE,
+		.open		= drm_open,
+		.mmap		= exynos_drm_gem_mmap,
+		.poll		= drm_poll,
+		.read		= drm_read,
+		.unlocked_ioctl	= drm_ioctl,
+		.release	= drm_release,
+	},
+	.name	= DRIVER_NAME,
+	.desc	= DRIVER_DESC,
+	.date	= DRIVER_DATE,
+	.major	= DRIVER_MAJOR,
+	.minor	= DRIVER_MINOR,
+};
+
+static int exynos_drm_platform_probe(struct platform_device *pdev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	exynos_drm_driver.num_ioctls = DRM_ARRAY_SIZE(exynos_ioctls);
+
+	return drm_platform_init(&exynos_drm_driver, pdev);
+}
+
+static int exynos_drm_platform_remove(struct platform_device *pdev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	drm_platform_exit(&exynos_drm_driver, pdev);
+
+	return 0;
+}
+
+static struct platform_driver exynos_drm_platform_driver = {
+	.probe		= exynos_drm_platform_probe,
+	.remove		= __devexit_p(exynos_drm_platform_remove),
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= DRIVER_NAME,
+	},
+};
+
+static int __init exynos_drm_init(void)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	return platform_driver_register(&exynos_drm_platform_driver);
+}
+
+static void __exit exynos_drm_exit(void)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	platform_driver_unregister(&exynos_drm_platform_driver);
+}
+
+module_init(exynos_drm_init);
+module_exit(exynos_drm_exit);
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
new file mode 100644
index 000000000000..c03683f2ae72
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -0,0 +1,254 @@
+/* exynos_drm_drv.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_DRV_H_
+#define _EXYNOS_DRM_DRV_H_
+
+#include "drm.h"
+
+#define MAX_CRTC	2
+
+struct drm_device;
+struct exynos_drm_overlay;
+struct drm_connector;
+
+/* this enumerates display type. */
+enum exynos_drm_output_type {
+	EXYNOS_DISPLAY_TYPE_NONE,
+	/* RGB or CPU Interface. */
+	EXYNOS_DISPLAY_TYPE_LCD,
+	/* HDMI Interface. */
+	EXYNOS_DISPLAY_TYPE_HDMI,
+};
+
+/*
+ * Exynos drm overlay ops structure.
+ *
+ * @mode_set: copy drm overlay info to hw specific overlay info.
+ * @commit: apply hardware specific overlay data to registers.
+ * @disable: disable hardware specific overlay.
+ */
+struct exynos_drm_overlay_ops {
+	void (*mode_set)(struct device *subdrv_dev,
+			 struct exynos_drm_overlay *overlay);
+	void (*commit)(struct device *subdrv_dev);
+	void (*disable)(struct device *subdrv_dev);
+};
+
+/*
+ * Exynos drm common overlay structure.
+ *
+ * @fb_x: offset x on a framebuffer to be displayed.
+ *	- the unit is screen coordinates.
+ * @fb_y: offset y on a framebuffer to be displayed.
+ *	- the unit is screen coordinates.
+ * @fb_width: width of a framebuffer.
+ * @fb_height: height of a framebuffer.
+ * @crtc_x: offset x on hardware screen.
+ * @crtc_y: offset y on hardware screen.
+ * @crtc_width: window width to be displayed (hardware screen).
+ * @crtc_height: window height to be displayed (hardware screen).
+ * @mode_width: width of screen mode.
+ * @mode_height: height of screen mode.
+ * @refresh: refresh rate.
+ * @scan_flag: interlace or progressive way.
+ *	(it could be DRM_MODE_FLAG_*)
+ * @bpp: pixel size.(in bit)
+ * @paddr: bus(accessed by dma) physical memory address to this overlay
+ *		and this is physically continuous.
+ * @vaddr: virtual memory addresss to this overlay.
+ * @default_win: a window to be enabled.
+ * @color_key: color key on or off.
+ * @index_color: if using color key feature then this value would be used
+ *			as index color.
+ * @local_path: in case of lcd type, local path mode on or off.
+ * @transparency: transparency on or off.
+ * @activated: activated or not.
+ *
+ * this structure is common to exynos SoC and its contents would be copied
+ * to hardware specific overlay info.
+ */
+struct exynos_drm_overlay {
+	unsigned int fb_x;
+	unsigned int fb_y;
+	unsigned int fb_width;
+	unsigned int fb_height;
+	unsigned int crtc_x;
+	unsigned int crtc_y;
+	unsigned int crtc_width;
+	unsigned int crtc_height;
+	unsigned int mode_width;
+	unsigned int mode_height;
+	unsigned int refresh;
+	unsigned int scan_flag;
+	unsigned int bpp;
+	unsigned int pitch;
+	dma_addr_t paddr;
+	void __iomem *vaddr;
+
+	bool default_win;
+	bool color_key;
+	unsigned int index_color;
+	bool local_path;
+	bool transparency;
+	bool activated;
+};
+
+/*
+ * Exynos DRM Display Structure.
+ *	- this structure is common to analog tv, digital tv and lcd panel.
+ *
+ * @type: one of EXYNOS_DISPLAY_TYPE_LCD and HDMI.
+ * @is_connected: check for that display is connected or not.
+ * @get_edid: get edid modes from display driver.
+ * @get_timing: get timing object from display driver.
+ * @check_timing: check if timing is valid or not.
+ * @power_on: display device on or off.
+ */
+struct exynos_drm_display {
+	enum exynos_drm_output_type type;
+	bool (*is_connected)(struct device *dev);
+	int (*get_edid)(struct device *dev, struct drm_connector *connector,
+				u8 *edid, int len);
+	void *(*get_timing)(struct device *dev);
+	int (*check_timing)(struct device *dev, void *timing);
+	int (*power_on)(struct device *dev, int mode);
+};
+
+/*
+ * Exynos drm manager ops
+ *
+ * @mode_set: convert drm_display_mode to hw specific display mode and
+ *	      would be called by encoder->mode_set().
+ * @commit: set current hw specific display mode to hw.
+ * @enable_vblank: specific driver callback for enabling vblank interrupt.
+ * @disable_vblank: specific driver callback for disabling vblank interrupt.
+ */
+struct exynos_drm_manager_ops {
+	void (*mode_set)(struct device *subdrv_dev, void *mode);
+	void (*commit)(struct device *subdrv_dev);
+	int (*enable_vblank)(struct device *subdrv_dev);
+	void (*disable_vblank)(struct device *subdrv_dev);
+};
+
+/*
+ * Exynos drm common manager structure.
+ *
+ * @dev: pointer to device object for subdrv device driver.
+ *	sub drivers such as display controller or hdmi driver,
+ *	have their own device object.
+ * @ops: pointer to callbacks for exynos drm specific framebuffer.
+ *	these callbacks should be set by specific drivers such fimd
+ *	or hdmi driver and are used to control hardware global registers.
+ * @overlay_ops: pointer to callbacks for exynos drm specific framebuffer.
+ *	these callbacks should be set by specific drivers such fimd
+ *	or hdmi driver and are used to control hardware overlay reigsters.
+ * @display: pointer to callbacks for exynos drm specific framebuffer.
+ *	these callbacks should be set by specific drivers such fimd
+ *	or hdmi driver and are used to control display devices such as
+ *	analog tv, digital tv and lcd panel and also get timing data for them.
+ */
+struct exynos_drm_manager {
+	struct device *dev;
+	int pipe;
+	struct exynos_drm_manager_ops *ops;
+	struct exynos_drm_overlay_ops *overlay_ops;
+	struct exynos_drm_display *display;
+};
+
+/*
+ * Exynos drm private structure.
+ */
+struct exynos_drm_private {
+	struct drm_fb_helper *fb_helper;
+
+	/* list head for new event to be added. */
+	struct list_head pageflip_event_list;
+
+	/*
+	 * created crtc object would be contained at this array and
+	 * this array is used to be aware of which crtc did it request vblank.
+	 */
+	struct drm_crtc *crtc[MAX_CRTC];
+};
+
+/*
+ * Exynos drm sub driver structure.
+ *
+ * @list: sub driver has its own list object to register to exynos drm driver.
+ * @drm_dev: pointer to drm_device and this pointer would be set
+ *	when sub driver calls exynos_drm_subdrv_register().
+ * @probe: this callback would be called by exynos drm driver after
+ *	subdrv is registered to it.
+ * @remove: this callback is used to release resources created
+ *	by probe callback.
+ * @manager: subdrv has its own manager to control a hardware appropriately
+ *	and we can access a hardware drawing on this manager.
+ * @encoder: encoder object owned by this sub driver.
+ * @connector: connector object owned by this sub driver.
+ */
+struct exynos_drm_subdrv {
+	struct list_head list;
+	struct drm_device *drm_dev;
+
+	int (*probe)(struct drm_device *drm_dev, struct device *dev);
+	void (*remove)(struct drm_device *dev);
+
+	struct exynos_drm_manager manager;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+};
+
+/*
+ * this function calls a probe callback registered to sub driver list and
+ * create its own encoder and connector and then set drm_device object
+ * to global one.
+ */
+int exynos_drm_device_register(struct drm_device *dev);
+/*
+ * this function calls a remove callback registered to sub driver list and
+ * destroy its own encoder and connetor.
+ */
+int exynos_drm_device_unregister(struct drm_device *dev);
+
+/*
+ * this function would be called by sub drivers such as display controller
+ * or hdmi driver to register this sub driver object to exynos drm driver
+ * and when a sub driver is registered to exynos drm driver a probe callback
+ * of the sub driver is called and creates its own encoder and connector
+ * and then fb helper and drm mode group would be re-initialized.
+ */
+int exynos_drm_subdrv_register(struct exynos_drm_subdrv *drm_subdrv);
+
+/*
+ * this function removes subdrv list from exynos drm driver and fb helper
+ * and drm mode group would be re-initialized.
+ */
+int exynos_drm_subdrv_unregister(struct exynos_drm_subdrv *drm_subdrv);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.c b/drivers/gpu/drm/exynos/exynos_drm_encoder.c
new file mode 100644
index 000000000000..7cf6fa86a67e
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.c
@@ -0,0 +1,271 @@
+/* exynos_drm_encoder.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_crtc.h"
+#include "exynos_drm_encoder.h"
+
+#define to_exynos_encoder(x)	container_of(x, struct exynos_drm_encoder,\
+				drm_encoder)
+
+/*
+ * exynos specific encoder structure.
+ *
+ * @drm_encoder: encoder object.
+ * @manager: specific encoder has its own manager to control a hardware
+ *	appropriately and we can access a hardware drawing on this manager.
+ */
+struct exynos_drm_encoder {
+	struct drm_encoder		drm_encoder;
+	struct exynos_drm_manager	*manager;
+};
+
+static void exynos_drm_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_connector *connector;
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+
+	DRM_DEBUG_KMS("%s, encoder dpms: %d\n", __FILE__, mode);
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			struct exynos_drm_display *display = manager->display;
+
+			if (display && display->power_on)
+				display->power_on(manager->dev, mode);
+		}
+	}
+}
+
+static bool
+exynos_drm_encoder_mode_fixup(struct drm_encoder *encoder,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+
+	return true;
+}
+
+static void exynos_drm_encoder_mode_set(struct drm_encoder *encoder,
+					 struct drm_display_mode *mode,
+					 struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_connector *connector;
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+	struct exynos_drm_overlay *overlay = get_exynos_drm_overlay(dev,
+						encoder->crtc);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode = adjusted_mode;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			if (manager_ops && manager_ops->mode_set)
+				manager_ops->mode_set(manager->dev, mode);
+
+			if (overlay_ops && overlay_ops->mode_set)
+				overlay_ops->mode_set(manager->dev, overlay);
+		}
+	}
+}
+
+static void exynos_drm_encoder_prepare(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+}
+
+static void exynos_drm_encoder_commit(struct drm_encoder *encoder)
+{
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (manager_ops && manager_ops->commit)
+		manager_ops->commit(manager->dev);
+
+	if (overlay_ops && overlay_ops->commit)
+		overlay_ops->commit(manager->dev);
+}
+
+static struct drm_crtc *
+exynos_drm_encoder_get_crtc(struct drm_encoder *encoder)
+{
+	return encoder->crtc;
+}
+
+static struct drm_encoder_helper_funcs exynos_encoder_helper_funcs = {
+	.dpms		= exynos_drm_encoder_dpms,
+	.mode_fixup	= exynos_drm_encoder_mode_fixup,
+	.mode_set	= exynos_drm_encoder_mode_set,
+	.prepare	= exynos_drm_encoder_prepare,
+	.commit		= exynos_drm_encoder_commit,
+	.get_crtc	= exynos_drm_encoder_get_crtc,
+};
+
+static void exynos_drm_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct exynos_drm_encoder *exynos_encoder =
+		to_exynos_encoder(encoder);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_encoder->manager->pipe = -1;
+
+	drm_encoder_cleanup(encoder);
+	encoder->dev->mode_config.num_encoder--;
+	kfree(exynos_encoder);
+}
+
+static struct drm_encoder_funcs exynos_encoder_funcs = {
+	.destroy = exynos_drm_encoder_destroy,
+};
+
+struct drm_encoder *
+exynos_drm_encoder_create(struct drm_device *dev,
+			   struct exynos_drm_manager *manager,
+			   unsigned int possible_crtcs)
+{
+	struct drm_encoder *encoder;
+	struct exynos_drm_encoder *exynos_encoder;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!manager || !possible_crtcs)
+		return NULL;
+
+	if (!manager->dev)
+		return NULL;
+
+	exynos_encoder = kzalloc(sizeof(*exynos_encoder), GFP_KERNEL);
+	if (!exynos_encoder) {
+		DRM_ERROR("failed to allocate encoder\n");
+		return NULL;
+	}
+
+	exynos_encoder->manager = manager;
+	encoder = &exynos_encoder->drm_encoder;
+	encoder->possible_crtcs = possible_crtcs;
+
+	DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
+
+	drm_encoder_init(dev, encoder, &exynos_encoder_funcs,
+			DRM_MODE_ENCODER_TMDS);
+
+	drm_encoder_helper_add(encoder, &exynos_encoder_helper_funcs);
+
+	DRM_DEBUG_KMS("encoder has been created\n");
+
+	return encoder;
+}
+
+struct exynos_drm_manager *exynos_drm_get_manager(struct drm_encoder *encoder)
+{
+	return to_exynos_encoder(encoder)->manager;
+}
+
+void exynos_drm_fn_encoder(struct drm_crtc *crtc, void *data,
+			    void (*fn)(struct drm_encoder *, void *))
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_encoder *encoder;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		if (encoder->crtc != crtc)
+			continue;
+
+		fn(encoder, data);
+	}
+}
+
+void exynos_drm_enable_vblank(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	int crtc = *(int *)data;
+
+	if (manager->pipe == -1)
+		manager->pipe = crtc;
+
+	if (manager_ops->enable_vblank)
+		manager_ops->enable_vblank(manager->dev);
+}
+
+void exynos_drm_disable_vblank(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	int crtc = *(int *)data;
+
+	if (manager->pipe == -1)
+		manager->pipe = crtc;
+
+	if (manager_ops->disable_vblank)
+		manager_ops->disable_vblank(manager->dev);
+}
+
+void exynos_drm_encoder_crtc_commit(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+
+	overlay_ops->commit(manager->dev);
+}
+
+void exynos_drm_encoder_crtc_mode_set(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+	struct exynos_drm_overlay *overlay = data;
+
+	overlay_ops->mode_set(manager->dev, overlay);
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Encoder Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.h b/drivers/gpu/drm/exynos/exynos_drm_encoder.h
new file mode 100644
index 000000000000..5ecd645d06a9
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_ENCODER_H_
+#define _EXYNOS_DRM_ENCODER_H_
+
+struct exynos_drm_manager;
+
+struct drm_encoder *exynos_drm_encoder_create(struct drm_device *dev,
+					       struct exynos_drm_manager *mgr,
+					       unsigned int possible_crtcs);
+struct exynos_drm_manager *
+exynos_drm_get_manager(struct drm_encoder *encoder);
+void exynos_drm_fn_encoder(struct drm_crtc *crtc, void *data,
+			    void (*fn)(struct drm_encoder *, void *));
+void exynos_drm_enable_vblank(struct drm_encoder *encoder, void *data);
+void exynos_drm_disable_vblank(struct drm_encoder *encoder, void *data);
+void exynos_drm_encoder_crtc_commit(struct drm_encoder *encoder, void *data);
+void exynos_drm_encoder_crtc_mode_set(struct drm_encoder *encoder, void *data);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
new file mode 100644
index 000000000000..48d29cfd5240
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -0,0 +1,265 @@
+/* exynos_drm_fb.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_fb.h"
+#include "exynos_drm_buf.h"
+#include "exynos_drm_gem.h"
+
+#define to_exynos_fb(x)	container_of(x, struct exynos_drm_fb, fb)
+
+/*
+ * exynos specific framebuffer structure.
+ *
+ * @fb: drm framebuffer obejct.
+ * @exynos_gem_obj: exynos specific gem object containing a gem object.
+ * @entry: pointer to exynos drm buffer entry object.
+ *	- containing only the information to physically continuous memory
+ *	region allocated at default framebuffer creation.
+ */
+struct exynos_drm_fb {
+	struct drm_framebuffer		fb;
+	struct exynos_drm_gem_obj	*exynos_gem_obj;
+	struct exynos_drm_buf_entry	*entry;
+};
+
+static void exynos_drm_fb_destroy(struct drm_framebuffer *fb)
+{
+	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	drm_framebuffer_cleanup(fb);
+
+	/*
+	 * default framebuffer has no gem object so
+	 * a buffer of the default framebuffer should be released at here.
+	 */
+	if (!exynos_fb->exynos_gem_obj && exynos_fb->entry)
+		exynos_drm_buf_destroy(fb->dev, exynos_fb->entry);
+
+	kfree(exynos_fb);
+	exynos_fb = NULL;
+}
+
+static int exynos_drm_fb_create_handle(struct drm_framebuffer *fb,
+					struct drm_file *file_priv,
+					unsigned int *handle)
+{
+	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return drm_gem_handle_create(file_priv,
+			&exynos_fb->exynos_gem_obj->base, handle);
+}
+
+static int exynos_drm_fb_dirty(struct drm_framebuffer *fb,
+				struct drm_file *file_priv, unsigned flags,
+				unsigned color, struct drm_clip_rect *clips,
+				unsigned num_clips)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO */
+
+	return 0;
+}
+
+static struct drm_framebuffer_funcs exynos_drm_fb_funcs = {
+	.destroy	= exynos_drm_fb_destroy,
+	.create_handle	= exynos_drm_fb_create_handle,
+	.dirty		= exynos_drm_fb_dirty,
+};
+
+static struct drm_framebuffer *
+exynos_drm_fb_init(struct drm_file *file_priv, struct drm_device *dev,
+		    struct drm_mode_fb_cmd *mode_cmd)
+{
+	struct exynos_drm_fb *exynos_fb;
+	struct drm_framebuffer *fb;
+	struct exynos_drm_gem_obj *exynos_gem_obj = NULL;
+	struct drm_gem_object *obj;
+	unsigned int size;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode_cmd->pitch = max(mode_cmd->pitch,
+			mode_cmd->width * (mode_cmd->bpp >> 3));
+
+	DRM_LOG_KMS("drm fb create(%dx%d)\n",
+			mode_cmd->width, mode_cmd->height);
+
+	exynos_fb = kzalloc(sizeof(*exynos_fb), GFP_KERNEL);
+	if (!exynos_fb) {
+		DRM_ERROR("failed to allocate exynos drm framebuffer.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fb = &exynos_fb->fb;
+	ret = drm_framebuffer_init(dev, fb, &exynos_drm_fb_funcs);
+	if (ret) {
+		DRM_ERROR("failed to initialize framebuffer.\n");
+		goto err_init;
+	}
+
+	DRM_LOG_KMS("create: fb id: %d\n", fb->base.id);
+
+	size = mode_cmd->pitch * mode_cmd->height;
+
+	/*
+	 * mode_cmd->handle could be NULL at booting time or
+	 * with user request. if NULL, a new buffer or a gem object
+	 * would be allocated.
+	 */
+	if (!mode_cmd->handle) {
+		if (!file_priv) {
+			struct exynos_drm_buf_entry *entry;
+
+			/*
+			 * in case that file_priv is NULL, it allocates
+			 * only buffer and this buffer would be used
+			 * for default framebuffer.
+			 */
+			entry = exynos_drm_buf_create(dev, size);
+			if (IS_ERR(entry)) {
+				ret = PTR_ERR(entry);
+				goto err_buffer;
+			}
+
+			exynos_fb->entry = entry;
+
+			DRM_LOG_KMS("default fb: paddr = 0x%lx, size = 0x%x\n",
+					(unsigned long)entry->paddr, size);
+
+			goto out;
+		} else {
+			exynos_gem_obj = exynos_drm_gem_create(file_priv, dev,
+							size,
+							&mode_cmd->handle);
+			if (IS_ERR(exynos_gem_obj)) {
+				ret = PTR_ERR(exynos_gem_obj);
+				goto err_buffer;
+			}
+		}
+	} else {
+		obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handle);
+		if (!obj) {
+			DRM_ERROR("failed to lookup gem object.\n");
+			goto err_buffer;
+		}
+
+		exynos_gem_obj = to_exynos_gem_obj(obj);
+
+		drm_gem_object_unreference_unlocked(obj);
+	}
+
+	/*
+	 * if got a exynos_gem_obj from either a handle or
+	 * a new creation then exynos_fb->exynos_gem_obj is NULL
+	 * so that default framebuffer has no its own gem object,
+	 * only its own buffer object.
+	 */
+	exynos_fb->entry = exynos_gem_obj->entry;
+
+	DRM_LOG_KMS("paddr = 0x%lx, size = 0x%x, gem object = 0x%x\n",
+			(unsigned long)exynos_fb->entry->paddr, size,
+			(unsigned int)&exynos_gem_obj->base);
+
+out:
+	exynos_fb->exynos_gem_obj = exynos_gem_obj;
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	return fb;
+
+err_buffer:
+	drm_framebuffer_cleanup(fb);
+
+err_init:
+	kfree(exynos_fb);
+
+	return ERR_PTR(ret);
+}
+
+struct drm_framebuffer *exynos_drm_fb_create(struct drm_device *dev,
+					      struct drm_file *file_priv,
+					      struct drm_mode_fb_cmd *mode_cmd)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return exynos_drm_fb_init(file_priv, dev, mode_cmd);
+}
+
+struct exynos_drm_buf_entry *exynos_drm_fb_get_buf(struct drm_framebuffer *fb)
+{
+	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+	struct exynos_drm_buf_entry *entry;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	entry = exynos_fb->entry;
+	if (!entry)
+		return NULL;
+
+	DRM_DEBUG_KMS("vaddr = 0x%lx, paddr = 0x%lx\n",
+			(unsigned long)entry->vaddr,
+			(unsigned long)entry->paddr);
+
+	return entry;
+}
+
+static struct drm_mode_config_funcs exynos_drm_mode_config_funcs = {
+	.fb_create = exynos_drm_fb_create,
+};
+
+void exynos_drm_mode_config_init(struct drm_device *dev)
+{
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+
+	/*
+	 * set max width and height as default value(4096x4096).
+	 * this value would be used to check framebuffer size limitation
+	 * at drm_mode_addfb().
+	 */
+	dev->mode_config.max_width = 4096;
+	dev->mode_config.max_height = 4096;
+
+	dev->mode_config.funcs = &exynos_drm_mode_config_funcs;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM FB Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.h b/drivers/gpu/drm/exynos/exynos_drm_fb.h
new file mode 100644
index 000000000000..eb35931d302c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_FB_H_
+#define _EXYNOS_DRM_FB_H
+
+struct drm_framebuffer *exynos_drm_fb_create(struct drm_device *dev,
+					      struct drm_file *filp,
+					      struct drm_mode_fb_cmd *mode_cmd);
+
+void exynos_drm_mode_config_init(struct drm_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
new file mode 100644
index 000000000000..1f4b3d1a7713
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -0,0 +1,456 @@
+/* exynos_drm_fbdev.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc.h"
+#include "drm_fb_helper.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_buf.h"
+
+#define MAX_CONNECTOR		4
+#define PREFERRED_BPP		32
+
+#define to_exynos_fbdev(x)	container_of(x, struct exynos_drm_fbdev,\
+				drm_fb_helper)
+
+struct exynos_drm_fbdev {
+	struct drm_fb_helper	drm_fb_helper;
+	struct drm_framebuffer	*fb;
+};
+
+static int exynos_drm_fbdev_set_par(struct fb_info *info)
+{
+	struct fb_var_screeninfo *var = &info->var;
+
+	switch (var->bits_per_pixel) {
+	case 32:
+	case 24:
+	case 18:
+	case 16:
+	case 12:
+		info->fix.visual = FB_VISUAL_TRUECOLOR;
+		break;
+	case 1:
+		info->fix.visual = FB_VISUAL_MONO01;
+		break;
+	default:
+		info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+		break;
+	}
+
+	info->fix.line_length = (var->xres_virtual * var->bits_per_pixel) / 8;
+
+	return drm_fb_helper_set_par(info);
+}
+
+
+static struct fb_ops exynos_drm_fb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+	.fb_check_var	= drm_fb_helper_check_var,
+	.fb_set_par	= exynos_drm_fbdev_set_par,
+	.fb_blank	= drm_fb_helper_blank,
+	.fb_pan_display	= drm_fb_helper_pan_display,
+	.fb_setcmap	= drm_fb_helper_setcmap,
+};
+
+static int exynos_drm_fbdev_update(struct drm_fb_helper *helper,
+				     struct drm_framebuffer *fb,
+				     unsigned int fb_width,
+				     unsigned int fb_height)
+{
+	struct fb_info *fbi = helper->fbdev;
+	struct drm_device *dev = helper->dev;
+	struct exynos_drm_fbdev *exynos_fb = to_exynos_fbdev(helper);
+	struct exynos_drm_buf_entry *entry;
+	unsigned int size = fb_width * fb_height * (fb->bits_per_pixel >> 3);
+	unsigned long offset;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_fb->fb = fb;
+
+	drm_fb_helper_fill_fix(fbi, fb->pitch, fb->depth);
+	drm_fb_helper_fill_var(fbi, helper, fb_width, fb_height);
+
+	entry = exynos_drm_fb_get_buf(fb);
+	if (!entry) {
+		DRM_LOG_KMS("entry is null.\n");
+		return -EFAULT;
+	}
+
+	offset = fbi->var.xoffset * (fb->bits_per_pixel >> 3);
+	offset += fbi->var.yoffset * fb->pitch;
+
+	dev->mode_config.fb_base = entry->paddr;
+	fbi->screen_base = entry->vaddr + offset;
+	fbi->fix.smem_start = entry->paddr + offset;
+	fbi->screen_size = size;
+	fbi->fix.smem_len = size;
+
+	return 0;
+}
+
+static int exynos_drm_fbdev_create(struct drm_fb_helper *helper,
+				    struct drm_fb_helper_surface_size *sizes)
+{
+	struct exynos_drm_fbdev *exynos_fbdev = to_exynos_fbdev(helper);
+	struct drm_device *dev = helper->dev;
+	struct fb_info *fbi;
+	struct drm_mode_fb_cmd mode_cmd = { 0 };
+	struct platform_device *pdev = dev->platformdev;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	DRM_DEBUG_KMS("surface width(%d), height(%d) and bpp(%d\n",
+			sizes->surface_width, sizes->surface_height,
+			sizes->surface_bpp);
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+	mode_cmd.bpp = sizes->surface_bpp;
+	mode_cmd.depth = sizes->surface_depth;
+
+	mutex_lock(&dev->struct_mutex);
+
+	fbi = framebuffer_alloc(0, &pdev->dev);
+	if (!fbi) {
+		DRM_ERROR("failed to allocate fb info.\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	exynos_fbdev->fb = exynos_drm_fb_create(dev, NULL, &mode_cmd);
+	if (IS_ERR_OR_NULL(exynos_fbdev->fb)) {
+		DRM_ERROR("failed to create drm framebuffer.\n");
+		ret = PTR_ERR(exynos_fbdev->fb);
+		goto out;
+	}
+
+	helper->fb = exynos_fbdev->fb;
+	helper->fbdev = fbi;
+
+	fbi->par = helper;
+	fbi->flags = FBINFO_FLAG_DEFAULT;
+	fbi->fbops = &exynos_drm_fb_ops;
+
+	ret = fb_alloc_cmap(&fbi->cmap, 256, 0);
+	if (ret) {
+		DRM_ERROR("failed to allocate cmap.\n");
+		goto out;
+	}
+
+	ret = exynos_drm_fbdev_update(helper, helper->fb, sizes->fb_width,
+			sizes->fb_height);
+	if (ret < 0)
+		fb_dealloc_cmap(&fbi->cmap);
+
+/*
+ * if failed, all resources allocated above would be released by
+ * drm_mode_config_cleanup() when drm_load() had been called prior
+ * to any specific driver such as fimd or hdmi driver.
+ */
+out:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
+static bool
+exynos_drm_fbdev_is_samefb(struct drm_framebuffer *fb,
+			    struct drm_fb_helper_surface_size *sizes)
+{
+	if (fb->width != sizes->surface_width)
+		return false;
+	if (fb->height != sizes->surface_height)
+		return false;
+	if (fb->bits_per_pixel != sizes->surface_bpp)
+		return false;
+	if (fb->depth != sizes->surface_depth)
+		return false;
+
+	return true;
+}
+
+static int exynos_drm_fbdev_recreate(struct drm_fb_helper *helper,
+				      struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_device *dev = helper->dev;
+	struct exynos_drm_fbdev *exynos_fbdev = to_exynos_fbdev(helper);
+	struct drm_framebuffer *fb = exynos_fbdev->fb;
+	struct drm_mode_fb_cmd mode_cmd = { 0 };
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (helper->fb != fb) {
+		DRM_ERROR("drm framebuffer is different\n");
+		return -EINVAL;
+	}
+
+	if (exynos_drm_fbdev_is_samefb(fb, sizes))
+		return 0;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+	mode_cmd.bpp = sizes->surface_bpp;
+	mode_cmd.depth = sizes->surface_depth;
+
+	if (fb->funcs->destroy)
+		fb->funcs->destroy(fb);
+
+	exynos_fbdev->fb = exynos_drm_fb_create(dev, NULL, &mode_cmd);
+	if (IS_ERR(exynos_fbdev->fb)) {
+		DRM_ERROR("failed to allocate fb.\n");
+		return PTR_ERR(exynos_fbdev->fb);
+	}
+
+	helper->fb = exynos_fbdev->fb;
+	return exynos_drm_fbdev_update(helper, helper->fb, sizes->fb_width,
+			sizes->fb_height);
+}
+
+static int exynos_drm_fbdev_probe(struct drm_fb_helper *helper,
+				   struct drm_fb_helper_surface_size *sizes)
+{
+	int ret = 0;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!helper->fb) {
+		ret = exynos_drm_fbdev_create(helper, sizes);
+		if (ret < 0) {
+			DRM_ERROR("failed to create fbdev.\n");
+			return ret;
+		}
+
+		/*
+		 * fb_helper expects a value more than 1 if succeed
+		 * because register_framebuffer() should be called.
+		 */
+		ret = 1;
+	} else {
+		ret = exynos_drm_fbdev_recreate(helper, sizes);
+		if (ret < 0) {
+			DRM_ERROR("failed to reconfigure fbdev\n");
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static struct drm_fb_helper_funcs exynos_drm_fb_helper_funcs = {
+	.fb_probe =	exynos_drm_fbdev_probe,
+};
+
+int exynos_drm_fbdev_init(struct drm_device *dev)
+{
+	struct exynos_drm_fbdev *fbdev;
+	struct exynos_drm_private *private = dev->dev_private;
+	struct drm_fb_helper *helper;
+	unsigned int num_crtc;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!dev->mode_config.num_crtc || !dev->mode_config.num_connector)
+		return 0;
+
+	fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL);
+	if (!fbdev) {
+		DRM_ERROR("failed to allocate drm fbdev.\n");
+		return -ENOMEM;
+	}
+
+	private->fb_helper = helper = &fbdev->drm_fb_helper;
+	helper->funcs = &exynos_drm_fb_helper_funcs;
+
+	num_crtc = dev->mode_config.num_crtc;
+
+	ret = drm_fb_helper_init(dev, helper, num_crtc, MAX_CONNECTOR);
+	if (ret < 0) {
+		DRM_ERROR("failed to initialize drm fb helper.\n");
+		goto err_init;
+	}
+
+	ret = drm_fb_helper_single_add_all_connectors(helper);
+	if (ret < 0) {
+		DRM_ERROR("failed to register drm_fb_helper_connector.\n");
+		goto err_setup;
+
+	}
+
+	ret = drm_fb_helper_initial_config(helper, PREFERRED_BPP);
+	if (ret < 0) {
+		DRM_ERROR("failed to set up hw configuration.\n");
+		goto err_setup;
+	}
+
+	return 0;
+
+err_setup:
+	drm_fb_helper_fini(helper);
+
+err_init:
+	private->fb_helper = NULL;
+	kfree(fbdev);
+
+	return ret;
+}
+
+static void exynos_drm_fbdev_destroy(struct drm_device *dev,
+				      struct drm_fb_helper *fb_helper)
+{
+	struct drm_framebuffer *fb;
+
+	/* release drm framebuffer and real buffer */
+	if (fb_helper->fb && fb_helper->fb->funcs) {
+		fb = fb_helper->fb;
+		if (fb && fb->funcs->destroy)
+			fb->funcs->destroy(fb);
+	}
+
+	/* release linux framebuffer */
+	if (fb_helper->fbdev) {
+		struct fb_info *info;
+		int ret;
+
+		info = fb_helper->fbdev;
+		ret = unregister_framebuffer(info);
+		if (ret < 0)
+			DRM_DEBUG_KMS("failed unregister_framebuffer()\n");
+
+		if (info->cmap.len)
+			fb_dealloc_cmap(&info->cmap);
+
+		framebuffer_release(info);
+	}
+
+	drm_fb_helper_fini(fb_helper);
+}
+
+void exynos_drm_fbdev_fini(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+	struct exynos_drm_fbdev *fbdev;
+
+	if (!private || !private->fb_helper)
+		return;
+
+	fbdev = to_exynos_fbdev(private->fb_helper);
+
+	exynos_drm_fbdev_destroy(dev, private->fb_helper);
+	kfree(fbdev);
+	private->fb_helper = NULL;
+}
+
+void exynos_drm_fbdev_restore_mode(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	if (!private || !private->fb_helper)
+		return;
+
+	drm_fb_helper_restore_fbdev_mode(private->fb_helper);
+}
+
+int exynos_drm_fbdev_reinit(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+	struct drm_fb_helper *fb_helper;
+	int ret;
+
+	if (!private)
+		return -EINVAL;
+
+	/*
+	 * if all sub drivers were unloaded then num_connector is 0
+	 * so at this time, the framebuffers also should be destroyed.
+	 */
+	if (!dev->mode_config.num_connector) {
+		exynos_drm_fbdev_fini(dev);
+		return 0;
+	}
+
+	fb_helper = private->fb_helper;
+
+	if (fb_helper) {
+		drm_fb_helper_fini(fb_helper);
+
+		ret = drm_fb_helper_init(dev, fb_helper,
+				dev->mode_config.num_crtc, MAX_CONNECTOR);
+		if (ret < 0) {
+			DRM_ERROR("failed to initialize drm fb helper\n");
+			return ret;
+		}
+
+		ret = drm_fb_helper_single_add_all_connectors(fb_helper);
+		if (ret < 0) {
+			DRM_ERROR("failed to add fb helper to connectors\n");
+			goto err;
+		}
+
+		ret = drm_fb_helper_initial_config(fb_helper, PREFERRED_BPP);
+		if (ret < 0) {
+			DRM_ERROR("failed to set up hw configuration.\n");
+			goto err;
+		}
+	} else {
+		/*
+		 * if drm_load() failed whem drm load() was called prior
+		 * to specific drivers, fb_helper must be NULL and so
+		 * this fuction should be called again to re-initialize and
+		 * re-configure the fb helper. it means that this function
+		 * has been called by the specific drivers.
+		 */
+		ret = exynos_drm_fbdev_init(dev);
+	}
+
+	return ret;
+
+err:
+	/*
+	 * if drm_load() failed when drm load() was called prior
+	 * to specific drivers, the fb_helper must be NULL and so check it.
+	 */
+	if (fb_helper)
+		drm_fb_helper_fini(fb_helper);
+
+	return ret;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM FBDEV Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
new file mode 100644
index 000000000000..ccfce8a1a451
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_FBDEV_H_
+#define _EXYNOS_DRM_FBDEV_H_
+
+int exynos_drm_fbdev_init(struct drm_device *dev);
+int exynos_drm_fbdev_reinit(struct drm_device *dev);
+void exynos_drm_fbdev_fini(struct drm_device *dev);
+void exynos_drm_fbdev_restore_mode(struct drm_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
new file mode 100644
index 000000000000..4659c88cdd9b
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -0,0 +1,811 @@
+/* exynos_drm_fimd.c
+ *
+ * Copyright (C) 2011 Samsung Electronics Co.Ltd
+ * Authors:
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Inki Dae <inki.dae@samsung.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include "drmP.h"
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+
+#include <drm/exynos_drm.h>
+#include <plat/regs-fb-v4.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_fbdev.h"
+#include "exynos_drm_crtc.h"
+
+/*
+ * FIMD is stand for Fully Interactive Mobile Display and
+ * as a display controller, it transfers contents drawn on memory
+ * to a LCD Panel through Display Interfaces such as RGB or
+ * CPU Interface.
+ */
+
+/* position control register for hardware window 0, 2 ~ 4.*/
+#define VIDOSD_A(win)		(VIDOSD_BASE + 0x00 + (win) * 16)
+#define VIDOSD_B(win)		(VIDOSD_BASE + 0x04 + (win) * 16)
+/* size control register for hardware window 0. */
+#define VIDOSD_C_SIZE_W0	(VIDOSD_BASE + 0x08)
+/* alpha control register for hardware window 1 ~ 4. */
+#define VIDOSD_C(win)		(VIDOSD_BASE + 0x18 + (win) * 16)
+/* size control register for hardware window 1 ~ 4. */
+#define VIDOSD_D(win)		(VIDOSD_BASE + 0x0C + (win) * 16)
+
+#define VIDWx_BUF_START(win, buf)	(VIDW_BUF_START(buf) + (win) * 8)
+#define VIDWx_BUF_END(win, buf)		(VIDW_BUF_END(buf) + (win) * 8)
+#define VIDWx_BUF_SIZE(win, buf)	(VIDW_BUF_SIZE(buf) + (win) * 4)
+
+/* color key control register for hardware window 1 ~ 4. */
+#define WKEYCON0_BASE(x)		((WKEYCON0 + 0x140) + (x * 8))
+/* color key value register for hardware window 1 ~ 4. */
+#define WKEYCON1_BASE(x)		((WKEYCON1 + 0x140) + (x * 8))
+
+/* FIMD has totally five hardware windows. */
+#define WINDOWS_NR	5
+
+#define get_fimd_context(dev)	platform_get_drvdata(to_platform_device(dev))
+
+struct fimd_win_data {
+	unsigned int		offset_x;
+	unsigned int		offset_y;
+	unsigned int		ovl_width;
+	unsigned int		ovl_height;
+	unsigned int		fb_width;
+	unsigned int		fb_height;
+	unsigned int		bpp;
+	dma_addr_t		paddr;
+	void __iomem		*vaddr;
+	unsigned int		buf_offsize;
+	unsigned int		line_size;	/* bytes */
+};
+
+struct fimd_context {
+	struct exynos_drm_subdrv	subdrv;
+	int				irq;
+	struct drm_crtc			*crtc;
+	struct clk			*bus_clk;
+	struct clk			*lcd_clk;
+	struct resource			*regs_res;
+	void __iomem			*regs;
+	struct fimd_win_data		win_data[WINDOWS_NR];
+	unsigned int			clkdiv;
+	unsigned int			default_win;
+	unsigned long			irq_flags;
+	u32				vidcon0;
+	u32				vidcon1;
+
+	struct fb_videomode		*timing;
+};
+
+static bool fimd_display_is_connected(struct device *dev)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+
+	return true;
+}
+
+static void *fimd_get_timing(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return ctx->timing;
+}
+
+static int fimd_check_timing(struct device *dev, void *timing)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+
+	return 0;
+}
+
+static int fimd_display_power_on(struct device *dev, int mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+
+	return 0;
+}
+
+static struct exynos_drm_display fimd_display = {
+	.type = EXYNOS_DISPLAY_TYPE_LCD,
+	.is_connected = fimd_display_is_connected,
+	.get_timing = fimd_get_timing,
+	.check_timing = fimd_check_timing,
+	.power_on = fimd_display_power_on,
+};
+
+static void fimd_commit(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fb_videomode *timing = ctx->timing;
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* setup polarity values from machine code. */
+	writel(ctx->vidcon1, ctx->regs + VIDCON1);
+
+	/* setup vertical timing values. */
+	val = VIDTCON0_VBPD(timing->upper_margin - 1) |
+	       VIDTCON0_VFPD(timing->lower_margin - 1) |
+	       VIDTCON0_VSPW(timing->vsync_len - 1);
+	writel(val, ctx->regs + VIDTCON0);
+
+	/* setup horizontal timing values.  */
+	val = VIDTCON1_HBPD(timing->left_margin - 1) |
+	       VIDTCON1_HFPD(timing->right_margin - 1) |
+	       VIDTCON1_HSPW(timing->hsync_len - 1);
+	writel(val, ctx->regs + VIDTCON1);
+
+	/* setup horizontal and vertical display size. */
+	val = VIDTCON2_LINEVAL(timing->yres - 1) |
+	       VIDTCON2_HOZVAL(timing->xres - 1);
+	writel(val, ctx->regs + VIDTCON2);
+
+	/* setup clock source, clock divider, enable dma. */
+	val = ctx->vidcon0;
+	val &= ~(VIDCON0_CLKVAL_F_MASK | VIDCON0_CLKDIR);
+
+	if (ctx->clkdiv > 1)
+		val |= VIDCON0_CLKVAL_F(ctx->clkdiv - 1) | VIDCON0_CLKDIR;
+	else
+		val &= ~VIDCON0_CLKDIR;	/* 1:1 clock */
+
+	/*
+	 * fields of register with prefix '_F' would be updated
+	 * at vsync(same as dma start)
+	 */
+	val |= VIDCON0_ENVID | VIDCON0_ENVID_F;
+	writel(val, ctx->regs + VIDCON0);
+}
+
+static int fimd_enable_vblank(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!test_and_set_bit(0, &ctx->irq_flags)) {
+		val = readl(ctx->regs + VIDINTCON0);
+
+		val |= VIDINTCON0_INT_ENABLE;
+		val |= VIDINTCON0_INT_FRAME;
+
+		val &= ~VIDINTCON0_FRAMESEL0_MASK;
+		val |= VIDINTCON0_FRAMESEL0_VSYNC;
+		val &= ~VIDINTCON0_FRAMESEL1_MASK;
+		val |= VIDINTCON0_FRAMESEL1_NONE;
+
+		writel(val, ctx->regs + VIDINTCON0);
+	}
+
+	return 0;
+}
+
+static void fimd_disable_vblank(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (test_and_clear_bit(0, &ctx->irq_flags)) {
+		val = readl(ctx->regs + VIDINTCON0);
+
+		val &= ~VIDINTCON0_INT_FRAME;
+		val &= ~VIDINTCON0_INT_ENABLE;
+
+		writel(val, ctx->regs + VIDINTCON0);
+	}
+}
+
+static struct exynos_drm_manager_ops fimd_manager_ops = {
+	.commit = fimd_commit,
+	.enable_vblank = fimd_enable_vblank,
+	.disable_vblank = fimd_disable_vblank,
+};
+
+static void fimd_win_mode_set(struct device *dev,
+			      struct exynos_drm_overlay *overlay)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	unsigned long offset;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!overlay) {
+		dev_err(dev, "overlay is NULL\n");
+		return;
+	}
+
+	offset = overlay->fb_x * (overlay->bpp >> 3);
+	offset += overlay->fb_y * overlay->pitch;
+
+	DRM_DEBUG_KMS("offset = 0x%lx, pitch = %x\n", offset, overlay->pitch);
+
+	win_data = &ctx->win_data[ctx->default_win];
+
+	win_data->offset_x = overlay->crtc_x;
+	win_data->offset_y = overlay->crtc_y;
+	win_data->ovl_width = overlay->crtc_width;
+	win_data->ovl_height = overlay->crtc_height;
+	win_data->fb_width = overlay->fb_width;
+	win_data->fb_height = overlay->fb_height;
+	win_data->paddr = overlay->paddr + offset;
+	win_data->vaddr = overlay->vaddr + offset;
+	win_data->bpp = overlay->bpp;
+	win_data->buf_offsize = (overlay->fb_width - overlay->crtc_width) *
+				(overlay->bpp >> 3);
+	win_data->line_size = overlay->crtc_width * (overlay->bpp >> 3);
+
+	DRM_DEBUG_KMS("offset_x = %d, offset_y = %d\n",
+			win_data->offset_x, win_data->offset_y);
+	DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n",
+			win_data->ovl_width, win_data->ovl_height);
+	DRM_DEBUG_KMS("paddr = 0x%lx, vaddr = 0x%lx\n",
+			(unsigned long)win_data->paddr,
+			(unsigned long)win_data->vaddr);
+	DRM_DEBUG_KMS("fb_width = %d, crtc_width = %d\n",
+			overlay->fb_width, overlay->crtc_width);
+}
+
+static void fimd_win_set_pixfmt(struct device *dev, unsigned int win)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data = &ctx->win_data[win];
+	unsigned long val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	val = WINCONx_ENWIN;
+
+	switch (win_data->bpp) {
+	case 1:
+		val |= WINCON0_BPPMODE_1BPP;
+		val |= WINCONx_BITSWP;
+		val |= WINCONx_BURSTLEN_4WORD;
+		break;
+	case 2:
+		val |= WINCON0_BPPMODE_2BPP;
+		val |= WINCONx_BITSWP;
+		val |= WINCONx_BURSTLEN_8WORD;
+		break;
+	case 4:
+		val |= WINCON0_BPPMODE_4BPP;
+		val |= WINCONx_BITSWP;
+		val |= WINCONx_BURSTLEN_8WORD;
+		break;
+	case 8:
+		val |= WINCON0_BPPMODE_8BPP_PALETTE;
+		val |= WINCONx_BURSTLEN_8WORD;
+		val |= WINCONx_BYTSWP;
+		break;
+	case 16:
+		val |= WINCON0_BPPMODE_16BPP_565;
+		val |= WINCONx_HAWSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	case 24:
+		val |= WINCON0_BPPMODE_24BPP_888;
+		val |= WINCONx_WSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	case 32:
+		val |= WINCON1_BPPMODE_28BPP_A4888
+			| WINCON1_BLD_PIX | WINCON1_ALPHA_SEL;
+		val |= WINCONx_WSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	default:
+		DRM_DEBUG_KMS("invalid pixel size so using unpacked 24bpp.\n");
+
+		val |= WINCON0_BPPMODE_24BPP_888;
+		val |= WINCONx_WSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	}
+
+	DRM_DEBUG_KMS("bpp = %d\n", win_data->bpp);
+
+	writel(val, ctx->regs + WINCON(win));
+}
+
+static void fimd_win_set_colkey(struct device *dev, unsigned int win)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	unsigned int keycon0 = 0, keycon1 = 0;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	keycon0 = ~(WxKEYCON0_KEYBL_EN | WxKEYCON0_KEYEN_F |
+			WxKEYCON0_DIRCON) | WxKEYCON0_COMPKEY(0);
+
+	keycon1 = WxKEYCON1_COLVAL(0xffffffff);
+
+	writel(keycon0, ctx->regs + WKEYCON0_BASE(win));
+	writel(keycon1, ctx->regs + WKEYCON1_BASE(win));
+}
+
+static void fimd_win_commit(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	int win = ctx->default_win;
+	unsigned long val, alpha, size;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (win < 0 || win > WINDOWS_NR)
+		return;
+
+	win_data = &ctx->win_data[win];
+
+	/*
+	 * SHADOWCON register is used for enabling timing.
+	 *
+	 * for example, once only width value of a register is set,
+	 * if the dma is started then fimd hardware could malfunction so
+	 * with protect window setting, the register fields with prefix '_F'
+	 * wouldn't be updated at vsync also but updated once unprotect window
+	 * is set.
+	 */
+
+	/* protect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val |= SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+
+	/* buffer start address */
+	val = win_data->paddr;
+	writel(val, ctx->regs + VIDWx_BUF_START(win, 0));
+
+	/* buffer end address */
+	size = win_data->fb_width * win_data->ovl_height * (win_data->bpp >> 3);
+	val = win_data->paddr + size;
+	writel(val, ctx->regs + VIDWx_BUF_END(win, 0));
+
+	DRM_DEBUG_KMS("start addr = 0x%lx, end addr = 0x%lx, size = 0x%lx\n",
+			(unsigned long)win_data->paddr, val, size);
+	DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n",
+			win_data->ovl_width, win_data->ovl_height);
+
+	/* buffer size */
+	val = VIDW_BUF_SIZE_OFFSET(win_data->buf_offsize) |
+		VIDW_BUF_SIZE_PAGEWIDTH(win_data->line_size);
+	writel(val, ctx->regs + VIDWx_BUF_SIZE(win, 0));
+
+	/* OSD position */
+	val = VIDOSDxA_TOPLEFT_X(win_data->offset_x) |
+		VIDOSDxA_TOPLEFT_Y(win_data->offset_y);
+	writel(val, ctx->regs + VIDOSD_A(win));
+
+	val = VIDOSDxB_BOTRIGHT_X(win_data->offset_x +
+					win_data->ovl_width - 1) |
+		VIDOSDxB_BOTRIGHT_Y(win_data->offset_y +
+					win_data->ovl_height - 1);
+	writel(val, ctx->regs + VIDOSD_B(win));
+
+	DRM_DEBUG_KMS("osd pos: tx = %d, ty = %d, bx = %d, by = %d\n",
+			win_data->offset_x, win_data->offset_y,
+			win_data->offset_x + win_data->ovl_width - 1,
+			win_data->offset_y + win_data->ovl_height - 1);
+
+	/* hardware window 0 doesn't support alpha channel. */
+	if (win != 0) {
+		/* OSD alpha */
+		alpha = VIDISD14C_ALPHA1_R(0xf) |
+			VIDISD14C_ALPHA1_G(0xf) |
+			VIDISD14C_ALPHA1_B(0xf);
+
+		writel(alpha, ctx->regs + VIDOSD_C(win));
+	}
+
+	/* OSD size */
+	if (win != 3 && win != 4) {
+		u32 offset = VIDOSD_D(win);
+		if (win == 0)
+			offset = VIDOSD_C_SIZE_W0;
+		val = win_data->ovl_width * win_data->ovl_height;
+		writel(val, ctx->regs + offset);
+
+		DRM_DEBUG_KMS("osd size = 0x%x\n", (unsigned int)val);
+	}
+
+	fimd_win_set_pixfmt(dev, win);
+
+	/* hardware window 0 doesn't support color key. */
+	if (win != 0)
+		fimd_win_set_colkey(dev, win);
+
+	/* Enable DMA channel and unprotect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val |= SHADOWCON_CHx_ENABLE(win);
+	val &= ~SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+}
+
+static void fimd_win_disable(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	int win = ctx->default_win;
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (win < 0 || win > WINDOWS_NR)
+		return;
+
+	win_data = &ctx->win_data[win];
+
+	/* protect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val |= SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+
+	/* wincon */
+	val = readl(ctx->regs + WINCON(win));
+	val &= ~WINCONx_ENWIN;
+	writel(val, ctx->regs + WINCON(win));
+
+	/* unprotect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val &= ~SHADOWCON_CHx_ENABLE(win);
+	val &= ~SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+}
+
+static struct exynos_drm_overlay_ops fimd_overlay_ops = {
+	.mode_set = fimd_win_mode_set,
+	.commit = fimd_win_commit,
+	.disable = fimd_win_disable,
+};
+
+static void fimd_finish_pageflip(struct drm_device *drm_dev, int crtc)
+{
+	struct exynos_drm_private *dev_priv = drm_dev->dev_private;
+	struct drm_pending_vblank_event *e, *t;
+	struct timeval now;
+	unsigned long flags;
+	bool is_checked = false;
+
+	spin_lock_irqsave(&drm_dev->event_lock, flags);
+
+	list_for_each_entry_safe(e, t, &dev_priv->pageflip_event_list,
+			base.link) {
+		/* if event's pipe isn't same as crtc then ignore it. */
+		if (crtc != e->pipe)
+			continue;
+
+		is_checked = true;
+
+		do_gettimeofday(&now);
+		e->event.sequence = 0;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+
+		list_move_tail(&e->base.link, &e->base.file_priv->event_list);
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+	}
+
+	if (is_checked)
+		drm_vblank_put(drm_dev, crtc);
+
+	spin_unlock_irqrestore(&drm_dev->event_lock, flags);
+}
+
+static irqreturn_t fimd_irq_handler(int irq, void *dev_id)
+{
+	struct fimd_context *ctx = (struct fimd_context *)dev_id;
+	struct exynos_drm_subdrv *subdrv = &ctx->subdrv;
+	struct drm_device *drm_dev = subdrv->drm_dev;
+	struct exynos_drm_manager *manager = &subdrv->manager;
+	u32 val;
+
+	val = readl(ctx->regs + VIDINTCON1);
+
+	if (val & VIDINTCON1_INT_FRAME)
+		/* VSYNC interrupt */
+		writel(VIDINTCON1_INT_FRAME, ctx->regs + VIDINTCON1);
+
+	drm_handle_vblank(drm_dev, manager->pipe);
+	fimd_finish_pageflip(drm_dev, manager->pipe);
+
+	return IRQ_HANDLED;
+}
+
+static int fimd_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/*
+	 * enable drm irq mode.
+	 * - with irq_enabled = 1, we can use the vblank feature.
+	 *
+	 * P.S. note that we wouldn't use drm irq handler but
+	 *	just specific driver own one instead because
+	 *	drm framework supports only one irq handler.
+	 */
+	drm_dev->irq_enabled = 1;
+
+	/*
+	 * with vblank_disable_allowed = 1, vblank interrupt will be disabled
+	 * by drm timer once a current process gives up ownership of
+	 * vblank event.(drm_vblank_put function was called)
+	 */
+	drm_dev->vblank_disable_allowed = 1;
+
+	return 0;
+}
+
+static void fimd_subdrv_remove(struct drm_device *drm_dev)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+}
+
+static int fimd_calc_clkdiv(struct fimd_context *ctx,
+			    struct fb_videomode *timing)
+{
+	unsigned long clk = clk_get_rate(ctx->lcd_clk);
+	u32 retrace;
+	u32 clkdiv;
+	u32 best_framerate = 0;
+	u32 framerate;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	retrace = timing->left_margin + timing->hsync_len +
+				timing->right_margin + timing->xres;
+	retrace *= timing->upper_margin + timing->vsync_len +
+				timing->lower_margin + timing->yres;
+
+	/* default framerate is 60Hz */
+	if (!timing->refresh)
+		timing->refresh = 60;
+
+	clk /= retrace;
+
+	for (clkdiv = 1; clkdiv < 0x100; clkdiv++) {
+		int tmp;
+
+		/* get best framerate */
+		framerate = clk / clkdiv;
+		tmp = timing->refresh - framerate;
+		if (tmp < 0) {
+			best_framerate = framerate;
+			continue;
+		} else {
+			if (!best_framerate)
+				best_framerate = framerate;
+			else if (tmp < (best_framerate - framerate))
+				best_framerate = framerate;
+			break;
+		}
+	}
+
+	return clkdiv;
+}
+
+static void fimd_clear_win(struct fimd_context *ctx, int win)
+{
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	writel(0, ctx->regs + WINCON(win));
+	writel(0, ctx->regs + VIDOSD_A(win));
+	writel(0, ctx->regs + VIDOSD_B(win));
+	writel(0, ctx->regs + VIDOSD_C(win));
+
+	if (win == 1 || win == 2)
+		writel(0, ctx->regs + VIDOSD_D(win));
+
+	val = readl(ctx->regs + SHADOWCON);
+	val &= ~SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+}
+
+static int __devinit fimd_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct fimd_context *ctx;
+	struct exynos_drm_subdrv *subdrv;
+	struct exynos_drm_fimd_pdata *pdata;
+	struct fb_videomode *timing;
+	struct resource *res;
+	int win;
+	int ret = -EINVAL;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(dev, "no platform data specified\n");
+		return -EINVAL;
+	}
+
+	timing = &pdata->timing;
+	if (!timing) {
+		dev_err(dev, "timing is null.\n");
+		return -EINVAL;
+	}
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->bus_clk = clk_get(dev, "fimd");
+	if (IS_ERR(ctx->bus_clk)) {
+		dev_err(dev, "failed to get bus clock\n");
+		ret = PTR_ERR(ctx->bus_clk);
+		goto err_clk_get;
+	}
+
+	clk_enable(ctx->bus_clk);
+
+	ctx->lcd_clk = clk_get(dev, "sclk_fimd");
+	if (IS_ERR(ctx->lcd_clk)) {
+		dev_err(dev, "failed to get lcd clock\n");
+		ret = PTR_ERR(ctx->lcd_clk);
+		goto err_bus_clk;
+	}
+
+	clk_enable(ctx->lcd_clk);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "failed to find registers\n");
+		ret = -ENOENT;
+		goto err_clk;
+	}
+
+	ctx->regs_res = request_mem_region(res->start, resource_size(res),
+					   dev_name(dev));
+	if (!ctx->regs_res) {
+		dev_err(dev, "failed to claim register region\n");
+		ret = -ENOENT;
+		goto err_clk;
+	}
+
+	ctx->regs = ioremap(res->start, resource_size(res));
+	if (!ctx->regs) {
+		dev_err(dev, "failed to map registers\n");
+		ret = -ENXIO;
+		goto err_req_region_io;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(dev, "irq request failed.\n");
+		goto err_req_region_irq;
+	}
+
+	ctx->irq = res->start;
+
+	for (win = 0; win < WINDOWS_NR; win++)
+		fimd_clear_win(ctx, win);
+
+	ret = request_irq(ctx->irq, fimd_irq_handler, 0, "drm_fimd", ctx);
+	if (ret < 0) {
+		dev_err(dev, "irq request failed.\n");
+		goto err_req_irq;
+	}
+
+	ctx->clkdiv = fimd_calc_clkdiv(ctx, timing);
+	ctx->vidcon0 = pdata->vidcon0;
+	ctx->vidcon1 = pdata->vidcon1;
+	ctx->default_win = pdata->default_win;
+	ctx->timing = timing;
+
+	timing->pixclock = clk_get_rate(ctx->lcd_clk) / ctx->clkdiv;
+
+	DRM_DEBUG_KMS("pixel clock = %d, clkdiv = %d\n",
+			timing->pixclock, ctx->clkdiv);
+
+	subdrv = &ctx->subdrv;
+
+	subdrv->probe = fimd_subdrv_probe;
+	subdrv->remove = fimd_subdrv_remove;
+	subdrv->manager.pipe = -1;
+	subdrv->manager.ops = &fimd_manager_ops;
+	subdrv->manager.overlay_ops = &fimd_overlay_ops;
+	subdrv->manager.display = &fimd_display;
+	subdrv->manager.dev = dev;
+
+	platform_set_drvdata(pdev, ctx);
+	exynos_drm_subdrv_register(subdrv);
+
+	return 0;
+
+err_req_irq:
+err_req_region_irq:
+	iounmap(ctx->regs);
+
+err_req_region_io:
+	release_resource(ctx->regs_res);
+	kfree(ctx->regs_res);
+
+err_clk:
+	clk_disable(ctx->lcd_clk);
+	clk_put(ctx->lcd_clk);
+
+err_bus_clk:
+	clk_disable(ctx->bus_clk);
+	clk_put(ctx->bus_clk);
+
+err_clk_get:
+	kfree(ctx);
+	return ret;
+}
+
+static int __devexit fimd_remove(struct platform_device *pdev)
+{
+	struct fimd_context *ctx = platform_get_drvdata(pdev);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_drm_subdrv_unregister(&ctx->subdrv);
+
+	clk_disable(ctx->lcd_clk);
+	clk_disable(ctx->bus_clk);
+	clk_put(ctx->lcd_clk);
+	clk_put(ctx->bus_clk);
+
+	iounmap(ctx->regs);
+	release_resource(ctx->regs_res);
+	kfree(ctx->regs_res);
+	free_irq(ctx->irq, ctx);
+
+	kfree(ctx);
+
+	return 0;
+}
+
+static struct platform_driver fimd_driver = {
+	.probe		= fimd_probe,
+	.remove		= __devexit_p(fimd_remove),
+	.driver		= {
+		.name	= "exynos4-fb",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init fimd_init(void)
+{
+	return platform_driver_register(&fimd_driver);
+}
+
+static void __exit fimd_exit(void)
+{
+	platform_driver_unregister(&fimd_driver);
+}
+
+module_init(fimd_init);
+module_exit(fimd_exit);
+
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_DESCRIPTION("Samsung DRM FIMD Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
new file mode 100644
index 000000000000..a8e7a88906ed
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -0,0 +1,415 @@
+/* exynos_drm_gem.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+
+#include <drm/exynos_drm.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_gem.h"
+#include "exynos_drm_buf.h"
+
+static unsigned int convert_to_vm_err_msg(int msg)
+{
+	unsigned int out_msg;
+
+	switch (msg) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+		out_msg = VM_FAULT_NOPAGE;
+		break;
+
+	case -ENOMEM:
+		out_msg = VM_FAULT_OOM;
+		break;
+
+	default:
+		out_msg = VM_FAULT_SIGBUS;
+		break;
+	}
+
+	return out_msg;
+}
+
+static unsigned int get_gem_mmap_offset(struct drm_gem_object *obj)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return (unsigned int)obj->map_list.hash.key << PAGE_SHIFT;
+}
+
+struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int size,
+		unsigned int *handle)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+	struct exynos_drm_buf_entry *entry;
+	struct drm_gem_object *obj;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	size = roundup(size, PAGE_SIZE);
+
+	exynos_gem_obj = kzalloc(sizeof(*exynos_gem_obj), GFP_KERNEL);
+	if (!exynos_gem_obj) {
+		DRM_ERROR("failed to allocate exynos gem object.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* allocate the new buffer object and memory region. */
+	entry = exynos_drm_buf_create(dev, size);
+	if (!entry) {
+		kfree(exynos_gem_obj);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	exynos_gem_obj->entry = entry;
+
+	obj = &exynos_gem_obj->base;
+
+	ret = drm_gem_object_init(dev, obj, size);
+	if (ret < 0) {
+		DRM_ERROR("failed to initailize gem object.\n");
+		goto err_obj_init;
+	}
+
+	DRM_DEBUG_KMS("created file object = 0x%x\n", (unsigned int)obj->filp);
+
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret < 0) {
+		DRM_ERROR("failed to allocate mmap offset.\n");
+		goto err_create_mmap_offset;
+	}
+
+	/*
+	 * allocate a id of idr table where the obj is registered
+	 * and handle has the id what user can see.
+	 */
+	ret = drm_gem_handle_create(file_priv, obj, handle);
+	if (ret)
+		goto err_handle_create;
+
+	DRM_DEBUG_KMS("gem handle = 0x%x\n", *handle);
+
+	/* drop reference from allocate - handle holds it now. */
+	drm_gem_object_unreference_unlocked(obj);
+
+	return exynos_gem_obj;
+
+err_handle_create:
+	drm_gem_free_mmap_offset(obj);
+
+err_create_mmap_offset:
+	drm_gem_object_release(obj);
+
+err_obj_init:
+	exynos_drm_buf_destroy(dev, exynos_gem_obj->entry);
+
+	kfree(exynos_gem_obj);
+
+	return ERR_PTR(ret);
+}
+
+int exynos_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv)
+{
+	struct drm_exynos_gem_create *args = data;
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+
+	DRM_DEBUG_KMS("%s : size = 0x%x\n", __FILE__, args->size);
+
+	exynos_gem_obj = exynos_drm_gem_create(file_priv, dev, args->size,
+			&args->handle);
+	if (IS_ERR(exynos_gem_obj))
+		return PTR_ERR(exynos_gem_obj);
+
+	return 0;
+}
+
+int exynos_drm_gem_map_offset_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv)
+{
+	struct drm_exynos_gem_map_off *args = data;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	DRM_DEBUG_KMS("handle = 0x%x, offset = 0x%lx\n",
+			args->handle, (unsigned long)args->offset);
+
+	if (!(dev->driver->driver_features & DRIVER_GEM)) {
+		DRM_ERROR("does not support GEM.\n");
+		return -ENODEV;
+	}
+
+	return exynos_drm_gem_dumb_map_offset(file_priv, dev, args->handle,
+			&args->offset);
+}
+
+static int exynos_drm_gem_mmap_buffer(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = filp->private_data;
+	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
+	struct exynos_drm_buf_entry *entry;
+	unsigned long pfn, vm_size;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	vma->vm_flags |= (VM_IO | VM_RESERVED);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_file = filp;
+
+	vm_size = vma->vm_end - vma->vm_start;
+	/*
+	 * a entry contains information to physically continuous memory
+	 * allocated by user request or at framebuffer creation.
+	 */
+	entry = exynos_gem_obj->entry;
+
+	/* check if user-requested size is valid. */
+	if (vm_size > entry->size)
+		return -EINVAL;
+
+	/*
+	 * get page frame number to physical memory to be mapped
+	 * to user space.
+	 */
+	pfn = exynos_gem_obj->entry->paddr >> PAGE_SHIFT;
+
+	DRM_DEBUG_KMS("pfn = 0x%lx\n", pfn);
+
+	if (remap_pfn_range(vma, vma->vm_start, pfn, vm_size,
+				vma->vm_page_prot)) {
+		DRM_ERROR("failed to remap pfn range.\n");
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+static const struct file_operations exynos_drm_gem_fops = {
+	.mmap = exynos_drm_gem_mmap_buffer,
+};
+
+int exynos_drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv)
+{
+	struct drm_exynos_gem_mmap *args = data;
+	struct drm_gem_object *obj;
+	unsigned int addr;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!(dev->driver->driver_features & DRIVER_GEM)) {
+		DRM_ERROR("does not support GEM.\n");
+		return -ENODEV;
+	}
+
+	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (!obj) {
+		DRM_ERROR("failed to lookup gem object.\n");
+		return -EINVAL;
+	}
+
+	obj->filp->f_op = &exynos_drm_gem_fops;
+	obj->filp->private_data = obj;
+
+	down_write(&current->mm->mmap_sem);
+	addr = do_mmap(obj->filp, 0, args->size,
+			PROT_READ | PROT_WRITE, MAP_SHARED, 0);
+	up_write(&current->mm->mmap_sem);
+
+	drm_gem_object_unreference_unlocked(obj);
+
+	if (IS_ERR((void *)addr))
+		return PTR_ERR((void *)addr);
+
+	args->mapped = addr;
+
+	DRM_DEBUG_KMS("mapped = 0x%lx\n", (unsigned long)args->mapped);
+
+	return 0;
+}
+
+int exynos_drm_gem_init_object(struct drm_gem_object *obj)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return 0;
+}
+
+void exynos_drm_gem_free_object(struct drm_gem_object *gem_obj)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	DRM_DEBUG_KMS("handle count = %d\n",
+			atomic_read(&gem_obj->handle_count));
+
+	if (gem_obj->map_list.map)
+		drm_gem_free_mmap_offset(gem_obj);
+
+	/* release file pointer to gem object. */
+	drm_gem_object_release(gem_obj);
+
+	exynos_gem_obj = to_exynos_gem_obj(gem_obj);
+
+	exynos_drm_buf_destroy(gem_obj->dev, exynos_gem_obj->entry);
+
+	kfree(exynos_gem_obj);
+}
+
+int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/*
+	 * alocate memory to be used for framebuffer.
+	 * - this callback would be called by user application
+	 *	with DRM_IOCTL_MODE_CREATE_DUMB command.
+	 */
+
+	args->pitch = args->width * args->bpp >> 3;
+	args->size = args->pitch * args->height;
+
+	exynos_gem_obj = exynos_drm_gem_create(file_priv, dev, args->size,
+							&args->handle);
+	if (IS_ERR(exynos_gem_obj))
+		return PTR_ERR(exynos_gem_obj);
+
+	return 0;
+}
+
+int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
+		struct drm_device *dev, uint32_t handle, uint64_t *offset)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+	struct drm_gem_object *obj;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * get offset of memory allocated for drm framebuffer.
+	 * - this callback would be called by user application
+	 *	with DRM_IOCTL_MODE_MAP_DUMB command.
+	 */
+
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("failed to lookup gem object.\n");
+		mutex_unlock(&dev->struct_mutex);
+		return -EINVAL;
+	}
+
+	exynos_gem_obj = to_exynos_gem_obj(obj);
+
+	*offset = get_gem_mmap_offset(&exynos_gem_obj->base);
+
+	drm_gem_object_unreference(obj);
+
+	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
+	struct drm_device *dev = obj->dev;
+	unsigned long pfn;
+	pgoff_t page_offset;
+	int ret;
+
+	page_offset = ((unsigned long)vmf->virtual_address -
+			vma->vm_start) >> PAGE_SHIFT;
+
+	mutex_lock(&dev->struct_mutex);
+
+	pfn = (exynos_gem_obj->entry->paddr >> PAGE_SHIFT) + page_offset;
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return convert_to_vm_err_msg(ret);
+}
+
+int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* set vm_area_struct. */
+	ret = drm_gem_mmap(filp, vma);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap.\n");
+		return ret;
+	}
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	return ret;
+}
+
+
+int exynos_drm_gem_dumb_destroy(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int handle)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/*
+	 * obj->refcount and obj->handle_count are decreased and
+	 * if both them are 0 then exynos_drm_gem_free_object()
+	 * would be called by callback to release resources.
+	 */
+	ret = drm_gem_handle_delete(file_priv, handle);
+	if (ret < 0) {
+		DRM_ERROR("failed to delete drm_gem_handle.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM GEM Module");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
new file mode 100644
index 000000000000..e5fc0148277b
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -0,0 +1,107 @@
+/* exynos_drm_gem.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authoer: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_GEM_H_
+#define _EXYNOS_DRM_GEM_H_
+
+#define to_exynos_gem_obj(x)	container_of(x,\
+			struct exynos_drm_gem_obj, base)
+
+/*
+ * exynos drm buffer structure.
+ *
+ * @base: a gem object.
+ *	- a new handle to this gem object would be created
+ *	by drm_gem_handle_create().
+ * @entry: pointer to exynos drm buffer entry object.
+ *	- containing the information to physically
+ *	continuous memory region allocated by user request
+ *	or at framebuffer creation.
+ *
+ * P.S. this object would be transfered to user as kms_bo.handle so
+ *	user can access the buffer through kms_bo.handle.
+ */
+struct exynos_drm_gem_obj {
+	struct drm_gem_object base;
+	struct exynos_drm_buf_entry *entry;
+};
+
+/* create a new buffer and get a new gem handle. */
+struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int size,
+		unsigned int *handle);
+
+/*
+ * request gem object creation and buffer allocation as the size
+ * that it is calculated with framebuffer information such as width,
+ * height and bpp.
+ */
+int exynos_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* get buffer offset to map to user space. */
+int exynos_drm_gem_map_offset_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* unmap a buffer from user space. */
+int exynos_drm_gem_munmap_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* initialize gem object. */
+int exynos_drm_gem_init_object(struct drm_gem_object *obj);
+
+/* free gem object. */
+void exynos_drm_gem_free_object(struct drm_gem_object *gem_obj);
+
+/* create memory region for drm framebuffer. */
+int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args);
+
+/* map memory region for drm framebuffer to user space. */
+int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
+		struct drm_device *dev, uint32_t handle, uint64_t *offset);
+
+/* page fault handler and mmap fault address(virtual) to physical memory. */
+int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+
+/*
+ * mmap the physically continuous memory that a gem object contains
+ * to user space.
+ */
+int exynos_drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* set vm_flags and we can change the vm attribute to other one at here. */
+int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+
+/*
+ * destroy memory region allocated.
+ *	- a gem handle and physical memory region pointed by a gem object
+ *	would be released by drm_gem_handle_delete().
+ */
+int exynos_drm_gem_dumb_destroy(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int handle);
+
+#endif
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 7ad43c6b1db7..79e8ebc05307 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -115,6 +115,7 @@ static int radeon_dp_aux_native_write(struct radeon_connector *radeon_connector,
 	u8 msg[20];
 	int msg_bytes = send_bytes + 4;
 	u8 ack;
+	unsigned retry;
 
 	if (send_bytes > 16)
 		return -1;
@@ -125,20 +126,22 @@ static int radeon_dp_aux_native_write(struct radeon_connector *radeon_connector,
 	msg[3] = (msg_bytes << 4) | (send_bytes - 1);
 	memcpy(&msg[4], send, send_bytes);
 
-	while (1) {
+	for (retry = 0; retry < 4; retry++) {
 		ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus,
 					    msg, msg_bytes, NULL, 0, delay, &ack);
-		if (ret < 0)
+		if (ret == -EBUSY)
+			continue;
+		else if (ret < 0)
 			return ret;
 		if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_ACK)
-			break;
+			return send_bytes;
 		else if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_DEFER)
 			udelay(400);
 		else
 			return -EIO;
 	}
 
-	return send_bytes;
+	return -EIO;
 }
 
 static int radeon_dp_aux_native_read(struct radeon_connector *radeon_connector,
@@ -149,26 +152,31 @@ static int radeon_dp_aux_native_read(struct radeon_connector *radeon_connector,
 	int msg_bytes = 4;
 	u8 ack;
 	int ret;
+	unsigned retry;
 
 	msg[0] = address;
 	msg[1] = address >> 8;
 	msg[2] = AUX_NATIVE_READ << 4;
 	msg[3] = (msg_bytes << 4) | (recv_bytes - 1);
 
-	while (1) {
+	for (retry = 0; retry < 4; retry++) {
 		ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus,
 					    msg, msg_bytes, recv, recv_bytes, delay, &ack);
-		if (ret == 0)
-			return -EPROTO;
-		if (ret < 0)
+		if (ret == -EBUSY)
+			continue;
+		else if (ret < 0)
 			return ret;
 		if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_ACK)
 			return ret;
 		else if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_DEFER)
 			udelay(400);
+		else if (ret == 0)
+			return -EPROTO;
 		else
 			return -EIO;
 	}
+
+	return -EIO;
 }
 
 static void radeon_write_dpcd_reg(struct radeon_connector *radeon_connector,
@@ -232,7 +240,9 @@ int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
 	for (retry = 0; retry < 4; retry++) {
 		ret = radeon_process_aux_ch(auxch,
 					    msg, msg_bytes, reply, reply_bytes, 0, &ack);
-		if (ret < 0) {
+		if (ret == -EBUSY)
+			continue;
+		else if (ret < 0) {
 			DRM_DEBUG_KMS("aux_ch failed %d\n", ret);
 			return ret;
 		}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index a72dbb3e1335..ed406e8404a3 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -39,7 +39,7 @@
 
 static void evergreen_gpu_init(struct radeon_device *rdev);
 void evergreen_fini(struct radeon_device *rdev);
-static void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
+void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
 
 void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev)
 {
@@ -1407,7 +1407,8 @@ int evergreen_cp_resume(struct radeon_device *rdev)
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
 	WREG32(CP_RB_RPTR_WR, 0);
-	WREG32(CP_RB_WPTR, 0);
+	rdev->cp.wptr = 0;
+	WREG32(CP_RB_WPTR, rdev->cp.wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB_RPTR_ADDR,
@@ -1429,7 +1430,6 @@ int evergreen_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
 
 	rdev->cp.rptr = RREG32(CP_RB_RPTR);
-	rdev->cp.wptr = RREG32(CP_RB_WPTR);
 
 	evergreen_cp_start(rdev);
 	rdev->cp.ready = true;
@@ -1593,48 +1593,6 @@ static u32 evergreen_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
 	return backend_map;
 }
 
-static void evergreen_program_channel_remap(struct radeon_device *rdev)
-{
-	u32 tcp_chan_steer_lo, tcp_chan_steer_hi, mc_shared_chremap, tmp;
-
-	tmp = RREG32(MC_SHARED_CHMAP);
-	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
-	case 0:
-	case 1:
-	case 2:
-	case 3:
-	default:
-		/* default mapping */
-		mc_shared_chremap = 0x00fac688;
-		break;
-	}
-
-	switch (rdev->family) {
-	case CHIP_HEMLOCK:
-	case CHIP_CYPRESS:
-	case CHIP_BARTS:
-		tcp_chan_steer_lo = 0x54763210;
-		tcp_chan_steer_hi = 0x0000ba98;
-		break;
-	case CHIP_JUNIPER:
-	case CHIP_REDWOOD:
-	case CHIP_CEDAR:
-	case CHIP_PALM:
-	case CHIP_SUMO:
-	case CHIP_SUMO2:
-	case CHIP_TURKS:
-	case CHIP_CAICOS:
-	default:
-		tcp_chan_steer_lo = 0x76543210;
-		tcp_chan_steer_hi = 0x0000ba98;
-		break;
-	}
-
-	WREG32(TCP_CHAN_STEER_LO, tcp_chan_steer_lo);
-	WREG32(TCP_CHAN_STEER_HI, tcp_chan_steer_hi);
-	WREG32(MC_SHARED_CHREMAP, mc_shared_chremap);
-}
-
 static void evergreen_gpu_init(struct radeon_device *rdev)
 {
 	u32 cc_rb_backend_disable = 0;
@@ -2081,8 +2039,6 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
 
-	evergreen_program_channel_remap(rdev);
-
 	num_shader_engines = ((RREG32(GB_ADDR_CONFIG) & NUM_SHADER_ENGINES(3)) >> 12) + 1;
 	grbm_gfx_index = INSTANCE_BROADCAST_WRITES;
 
@@ -2633,7 +2589,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
 	return 0;
 }
 
-static inline void evergreen_irq_ack(struct radeon_device *rdev)
+static void evergreen_irq_ack(struct radeon_device *rdev)
 {
 	u32 tmp;
 
@@ -2744,7 +2700,7 @@ void evergreen_irq_suspend(struct radeon_device *rdev)
 	r600_rlc_stop(rdev);
 }
 
-static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
+static u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
 {
 	u32 wptr, tmp;
 
@@ -3050,8 +3006,7 @@ static int evergreen_startup(struct radeon_device *rdev)
 	int r;
 
 	/* enable pcie gen2 link */
-	if (!ASIC_IS_DCE5(rdev))
-		evergreen_pcie_gen2_enable(rdev);
+	evergreen_pcie_gen2_enable(rdev);
 
 	if (ASIC_IS_DCE5(rdev)) {
 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
@@ -3088,7 +3043,7 @@ static int evergreen_startup(struct radeon_device *rdev)
 
 	r = evergreen_blit_init(rdev);
 	if (r) {
-		evergreen_blit_fini(rdev);
+		r600_blit_fini(rdev);
 		rdev->asic->copy = NULL;
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
@@ -3154,43 +3109,14 @@ int evergreen_resume(struct radeon_device *rdev)
 
 int evergreen_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	/* FIXME: we should wait for ring to be empty */
 	r700_cp_stop(rdev);
 	rdev->cp.ready = false;
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	evergreen_pcie_gart_disable(rdev);
+	r600_blit_suspend(rdev);
 
-	/* unpin shaders bo */
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (likely(r == 0)) {
-		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-	}
-
-	return 0;
-}
-
-int evergreen_copy_blit(struct radeon_device *rdev,
-			uint64_t src_offset, uint64_t dst_offset,
-			unsigned num_pages, struct radeon_fence *fence)
-{
-	int r;
-
-	mutex_lock(&rdev->r600_blit.mutex);
-	rdev->r600_blit.vb_ib = NULL;
-	r = evergreen_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE);
-	if (r) {
-		if (rdev->r600_blit.vb_ib)
-			radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
-		mutex_unlock(&rdev->r600_blit.mutex);
-		return r;
-	}
-	evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE);
-	evergreen_blit_done_copy(rdev, fence);
-	mutex_unlock(&rdev->r600_blit.mutex);
 	return 0;
 }
 
@@ -3302,7 +3228,7 @@ int evergreen_init(struct radeon_device *rdev)
 
 void evergreen_fini(struct radeon_device *rdev)
 {
-	evergreen_blit_fini(rdev);
+	r600_blit_fini(rdev);
 	r700_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
@@ -3318,7 +3244,7 @@ void evergreen_fini(struct radeon_device *rdev)
 	rdev->bios = NULL;
 }
 
-static void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
+void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
 {
 	u32 link_width_cntl, speed_cntl;
 
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2eb251858e72..dcf11bbc06d9 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -56,7 +56,9 @@ set_render_target(struct radeon_device *rdev, int format,
 	if (h < 8)
 		h = 8;
 
-	cb_color_info = ((format << 2) | (1 << 24) | (1 << 8));
+	cb_color_info = CB_FORMAT(format) |
+		CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
+		CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
 	pitch = (w / 8) - 1;
 	slice = ((w * h) / 64) - 1;
 
@@ -67,7 +69,7 @@ set_render_target(struct radeon_device *rdev, int format,
 	radeon_ring_write(rdev, slice);
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, cb_color_info);
-	radeon_ring_write(rdev, (1 << 4));
+	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, 0);
@@ -133,12 +135,16 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 	u32 sq_vtx_constant_word2, sq_vtx_constant_word3;
 
 	/* high addr, stride */
-	sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
+		SQ_VTXC_STRIDE(16);
 #ifdef __BIG_ENDIAN
-	sq_vtx_constant_word2 |= (2 << 30);
+	sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
 #endif
 	/* xyzw swizzles */
-	sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12);
+	sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) |
+		SQ_VTCX_SEL_Y(SQ_SEL_Y) |
+		SQ_VTCX_SEL_Z(SQ_SEL_Z) |
+		SQ_VTCX_SEL_W(SQ_SEL_W);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
 	radeon_ring_write(rdev, 0x580);
@@ -149,7 +155,7 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
+	radeon_ring_write(rdev, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER));
 
 	if ((rdev->family == CHIP_CEDAR) ||
 	    (rdev->family == CHIP_PALM) ||
@@ -176,14 +182,19 @@ set_tex_resource(struct radeon_device *rdev,
 	if (h < 1)
 		h = 1;
 
-	sq_tex_resource_word0 = (1 << 0); /* 2D */
+	sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D);
 	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
 				  ((w - 1) << 18));
-	sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28);
+	sq_tex_resource_word1 = ((h - 1) << 0) |
+				TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
 	/* xyzw swizzles */
-	sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);
+	sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) |
+				TEX_DST_SEL_Y(SQ_SEL_Y) |
+				TEX_DST_SEL_Z(SQ_SEL_Z) |
+				TEX_DST_SEL_W(SQ_SEL_W);
 
-	sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30);
+	sq_tex_resource_word7 = format |
+		S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
 	radeon_ring_write(rdev, 0);
@@ -584,31 +595,6 @@ set_default_state(struct radeon_device *rdev)
 
 }
 
-static inline uint32_t i2f(uint32_t input)
-{
-	u32 result, i, exponent, fraction;
-
-	if ((input & 0x3fff) == 0)
-		result = 0; /* 0 is a special case */
-	else {
-		exponent = 140; /* exponent biased by 127; */
-		fraction = (input & 0x3fff) << 10; /* cheat and only
-						      handle numbers below 2^^15 */
-		for (i = 0; i < 14; i++) {
-			if (fraction & 0x800000)
-				break;
-			else {
-				fraction = fraction << 1; /* keep
-							     shifting left until top bit = 1 */
-				exponent = exponent - 1;
-			}
-		}
-		result = exponent << 23 | (fraction & 0x7fffff); /* mask
-								    off top bit; assumed 1 */
-	}
-	return result;
-}
-
 int evergreen_blit_init(struct radeon_device *rdev)
 {
 	u32 obj_size;
@@ -617,6 +603,24 @@ int evergreen_blit_init(struct radeon_device *rdev)
 	u32 packet2s[16];
 	int num_packet2s = 0;
 
+	rdev->r600_blit.primitives.set_render_target = set_render_target;
+	rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
+	rdev->r600_blit.primitives.set_shaders = set_shaders;
+	rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
+	rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
+	rdev->r600_blit.primitives.set_scissors = set_scissors;
+	rdev->r600_blit.primitives.draw_auto = draw_auto;
+	rdev->r600_blit.primitives.set_default_state = set_default_state;
+
+	rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+	rdev->r600_blit.ring_size_common += 5; /* done copy */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+
+	rdev->r600_blit.ring_size_per_loop = 74;
+
+	rdev->r600_blit.max_dim = 16384;
+
 	/* pin copy shader into vram if already initialized */
 	if (rdev->r600_blit.shader_obj)
 		goto done;
@@ -712,277 +716,3 @@ done:
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
 	return 0;
 }
-
-void evergreen_blit_fini(struct radeon_device *rdev)
-{
-	int r;
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-	if (rdev->r600_blit.shader_obj == NULL)
-		return;
-	/* If we can't reserve the bo, unref should be enough to destroy
-	 * it when it becomes idle.
-	 */
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (!r) {
-		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-	}
-	radeon_bo_unref(&rdev->r600_blit.shader_obj);
-}
-
-static int evergreen_vb_ib_get(struct radeon_device *rdev)
-{
-	int r;
-	r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
-	if (r) {
-		DRM_ERROR("failed to get IB for vertex buffer\n");
-		return r;
-	}
-
-	rdev->r600_blit.vb_total = 64*1024;
-	rdev->r600_blit.vb_used = 0;
-	return 0;
-}
-
-static void evergreen_vb_ib_put(struct radeon_device *rdev)
-{
-	radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
-	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
-}
-
-int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
-{
-	int r;
-	int ring_size, line_size;
-	int max_size;
-	/* loops of emits + fence emit possible */
-	int dwords_per_loop = 74, num_loops;
-
-	r = evergreen_vb_ib_get(rdev);
-	if (r)
-		return r;
-
-	/* 8 bpp vs 32 bpp for xfer unit */
-	if (size_bytes & 3)
-		line_size = 8192;
-	else
-		line_size = 8192 * 4;
-
-	max_size = 8192 * line_size;
-
-	/* major loops cover the max size transfer */
-	num_loops = ((size_bytes + max_size) / max_size);
-	/* minor loops cover the extra non aligned bits */
-	num_loops += ((size_bytes % line_size) ? 1 : 0);
-	/* calculate number of loops correctly */
-	ring_size = num_loops * dwords_per_loop;
-	/* set default  + shaders */
-	ring_size += 55; /* shaders + def state */
-	ring_size += 10; /* fence emit for VB IB */
-	ring_size += 5; /* done copy */
-	ring_size += 10; /* fence emit for done copy */
-	r = radeon_ring_lock(rdev, ring_size);
-	if (r)
-		return r;
-
-	set_default_state(rdev); /* 36 */
-	set_shaders(rdev); /* 16 */
-	return 0;
-}
-
-void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
-{
-	int r;
-
-	if (rdev->r600_blit.vb_ib)
-		evergreen_vb_ib_put(rdev);
-
-	if (fence)
-		r = radeon_fence_emit(rdev, fence);
-
-	radeon_ring_unlock_commit(rdev);
-}
-
-void evergreen_kms_blit_copy(struct radeon_device *rdev,
-			     u64 src_gpu_addr, u64 dst_gpu_addr,
-			     int size_bytes)
-{
-	int max_bytes;
-	u64 vb_gpu_addr;
-	u32 *vb;
-
-	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
-		  size_bytes, rdev->r600_blit.vb_used);
-	vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
-	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
-		max_bytes = 8192;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = src_gpu_addr & 255;
-			int dst_x = dst_gpu_addr & 255;
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
-
-			vb[0] = i2f(dst_x);
-			vb[1] = 0;
-			vb[2] = i2f(src_x);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f(dst_x + cur_size);
-			vb[9] = i2f(h);
-			vb[10] = i2f(src_x + cur_size);
-			vb[11] = i2f(h);
-
-			/* src 10 */
-			set_tex_resource(rdev, FMT_8,
-					 src_x + cur_size, h, src_x + cur_size,
-					 src_gpu_addr);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
-
-			/* dst 17 */
-			set_render_target(rdev, COLOR_8,
-					  dst_x + cur_size, h,
-					  dst_gpu_addr);
-
-			/* scissors 12 */
-			set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
-
-			/* 15 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
-		}
-	} else {
-		max_bytes = 8192 * 4;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = (src_gpu_addr & 255);
-			int dst_x = (dst_gpu_addr & 255);
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
-
-			vb[0] = i2f(dst_x / 4);
-			vb[1] = 0;
-			vb[2] = i2f(src_x / 4);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x / 4);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x / 4);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f((dst_x + cur_size) / 4);
-			vb[9] = i2f(h);
-			vb[10] = i2f((src_x + cur_size) / 4);
-			vb[11] = i2f(h);
-
-			/* src 10 */
-			set_tex_resource(rdev, FMT_8_8_8_8,
-					 (src_x + cur_size) / 4,
-					 h, (src_x + cur_size) / 4,
-					 src_gpu_addr);
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
-			/* dst 17 */
-			set_render_target(rdev, COLOR_8_8_8_8,
-					  (dst_x + cur_size) / 4, h,
-					  dst_gpu_addr);
-
-			/* scissors 12  */
-			set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
-
-			/* Vertex buffer setup 15 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			/* 74 ring dwords per loop */
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
-		}
-	}
-}
-
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index a134790903d3..7fdfa8ea7570 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -122,12 +122,6 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
 	track->db_s_write_bo = NULL;
 }
 
-static inline int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
-{
-	/* XXX fill in */
-	return 0;
-}
-
 static int evergreen_cs_track_check(struct radeon_cs_parser *p)
 {
 	struct evergreen_cs_track *track = p->track;
@@ -236,28 +230,6 @@ static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
 }
 
 /**
- * evergreen_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc
- * @parser:		parser structure holding parsing context.
- *
- * Check next packet is relocation packet3, do bo validation and compute
- * GPU offset using the provided start.
- **/
-static inline int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
-{
-	struct radeon_cs_packet p3reloc;
-	int r;
-
-	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
-	if (r) {
-		return 0;
-	}
-	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
-		return 0;
-	}
-	return 1;
-}
-
-/**
  * evergreen_cs_packet_next_vline() - parse userspace VLINE packet
  * @parser:		parser structure holding parsing context.
  *
@@ -414,7 +386,7 @@ static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
  * if register is safe. If register is not flag as safe this function
  * will test it against a list of register needind special handling.
  */
-static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
 	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
 	struct radeon_cs_reloc *reloc;
@@ -990,7 +962,7 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3
  * This function will check that the resource has valid field and that
  * the texture and mipmap bo object are big enough to cover this resource.
  */
-static inline int evergreen_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
+static int evergreen_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
 						   struct radeon_bo *texture,
 						   struct radeon_bo *mipmap)
 {
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 7363d9dec909..b937c49054d9 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -941,11 +941,15 @@
 #define	CB_COLOR0_SLICE					0x28c68
 #define	CB_COLOR0_VIEW					0x28c6c
 #define	CB_COLOR0_INFO					0x28c70
+#	define CB_FORMAT(x)				((x) << 2)
 #       define CB_ARRAY_MODE(x)                         ((x) << 8)
 #       define ARRAY_LINEAR_GENERAL                     0
 #       define ARRAY_LINEAR_ALIGNED                     1
 #       define ARRAY_1D_TILED_THIN1                     2
 #       define ARRAY_2D_TILED_THIN1                     4
+#	define CB_SOURCE_FORMAT(x)			((x) << 24)
+#	define CB_SF_EXPORT_FULL			0
+#	define CB_SF_EXPORT_NORM			1
 #define	CB_COLOR0_ATTRIB				0x28c74
 #define	CB_COLOR0_DIM					0x28c78
 /* only CB0-7 blocks have these regs */
@@ -1107,15 +1111,53 @@
 #define	CB_COLOR7_CLEAR_WORD3				0x28e3c
 
 #define SQ_TEX_RESOURCE_WORD0_0                         0x30000
+#	define TEX_DIM(x)				((x) << 0)
+#	define SQ_TEX_DIM_1D				0
+#	define SQ_TEX_DIM_2D				1
+#	define SQ_TEX_DIM_3D				2
+#	define SQ_TEX_DIM_CUBEMAP			3
+#	define SQ_TEX_DIM_1D_ARRAY			4
+#	define SQ_TEX_DIM_2D_ARRAY			5
+#	define SQ_TEX_DIM_2D_MSAA			6
+#	define SQ_TEX_DIM_2D_ARRAY_MSAA			7
 #define SQ_TEX_RESOURCE_WORD1_0                         0x30004
 #       define TEX_ARRAY_MODE(x)                        ((x) << 28)
 #define SQ_TEX_RESOURCE_WORD2_0                         0x30008
 #define SQ_TEX_RESOURCE_WORD3_0                         0x3000C
 #define SQ_TEX_RESOURCE_WORD4_0                         0x30010
+#	define TEX_DST_SEL_X(x)				((x) << 16)
+#	define TEX_DST_SEL_Y(x)				((x) << 19)
+#	define TEX_DST_SEL_Z(x)				((x) << 22)
+#	define TEX_DST_SEL_W(x)				((x) << 25)
+#	define SQ_SEL_X					0
+#	define SQ_SEL_Y					1
+#	define SQ_SEL_Z					2
+#	define SQ_SEL_W					3
+#	define SQ_SEL_0					4
+#	define SQ_SEL_1					5
 #define SQ_TEX_RESOURCE_WORD5_0                         0x30014
 #define SQ_TEX_RESOURCE_WORD6_0                         0x30018
 #define SQ_TEX_RESOURCE_WORD7_0                         0x3001c
 
+#define SQ_VTX_CONSTANT_WORD0_0				0x30000
+#define SQ_VTX_CONSTANT_WORD1_0				0x30004
+#define SQ_VTX_CONSTANT_WORD2_0				0x30008
+#	define SQ_VTXC_BASE_ADDR_HI(x)			((x) << 0)
+#	define SQ_VTXC_STRIDE(x)			((x) << 8)
+#	define SQ_VTXC_ENDIAN_SWAP(x)			((x) << 30)
+#	define SQ_ENDIAN_NONE				0
+#	define SQ_ENDIAN_8IN16				1
+#	define SQ_ENDIAN_8IN32				2
+#define SQ_VTX_CONSTANT_WORD3_0				0x3000C
+#	define SQ_VTCX_SEL_X(x)				((x) << 3)
+#	define SQ_VTCX_SEL_Y(x)				((x) << 6)
+#	define SQ_VTCX_SEL_Z(x)				((x) << 9)
+#	define SQ_VTCX_SEL_W(x)				((x) << 12)
+#define SQ_VTX_CONSTANT_WORD4_0				0x30010
+#define SQ_VTX_CONSTANT_WORD5_0                         0x30014
+#define SQ_VTX_CONSTANT_WORD6_0                         0x30018
+#define SQ_VTX_CONSTANT_WORD7_0                         0x3001c
+
 /* cayman 3D regs */
 #define CAYMAN_VGT_OFFCHIP_LDS_BASE			0x89B0
 #define CAYMAN_DB_EQAA					0x28804
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index bf4fce7c43f2..556b7bc3418b 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -40,6 +40,7 @@ extern void evergreen_mc_program(struct radeon_device *rdev);
 extern void evergreen_irq_suspend(struct radeon_device *rdev);
 extern int evergreen_mc_init(struct radeon_device *rdev);
 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
+extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
 
 #define EVERGREEN_PFP_UCODE_SIZE 1120
 #define EVERGREEN_PM4_UCODE_SIZE 1376
@@ -569,36 +570,6 @@ static u32 cayman_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
 	return backend_map;
 }
 
-static void cayman_program_channel_remap(struct radeon_device *rdev)
-{
-	u32 tcp_chan_steer_lo, tcp_chan_steer_hi, mc_shared_chremap, tmp;
-
-	tmp = RREG32(MC_SHARED_CHMAP);
-	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
-	case 0:
-	case 1:
-	case 2:
-	case 3:
-	default:
-		/* default mapping */
-		mc_shared_chremap = 0x00fac688;
-		break;
-	}
-
-	switch (rdev->family) {
-	case CHIP_CAYMAN:
-	default:
-		//tcp_chan_steer_lo = 0x54763210
-		tcp_chan_steer_lo = 0x76543210;
-		tcp_chan_steer_hi = 0x0000ba98;
-		break;
-	}
-
-	WREG32(TCP_CHAN_STEER_LO, tcp_chan_steer_lo);
-	WREG32(TCP_CHAN_STEER_HI, tcp_chan_steer_hi);
-	WREG32(MC_SHARED_CHREMAP, mc_shared_chremap);
-}
-
 static u32 cayman_get_disable_mask_per_asic(struct radeon_device *rdev,
 					    u32 disable_mask_per_se,
 					    u32 max_disable_mask_per_se,
@@ -842,8 +813,6 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
 
-	cayman_program_channel_remap(rdev);
-
 	/* primary versions */
 	WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
 	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
@@ -1190,7 +1159,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
 
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
-	WREG32(CP_RB0_WPTR, 0);
+	rdev->cp.wptr = 0;
+	WREG32(CP_RB0_WPTR, rdev->cp.wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1210,7 +1180,6 @@ int cayman_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_RB0_BASE, rdev->cp.gpu_addr >> 8);
 
 	rdev->cp.rptr = RREG32(CP_RB0_RPTR);
-	rdev->cp.wptr = RREG32(CP_RB0_WPTR);
 
 	/* ring1  - compute only */
 	/* Set ring buffer size */
@@ -1223,7 +1192,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
 
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
-	WREG32(CP_RB1_WPTR, 0);
+	rdev->cp1.wptr = 0;
+	WREG32(CP_RB1_WPTR, rdev->cp1.wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1235,7 +1205,6 @@ int cayman_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_RB1_BASE, rdev->cp1.gpu_addr >> 8);
 
 	rdev->cp1.rptr = RREG32(CP_RB1_RPTR);
-	rdev->cp1.wptr = RREG32(CP_RB1_WPTR);
 
 	/* ring2 - compute only */
 	/* Set ring buffer size */
@@ -1248,7 +1217,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
 
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
-	WREG32(CP_RB2_WPTR, 0);
+	rdev->cp2.wptr = 0;
+	WREG32(CP_RB2_WPTR, rdev->cp2.wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1260,7 +1230,6 @@ int cayman_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_RB2_BASE, rdev->cp2.gpu_addr >> 8);
 
 	rdev->cp2.rptr = RREG32(CP_RB2_RPTR);
-	rdev->cp2.wptr = RREG32(CP_RB2_WPTR);
 
 	/* start the rings */
 	cayman_cp_start(rdev);
@@ -1376,6 +1345,9 @@ static int cayman_startup(struct radeon_device *rdev)
 {
 	int r;
 
+	/* enable pcie gen2 link */
+	evergreen_pcie_gen2_enable(rdev);
+
 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
 		r = ni_init_microcode(rdev);
 		if (r) {
@@ -1397,7 +1369,7 @@ static int cayman_startup(struct radeon_device *rdev)
 
 	r = evergreen_blit_init(rdev);
 	if (r) {
-		evergreen_blit_fini(rdev);
+		r600_blit_fini(rdev);
 		rdev->asic->copy = NULL;
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
@@ -1458,21 +1430,13 @@ int cayman_resume(struct radeon_device *rdev)
 
 int cayman_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	/* FIXME: we should wait for ring to be empty */
 	cayman_cp_enable(rdev, false);
 	rdev->cp.ready = false;
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
-
-	/* unpin shaders bo */
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (likely(r == 0)) {
-		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-	}
+	r600_blit_suspend(rdev);
 
 	return 0;
 }
@@ -1585,7 +1549,7 @@ int cayman_init(struct radeon_device *rdev)
 
 void cayman_fini(struct radeon_device *rdev)
 {
-	evergreen_blit_fini(rdev);
+	r600_blit_fini(rdev);
 	cayman_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 574f2c7c6dd9..8f8b8fa14357 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -68,6 +68,108 @@ MODULE_FIRMWARE(FIRMWARE_R520);
  * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
  */
 
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+			    struct radeon_cs_packet *pkt,
+			    unsigned idx,
+			    unsigned reg)
+{
+	int r;
+	u32 tile_flags = 0;
+	u32 tmp;
+	struct radeon_cs_reloc *reloc;
+	u32 value;
+
+	r = r100_cs_packet_next_reloc(p, &reloc);
+	if (r) {
+		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+			  idx, reg);
+		r100_cs_dump_packet(p, pkt);
+		return r;
+	}
+	value = radeon_get_ib_value(p, idx);
+	tmp = value & 0x003fffff;
+	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
+
+	if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+		tile_flags |= RADEON_DST_TILE_MACRO;
+	if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+		if (reg == RADEON_SRC_PITCH_OFFSET) {
+			DRM_ERROR("Cannot src blit from microtiled surface\n");
+			r100_cs_dump_packet(p, pkt);
+			return -EINVAL;
+		}
+		tile_flags |= RADEON_DST_TILE_MICRO;
+	}
+
+	tmp |= tile_flags;
+	p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
+	return 0;
+}
+
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+			     struct radeon_cs_packet *pkt,
+			     int idx)
+{
+	unsigned c, i;
+	struct radeon_cs_reloc *reloc;
+	struct r100_cs_track *track;
+	int r = 0;
+	volatile uint32_t *ib;
+	u32 idx_value;
+
+	ib = p->ib->ptr;
+	track = (struct r100_cs_track *)p->track;
+	c = radeon_get_ib_value(p, idx++) & 0x1F;
+	if (c > 16) {
+	    DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
+		      pkt->opcode);
+	    r100_cs_dump_packet(p, pkt);
+	    return -EINVAL;
+	}
+	track->num_arrays = c;
+	for (i = 0; i < (c - 1); i+=2, idx+=3) {
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		idx_value = radeon_get_ib_value(p, idx);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+
+		track->arrays[i + 0].esize = idx_value >> 8;
+		track->arrays[i + 0].robj = reloc->robj;
+		track->arrays[i + 0].esize &= 0x7F;
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
+		track->arrays[i + 1].robj = reloc->robj;
+		track->arrays[i + 1].esize = idx_value >> 24;
+		track->arrays[i + 1].esize &= 0x7F;
+	}
+	if (c & 1) {
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+					  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		idx_value = radeon_get_ib_value(p, idx);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+		track->arrays[i + 0].robj = reloc->robj;
+		track->arrays[i + 0].esize = idx_value >> 8;
+		track->arrays[i + 0].esize &= 0x7F;
+	}
+	return r;
+}
+
 void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
 {
 	/* enable the pflip int */
@@ -591,7 +693,7 @@ void r100_irq_disable(struct radeon_device *rdev)
 	WREG32(R_000044_GEN_INT_STATUS, tmp);
 }
 
-static inline uint32_t r100_irq_ack(struct radeon_device *rdev)
+static uint32_t r100_irq_ack(struct radeon_device *rdev)
 {
 	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
 	uint32_t irq_mask = RADEON_SW_INT_TEST |
@@ -724,11 +826,11 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
 int r100_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
-		   unsigned num_pages,
+		   unsigned num_gpu_pages,
 		   struct radeon_fence *fence)
 {
 	uint32_t cur_pages;
-	uint32_t stride_bytes = PAGE_SIZE;
+	uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
 	uint32_t pitch;
 	uint32_t stride_pixels;
 	unsigned ndw;
@@ -740,7 +842,7 @@ int r100_copy_blit(struct radeon_device *rdev,
 	/* radeon pitch is /64 */
 	pitch = stride_bytes / 64;
 	stride_pixels = stride_bytes / 4;
-	num_loops = DIV_ROUND_UP(num_pages, 8191);
+	num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
 
 	/* Ask for enough room for blit + flush + fence */
 	ndw = 64 + (10 * num_loops);
@@ -749,12 +851,12 @@ int r100_copy_blit(struct radeon_device *rdev,
 		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
 		return -EINVAL;
 	}
-	while (num_pages > 0) {
-		cur_pages = num_pages;
+	while (num_gpu_pages > 0) {
+		cur_pages = num_gpu_pages;
 		if (cur_pages > 8191) {
 			cur_pages = 8191;
 		}
-		num_pages -= cur_pages;
+		num_gpu_pages -= cur_pages;
 
 		/* pages are in Y direction - height
 		   page width in X direction - width */
@@ -776,8 +878,8 @@ int r100_copy_blit(struct radeon_device *rdev,
 		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
 		radeon_ring_write(rdev, 0);
 		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
-		radeon_ring_write(rdev, num_pages);
-		radeon_ring_write(rdev, num_pages);
+		radeon_ring_write(rdev, num_gpu_pages);
+		radeon_ring_write(rdev, num_gpu_pages);
 		radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
 	}
 	radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
@@ -993,7 +1095,8 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 	/* Force read & write ptr to 0 */
 	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
 	WREG32(RADEON_CP_RB_RPTR_WR, 0);
-	WREG32(RADEON_CP_RB_WPTR, 0);
+	rdev->cp.wptr = 0;
+	WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
 
 	/* set the wb address whether it's enabled or not */
 	WREG32(R_00070C_CP_RB_RPTR_ADDR,
@@ -1010,9 +1113,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 	WREG32(RADEON_CP_RB_CNTL, tmp);
 	udelay(10);
 	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
-	rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
-	/* protect against crazy HW on resume */
-	rdev->cp.wptr &= rdev->cp.ptr_mask;
 	/* Set cp mode to bus mastering & enable cp*/
 	WREG32(RADEON_CP_CSQ_MODE,
 	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
@@ -3152,7 +3252,7 @@ void r100_bandwidth_update(struct radeon_device *rdev)
 	}
 }
 
-static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
+static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
 {
 	DRM_ERROR("pitch                      %d\n", t->pitch);
 	DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
@@ -3970,3 +4070,43 @@ int r100_init(struct radeon_device *rdev)
 	}
 	return 0;
 }
+
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	if (reg < rdev->rmmio_size)
+		return readl(((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	if (reg < rdev->rmmio_size)
+		writel(v, ((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
+{
+	if (reg < rdev->rio_mem_size)
+		return ioread32(rdev->rio_mem + reg);
+	else {
+		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
+	}
+}
+
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	if (reg < rdev->rio_mem_size)
+		iowrite32(v, rdev->rio_mem + reg);
+	else {
+		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
+	}
+}
diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h
index 686f9dc5d4bd..6a603b378adb 100644
--- a/drivers/gpu/drm/radeon/r100_track.h
+++ b/drivers/gpu/drm/radeon/r100_track.h
@@ -92,106 +92,10 @@ int r200_packet0_check(struct radeon_cs_parser *p,
 		       struct radeon_cs_packet *pkt,
 		       unsigned idx, unsigned reg);
 
-
-
-static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
-					  struct radeon_cs_packet *pkt,
-					  unsigned idx,
-					  unsigned reg)
-{
-	int r;
-	u32 tile_flags = 0;
-	u32 tmp;
-	struct radeon_cs_reloc *reloc;
-	u32 value;
-
-	r = r100_cs_packet_next_reloc(p, &reloc);
-	if (r) {
-		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
-			  idx, reg);
-		r100_cs_dump_packet(p, pkt);
-		return r;
-	}
-	value = radeon_get_ib_value(p, idx);
-	tmp = value & 0x003fffff;
-	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-
-	if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
-		tile_flags |= RADEON_DST_TILE_MACRO;
-	if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
-		if (reg == RADEON_SRC_PITCH_OFFSET) {
-			DRM_ERROR("Cannot src blit from microtiled surface\n");
-			r100_cs_dump_packet(p, pkt);
-			return -EINVAL;
-		}
-		tile_flags |= RADEON_DST_TILE_MICRO;
-	}
-
-	tmp |= tile_flags;
-	p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
-	return 0;
-}
-
-static inline int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
-					   struct radeon_cs_packet *pkt,
-					   int idx)
-{
-	unsigned c, i;
-	struct radeon_cs_reloc *reloc;
-	struct r100_cs_track *track;
-	int r = 0;
-	volatile uint32_t *ib;
-	u32 idx_value;
-
-	ib = p->ib->ptr;
-	track = (struct r100_cs_track *)p->track;
-	c = radeon_get_ib_value(p, idx++) & 0x1F;
-	if (c > 16) {
-	    DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
-		      pkt->opcode);
-	    r100_cs_dump_packet(p, pkt);
-	    return -EINVAL;
-	}
-	track->num_arrays = c;
-	for (i = 0; i < (c - 1); i+=2, idx+=3) {
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-
-		track->arrays[i + 0].esize = idx_value >> 8;
-		track->arrays[i + 0].robj = reloc->robj;
-		track->arrays[i + 0].esize &= 0x7F;
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
-		track->arrays[i + 1].robj = reloc->robj;
-		track->arrays[i + 1].esize = idx_value >> 24;
-		track->arrays[i + 1].esize &= 0x7F;
-	}
-	if (c & 1) {
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-					  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-		track->arrays[i + 0].robj = reloc->robj;
-		track->arrays[i + 0].esize = idx_value >> 8;
-		track->arrays[i + 0].esize &= 0x7F;
-	}
-	return r;
-}
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+			    struct radeon_cs_packet *pkt,
+			    unsigned idx,
+			    unsigned reg);
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+			     struct radeon_cs_packet *pkt,
+			     int idx);
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index f24058300413..a1f3ba063c2d 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -84,7 +84,7 @@ static int r200_get_vtx_size_0(uint32_t vtx_fmt_0)
 int r200_copy_dma(struct radeon_device *rdev,
 		  uint64_t src_offset,
 		  uint64_t dst_offset,
-		  unsigned num_pages,
+		  unsigned num_gpu_pages,
 		  struct radeon_fence *fence)
 {
 	uint32_t size;
@@ -93,7 +93,7 @@ int r200_copy_dma(struct radeon_device *rdev,
 	int r = 0;
 
 	/* radeon pitch is /64 */
-	size = num_pages << PAGE_SHIFT;
+	size = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;
 	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
 	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c
index c5c2742e4140..1fe98b421c9b 100644
--- a/drivers/gpu/drm/radeon/r300_cmdbuf.c
+++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c
@@ -791,7 +791,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
 /**
  * Emit the sequence to pacify R300.
  */
-static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
+static void r300_pacify(drm_radeon_private_t *dev_priv)
 {
 	uint32_t cache_z, cache_3d, cache_2d;
 	RING_LOCALS;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 334aee6eab7c..12470b090ddf 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2212,7 +2212,8 @@ int r600_cp_resume(struct radeon_device *rdev)
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
 	WREG32(CP_RB_RPTR_WR, 0);
-	WREG32(CP_RB_WPTR, 0);
+	rdev->cp.wptr = 0;
+	WREG32(CP_RB_WPTR, rdev->cp.wptr);
 
 	/* set the wb address whether it's enabled or not */
 	WREG32(CP_RB_RPTR_ADDR,
@@ -2234,7 +2235,6 @@ int r600_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
 
 	rdev->cp.rptr = RREG32(CP_RB_RPTR);
-	rdev->cp.wptr = RREG32(CP_RB_WPTR);
 
 	r600_cp_start(rdev);
 	rdev->cp.ready = true;
@@ -2356,26 +2356,42 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
 }
 
 int r600_copy_blit(struct radeon_device *rdev,
-		   uint64_t src_offset, uint64_t dst_offset,
-		   unsigned num_pages, struct radeon_fence *fence)
+		   uint64_t src_offset,
+		   uint64_t dst_offset,
+		   unsigned num_gpu_pages,
+		   struct radeon_fence *fence)
 {
 	int r;
 
 	mutex_lock(&rdev->r600_blit.mutex);
 	rdev->r600_blit.vb_ib = NULL;
-	r = r600_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE);
+	r = r600_blit_prepare_copy(rdev, num_gpu_pages);
 	if (r) {
 		if (rdev->r600_blit.vb_ib)
 			radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
 		mutex_unlock(&rdev->r600_blit.mutex);
 		return r;
 	}
-	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE);
+	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages);
 	r600_blit_done_copy(rdev, fence);
 	mutex_unlock(&rdev->r600_blit.mutex);
 	return 0;
 }
 
+void r600_blit_suspend(struct radeon_device *rdev)
+{
+	int r;
+
+	/* unpin shaders bo */
+	if (rdev->r600_blit.shader_obj) {
+		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
+		if (!r) {
+			radeon_bo_unpin(rdev->r600_blit.shader_obj);
+			radeon_bo_unreserve(rdev->r600_blit.shader_obj);
+		}
+	}
+}
+
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
 			 uint32_t offset, uint32_t obj_size)
@@ -2495,8 +2511,6 @@ int r600_resume(struct radeon_device *rdev)
 
 int r600_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	r600_audio_fini(rdev);
 	/* FIXME: we should wait for ring to be empty */
 	r600_cp_stop(rdev);
@@ -2504,14 +2518,8 @@ int r600_suspend(struct radeon_device *rdev)
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	r600_pcie_gart_disable(rdev);
-	/* unpin shaders bo */
-	if (rdev->r600_blit.shader_obj) {
-		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-		if (!r) {
-			radeon_bo_unpin(rdev->r600_blit.shader_obj);
-			radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-		}
-	}
+	r600_blit_suspend(rdev);
+
 	return 0;
 }
 
@@ -3138,7 +3146,7 @@ int r600_irq_set(struct radeon_device *rdev)
 	return 0;
 }
 
-static inline void r600_irq_ack(struct radeon_device *rdev)
+static void r600_irq_ack(struct radeon_device *rdev)
 {
 	u32 tmp;
 
@@ -3239,7 +3247,7 @@ void r600_irq_disable(struct radeon_device *rdev)
 	r600_disable_interrupt_state(rdev);
 }
 
-static inline u32 r600_get_ih_wptr(struct radeon_device *rdev)
+static u32 r600_get_ih_wptr(struct radeon_device *rdev)
 {
 	u32 wptr, tmp;
 
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index 7f1043448d25..3c031a48205d 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -41,7 +41,7 @@
 #define COLOR_5_6_5           0x8
 #define COLOR_8_8_8_8         0x1a
 
-static inline void
+static void
 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
 {
 	u32 cb_color_info;
@@ -99,7 +99,7 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
 		    u32 sync_type, u32 size, u64 mc_addr)
 {
@@ -121,7 +121,7 @@ cp_set_surface_sync(drm_radeon_private_t *dev_priv,
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 set_shaders(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
@@ -184,7 +184,7 @@ set_shaders(struct drm_device *dev)
 			    R600_SH_ACTION_ENA, 512, gpu_addr);
 }
 
-static inline void
+static void
 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
 {
 	uint32_t sq_vtx_constant_word2;
@@ -220,7 +220,7 @@ set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
 				    R600_VC_ACTION_ENA, 48, gpu_addr);
 }
 
-static inline void
+static void
 set_tex_resource(drm_radeon_private_t *dev_priv,
 		 int format, int w, int h, int pitch, u64 gpu_addr)
 {
@@ -258,7 +258,7 @@ set_tex_resource(drm_radeon_private_t *dev_priv,
 
 }
 
-static inline void
+static void
 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
 {
 	RING_LOCALS;
@@ -282,7 +282,7 @@ set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 draw_auto(drm_radeon_private_t *dev_priv)
 {
 	RING_LOCALS;
@@ -311,7 +311,7 @@ draw_auto(drm_radeon_private_t *dev_priv)
 	COMMIT_RING();
 }
 
-static inline void
+static void
 set_default_state(drm_radeon_private_t *dev_priv)
 {
 	int i;
@@ -489,7 +489,7 @@ set_default_state(drm_radeon_private_t *dev_priv)
 	ADVANCE_RING();
 }
 
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
 {
 	u32 result, i, exponent, fraction;
 
@@ -515,7 +515,7 @@ static inline uint32_t i2f(uint32_t input)
 }
 
 
-static inline int r600_nomm_get_vb(struct drm_device *dev)
+static int r600_nomm_get_vb(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	dev_priv->blit_vb = radeon_freelist_get(dev);
@@ -526,7 +526,7 @@ static inline int r600_nomm_get_vb(struct drm_device *dev)
 	return 0;
 }
 
-static inline void r600_nomm_put_vb(struct drm_device *dev)
+static void r600_nomm_put_vb(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 
@@ -534,7 +534,7 @@ static inline void r600_nomm_put_vb(struct drm_device *dev)
 	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
 }
 
-static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)
+static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	return (((char *)dev->agp_buffer_map->handle +
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 9aa74c3f8cb6..c4cf1308d4a1 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -42,6 +42,9 @@
 #define COLOR_5_6_5           0x8
 #define COLOR_8_8_8_8         0x1a
 
+#define RECT_UNIT_H           32
+#define RECT_UNIT_W           (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H)
+
 /* emits 21 on rv770+, 23 on r600 */
 static void
 set_render_target(struct radeon_device *rdev, int format,
@@ -54,7 +57,9 @@ set_render_target(struct radeon_device *rdev, int format,
 	if (h < 8)
 		h = 8;
 
-	cb_color_info = ((format << 2) | (1 << 27) | (1 << 8));
+	cb_color_info = CB_FORMAT(format) |
+		CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
+		CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
 	pitch = (w / 8) - 1;
 	slice = ((w * h) / 64) - 1;
 
@@ -164,9 +169,10 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 {
 	u32 sq_vtx_constant_word2;
 
-	sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
+		SQ_VTXC_STRIDE(16);
 #ifdef __BIG_ENDIAN
-	sq_vtx_constant_word2 |= (2 << 30);
+	sq_vtx_constant_word2 |=  SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
 #endif
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
@@ -202,18 +208,19 @@ set_tex_resource(struct radeon_device *rdev,
 	if (h < 1)
 		h = 1;
 
-	sq_tex_resource_word0 = (1 << 0) | (1 << 3);
-	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
-				  ((w - 1) << 19));
+	sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) |
+		S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
+	sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) |
+		S_038000_TEX_WIDTH(w - 1);
 
-	sq_tex_resource_word1 = (format << 26);
-	sq_tex_resource_word1 |= ((h - 1) << 0);
+	sq_tex_resource_word1 = S_038004_DATA_FORMAT(format);
+	sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1);
 
-	sq_tex_resource_word4 = ((1 << 14) |
-				 (0 << 16) |
-				 (1 << 19) |
-				 (2 << 22) |
-				 (3 << 25));
+	sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) |
+		S_038010_DST_SEL_X(SQ_SEL_X) |
+		S_038010_DST_SEL_Y(SQ_SEL_Y) |
+		S_038010_DST_SEL_Z(SQ_SEL_Z) |
+		S_038010_DST_SEL_W(SQ_SEL_W);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
 	radeon_ring_write(rdev, 0);
@@ -450,7 +457,7 @@ set_default_state(struct radeon_device *rdev)
 	radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
 }
 
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
 {
 	u32 result, i, exponent, fraction;
 
@@ -483,6 +490,27 @@ int r600_blit_init(struct radeon_device *rdev)
 	u32 packet2s[16];
 	int num_packet2s = 0;
 
+	rdev->r600_blit.primitives.set_render_target = set_render_target;
+	rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
+	rdev->r600_blit.primitives.set_shaders = set_shaders;
+	rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
+	rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
+	rdev->r600_blit.primitives.set_scissors = set_scissors;
+	rdev->r600_blit.primitives.draw_auto = draw_auto;
+	rdev->r600_blit.primitives.set_default_state = set_default_state;
+
+	rdev->r600_blit.ring_size_common = 40; /* shaders + def state */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+	rdev->r600_blit.ring_size_common += 5; /* done copy */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+
+	rdev->r600_blit.ring_size_per_loop = 76;
+	/* set_render_target emits 2 extra dwords on rv6xx */
+	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
+		rdev->r600_blit.ring_size_per_loop += 2;
+
+	rdev->r600_blit.max_dim = 8192;
+
 	/* pin copy shader into vram if already initialized */
 	if (rdev->r600_blit.shader_obj)
 		goto done;
@@ -600,47 +628,80 @@ static void r600_vb_ib_put(struct radeon_device *rdev)
 	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
 }
 
-int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
+static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
+				      int *width, int *height, int max_dim)
+{
+	unsigned max_pages;
+	unsigned pages = num_gpu_pages;
+	int w, h;
+
+	if (num_gpu_pages == 0) {
+		/* not supposed to be called with no pages, but just in case */
+		h = 0;
+		w = 0;
+		pages = 0;
+		WARN_ON(1);
+	} else {
+		int rect_order = 2;
+		h = RECT_UNIT_H;
+		while (num_gpu_pages / rect_order) {
+			h *= 2;
+			rect_order *= 4;
+			if (h >= max_dim) {
+				h = max_dim;
+				break;
+			}
+		}
+		max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H);
+		if (pages > max_pages)
+			pages = max_pages;
+		w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h;
+		w = (w / RECT_UNIT_W) * RECT_UNIT_W;
+		pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H);
+		BUG_ON(pages == 0);
+	}
+
+
+	DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages);
+
+	/* return width and height only of the caller wants it */
+	if (height)
+		*height = h;
+	if (width)
+		*width = w;
+
+	return pages;
+}
+
+
+int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages)
 {
 	int r;
-	int ring_size, line_size;
-	int max_size;
-	/* loops of emits 64 + fence emit possible */
-	int dwords_per_loop = 76, num_loops;
+	int ring_size;
+	int num_loops = 0;
+	int dwords_per_loop = rdev->r600_blit.ring_size_per_loop;
 
 	r = r600_vb_ib_get(rdev);
 	if (r)
 		return r;
 
-	/* set_render_target emits 2 extra dwords on rv6xx */
-	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
-		dwords_per_loop += 2;
-
-	/* 8 bpp vs 32 bpp for xfer unit */
-	if (size_bytes & 3)
-		line_size = 8192;
-	else
-		line_size = 8192*4;
-
-	max_size = 8192 * line_size;
+	/* num loops */
+	while (num_gpu_pages) {
+		num_gpu_pages -=
+			r600_blit_create_rect(num_gpu_pages, NULL, NULL,
+					      rdev->r600_blit.max_dim);
+		num_loops++;
+	}
 
-	/* major loops cover the max size transfer */
-	num_loops = ((size_bytes + max_size) / max_size);
-	/* minor loops cover the extra non aligned bits */
-	num_loops += ((size_bytes % line_size) ? 1 : 0);
 	/* calculate number of loops correctly */
 	ring_size = num_loops * dwords_per_loop;
-	/* set default  + shaders */
-	ring_size += 40; /* shaders + def state */
-	ring_size += 10; /* fence emit for VB IB */
-	ring_size += 5; /* done copy */
-	ring_size += 10; /* fence emit for done copy */
+	ring_size += rdev->r600_blit.ring_size_common;
 	r = radeon_ring_lock(rdev, ring_size);
 	if (r)
 		return r;
 
-	set_default_state(rdev); /* 14 */
-	set_shaders(rdev); /* 26 */
+	rdev->r600_blit.primitives.set_default_state(rdev);
+	rdev->r600_blit.primitives.set_shaders(rdev);
 	return 0;
 }
 
@@ -659,182 +720,64 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
 
 void r600_kms_blit_copy(struct radeon_device *rdev,
 			u64 src_gpu_addr, u64 dst_gpu_addr,
-			int size_bytes)
+			unsigned num_gpu_pages)
 {
-	int max_bytes;
 	u64 vb_gpu_addr;
 	u32 *vb;
 
-	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
-		  size_bytes, rdev->r600_blit.vb_used);
+	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n",
+		  src_gpu_addr, dst_gpu_addr,
+		  num_gpu_pages, rdev->r600_blit.vb_used);
 	vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
-	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
-		max_bytes = 8192;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = src_gpu_addr & 255;
-			int dst_x = dst_gpu_addr & 255;
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
-
-			vb[0] = i2f(dst_x);
-			vb[1] = 0;
-			vb[2] = i2f(src_x);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f(dst_x + cur_size);
-			vb[9] = i2f(h);
-			vb[10] = i2f(src_x + cur_size);
-			vb[11] = i2f(h);
-
-			/* src 9 */
-			set_tex_resource(rdev, FMT_8,
-					 src_x + cur_size, h, src_x + cur_size,
-					 src_gpu_addr);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
 
-			/* dst 23 */
-			set_render_target(rdev, COLOR_8,
-					  dst_x + cur_size, h,
-					  dst_gpu_addr);
+	while (num_gpu_pages) {
+		int w, h;
+		unsigned size_in_bytes;
+		unsigned pages_per_loop =
+			r600_blit_create_rect(num_gpu_pages, &w, &h,
+					      rdev->r600_blit.max_dim);
 
-			/* scissors 12 */
-			set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
+		size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE;
+		DRM_DEBUG("rectangle w=%d h=%d\n", w, h);
 
-			/* 14 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
+		if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
+			WARN_ON(1);
 		}
-	} else {
-		max_bytes = 8192 * 4;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = (src_gpu_addr & 255);
-			int dst_x = (dst_gpu_addr & 255);
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
 
-			vb[0] = i2f(dst_x / 4);
-			vb[1] = 0;
-			vb[2] = i2f(src_x / 4);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x / 4);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x / 4);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f((dst_x + cur_size) / 4);
-			vb[9] = i2f(h);
-			vb[10] = i2f((src_x + cur_size) / 4);
-			vb[11] = i2f(h);
-
-			/* src 9 */
-			set_tex_resource(rdev, FMT_8_8_8_8,
-					 (src_x + cur_size) / 4,
-					 h, (src_x + cur_size) / 4,
-					 src_gpu_addr);
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
-			/* dst 23 */
-			set_render_target(rdev, COLOR_8_8_8_8,
-					  (dst_x + cur_size) / 4, h,
-					  dst_gpu_addr);
-
-			/* scissors 12  */
-			set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
-
-			/* Vertex buffer setup 14 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			/* 78 ring dwords per loop */
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
-		}
+		vb[0] = 0;
+		vb[1] = 0;
+		vb[2] = 0;
+		vb[3] = 0;
+
+		vb[4] = 0;
+		vb[5] = i2f(h);
+		vb[6] = 0;
+		vb[7] = i2f(h);
+
+		vb[8] = i2f(w);
+		vb[9] = i2f(h);
+		vb[10] = i2f(w);
+		vb[11] = i2f(h);
+
+		rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8,
+							    w, h, w, src_gpu_addr);
+		rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
+							       PACKET3_TC_ACTION_ENA,
+							       size_in_bytes, src_gpu_addr);
+		rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8,
+							     w, h, dst_gpu_addr);
+		rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h);
+		vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+		rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr);
+		rdev->r600_blit.primitives.draw_auto(rdev);
+		rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
+				    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+				    size_in_bytes, dst_gpu_addr);
+
+		vb += 12;
+		rdev->r600_blit.vb_used += 4*12;
+		src_gpu_addr += size_in_bytes;
+		dst_gpu_addr += size_in_bytes;
+		num_gpu_pages -= pages_per_loop;
 	}
 }
-
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index cf83aa05a684..0a2e023c1557 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -162,7 +162,7 @@ static const struct gpu_formats color_formats_table[] = {
 	[V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR},
 };
 
-static inline bool fmt_is_valid_color(u32 format)
+static bool fmt_is_valid_color(u32 format)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
@@ -173,7 +173,7 @@ static inline bool fmt_is_valid_color(u32 format)
 	return false;
 }
 
-static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
+static bool fmt_is_valid_texture(u32 format, enum radeon_family family)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
@@ -187,7 +187,7 @@ static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
 	return false;
 }
 
-static inline int fmt_get_blocksize(u32 format)
+static int fmt_get_blocksize(u32 format)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return 0;
@@ -195,7 +195,7 @@ static inline int fmt_get_blocksize(u32 format)
 	return color_formats_table[format].blocksize;
 }
 
-static inline int fmt_get_nblocksx(u32 format, u32 w)
+static int fmt_get_nblocksx(u32 format, u32 w)
 {
 	unsigned bw;
 
@@ -209,7 +209,7 @@ static inline int fmt_get_nblocksx(u32 format, u32 w)
 	return (w + bw - 1) / bw;
 }
 
-static inline int fmt_get_nblocksy(u32 format, u32 h)
+static int fmt_get_nblocksy(u32 format, u32 h)
 {
 	unsigned bh;
 
@@ -223,25 +223,6 @@ static inline int fmt_get_nblocksy(u32 format, u32 h)
 	return (h + bh - 1) / bh;
 }
 
-static inline int r600_bpe_from_format(u32 *bpe, u32 format)
-{
- 	unsigned res;
-
-	if (format >= ARRAY_SIZE(color_formats_table))
-		goto fail;
-
-	res = color_formats_table[format].blocksize;
-	if (res == 0)
-		goto fail;
-
-	*bpe = res;
-	return 0;
-
-fail:
-	*bpe = 16;
-	return -EINVAL;
-}
-
 struct array_mode_checker {
 	int array_mode;
 	u32 group_size;
@@ -252,7 +233,7 @@ struct array_mode_checker {
 };
 
 /* returns alignment in pixels for pitch/height/depth and bytes for base */
-static inline int r600_get_array_mode_alignment(struct array_mode_checker *values,
+static int r600_get_array_mode_alignment(struct array_mode_checker *values,
 						u32 *pitch_align,
 						u32 *height_align,
 						u32 *depth_align,
@@ -331,7 +312,7 @@ static void r600_cs_track_init(struct r600_cs_track *track)
 	track->db_depth_control = 0xFFFFFFFF;
 }
 
-static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
+static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
 {
 	struct r600_cs_track *track = p->track;
 	u32 slice_tile_max, size, tmp;
@@ -737,7 +718,7 @@ static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
  * Check next packet is relocation packet3, do bo validation and compute
  * GPU offset using the provided start.
  **/
-static inline int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
+static int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
 {
 	struct radeon_cs_packet p3reloc;
 	int r;
@@ -911,7 +892,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
  * if register is safe. If register is not flag as safe this function
  * will test it against a list of register needind special handling.
  */
-static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
 	struct r600_cs_track *track = (struct r600_cs_track *)p->track;
 	struct radeon_cs_reloc *reloc;
@@ -1215,7 +1196,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx
 	return 0;
 }
 
-static inline unsigned mip_minify(unsigned size, unsigned level)
+static unsigned mip_minify(unsigned size, unsigned level)
 {
 	unsigned val;
 
@@ -1285,7 +1266,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel,
  * This function will check that the resource has valid field and that
  * the texture and mipmap bo object are big enough to cover this resource.
  */
-static inline int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
+static int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
 					      struct radeon_bo *texture,
 					      struct radeon_bo *mipmap,
 					      u64 base_offset,
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 0245ae6c204e..bfe1b5d92afe 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -79,6 +79,11 @@
 #define CB_COLOR0_SIZE                                  0x28060
 #define CB_COLOR0_VIEW                                  0x28080
 #define CB_COLOR0_INFO                                  0x280a0
+#	define CB_FORMAT(x)				((x) << 2)
+#       define CB_ARRAY_MODE(x)                         ((x) << 8)
+#	define CB_SOURCE_FORMAT(x)			((x) << 27)
+#	define CB_SF_EXPORT_FULL			0
+#	define CB_SF_EXPORT_NORM			1
 #define CB_COLOR0_TILE                                  0x280c0
 #define CB_COLOR0_FRAG                                  0x280e0
 #define CB_COLOR0_MASK                                  0x28100
@@ -417,6 +422,17 @@
 #define	SQ_PGM_START_VS					0x28858
 #define SQ_PGM_RESOURCES_VS                             0x28868
 #define SQ_PGM_CF_OFFSET_VS                             0x288d0
+
+#define SQ_VTX_CONSTANT_WORD0_0				0x30000
+#define SQ_VTX_CONSTANT_WORD1_0				0x30004
+#define SQ_VTX_CONSTANT_WORD2_0				0x30008
+#	define SQ_VTXC_BASE_ADDR_HI(x)			((x) << 0)
+#	define SQ_VTXC_STRIDE(x)			((x) << 8)
+#	define SQ_VTXC_ENDIAN_SWAP(x)			((x) << 30)
+#	define SQ_ENDIAN_NONE				0
+#	define SQ_ENDIAN_8IN16				1
+#	define SQ_ENDIAN_8IN32				2
+#define SQ_VTX_CONSTANT_WORD3_0				0x3000c
 #define	SQ_VTX_CONSTANT_WORD6_0				0x38018
 #define		S__SQ_VTX_CONSTANT_TYPE(x)			(((x) & 3) << 30)
 #define		G__SQ_VTX_CONSTANT_TYPE(x)			(((x) >> 30) & 3)
@@ -1352,6 +1368,12 @@
 #define   S_038010_DST_SEL_W(x)                        (((x) & 0x7) << 25)
 #define   G_038010_DST_SEL_W(x)                        (((x) >> 25) & 0x7)
 #define   C_038010_DST_SEL_W                           0xF1FFFFFF
+#	define SQ_SEL_X					0
+#	define SQ_SEL_Y					1
+#	define SQ_SEL_Z					2
+#	define SQ_SEL_W					3
+#	define SQ_SEL_0					4
+#	define SQ_SEL_1					5
 #define   S_038010_BASE_LEVEL(x)                       (((x) & 0xF) << 28)
 #define   G_038010_BASE_LEVEL(x)                       (((x) >> 28) & 0xF)
 #define   C_038010_BASE_LEVEL                          0x0FFFFFFF
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 0040d28816f1..156b8b7e028e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -102,7 +102,7 @@ extern int radeon_pcie_gen2;
 #define RADEON_FENCE_JIFFIES_TIMEOUT	(HZ / 2)
 /* RADEON_IB_POOL_SIZE must be a power of 2 */
 #define RADEON_IB_POOL_SIZE		16
-#define RADEON_DEBUGFS_MAX_NUM_FILES	32
+#define RADEON_DEBUGFS_MAX_COMPONENTS	32
 #define RADEONFB_CONN_LIMIT		4
 #define RADEON_BIOS_NUM_SCRATCH		8
 
@@ -322,6 +322,7 @@ union radeon_gart_table {
 
 #define RADEON_GPU_PAGE_SIZE 4096
 #define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1)
+#define RADEON_GPU_PAGE_SHIFT 12
 
 struct radeon_gart {
 	dma_addr_t			table_addr;
@@ -522,9 +523,30 @@ struct r600_ih {
 	bool                    enabled;
 };
 
+struct r600_blit_cp_primitives {
+	void (*set_render_target)(struct radeon_device *rdev, int format,
+				  int w, int h, u64 gpu_addr);
+	void (*cp_set_surface_sync)(struct radeon_device *rdev,
+				    u32 sync_type, u32 size,
+				    u64 mc_addr);
+	void (*set_shaders)(struct radeon_device *rdev);
+	void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr);
+	void (*set_tex_resource)(struct radeon_device *rdev,
+				 int format, int w, int h, int pitch,
+				 u64 gpu_addr);
+	void (*set_scissors)(struct radeon_device *rdev, int x1, int y1,
+			     int x2, int y2);
+	void (*draw_auto)(struct radeon_device *rdev);
+	void (*set_default_state)(struct radeon_device *rdev);
+};
+
 struct r600_blit {
 	struct mutex		mutex;
 	struct radeon_bo	*shader_obj;
+	struct r600_blit_cp_primitives primitives;
+	int max_dim;
+	int ring_size_common;
+	int ring_size_per_loop;
 	u64 shader_gpu_addr;
 	u32 vs_offset, ps_offset;
 	u32 state_offset;
@@ -533,6 +555,8 @@ struct r600_blit {
 	struct radeon_ib *vb_ib;
 };
 
+void r600_blit_suspend(struct radeon_device *rdev);
+
 int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -600,32 +624,7 @@ struct radeon_cs_parser {
 
 extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx);
 extern int radeon_cs_finish_pages(struct radeon_cs_parser *p);
-
-
-static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
-{
-	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
-	u32 pg_idx, pg_offset;
-	u32 idx_value = 0;
-	int new_page;
-
-	pg_idx = (idx * 4) / PAGE_SIZE;
-	pg_offset = (idx * 4) % PAGE_SIZE;
-
-	if (ibc->kpage_idx[0] == pg_idx)
-		return ibc->kpage[0][pg_offset/4];
-	if (ibc->kpage_idx[1] == pg_idx)
-		return ibc->kpage[1][pg_offset/4];
-
-	new_page = radeon_cs_update_pages(p, pg_idx);
-	if (new_page < 0) {
-		p->parser_error = new_page;
-		return 0;
-	}
-
-	idx_value = ibc->kpage[new_page][pg_offset/4];
-	return idx_value;
-}
+extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx);
 
 struct radeon_cs_packet {
 	unsigned	idx;
@@ -868,7 +867,7 @@ struct radeon_pm {
 /*
  * Benchmarking
  */
-void radeon_benchmark(struct radeon_device *rdev);
+void radeon_benchmark(struct radeon_device *rdev, int test_number);
 
 
 /*
@@ -914,17 +913,17 @@ struct radeon_asic {
 	int (*copy_blit)(struct radeon_device *rdev,
 			 uint64_t src_offset,
 			 uint64_t dst_offset,
-			 unsigned num_pages,
+			 unsigned num_gpu_pages,
 			 struct radeon_fence *fence);
 	int (*copy_dma)(struct radeon_device *rdev,
 			uint64_t src_offset,
 			uint64_t dst_offset,
-			unsigned num_pages,
+			unsigned num_gpu_pages,
 			struct radeon_fence *fence);
 	int (*copy)(struct radeon_device *rdev,
 		    uint64_t src_offset,
 		    uint64_t dst_offset,
-		    unsigned num_pages,
+		    unsigned num_gpu_pages,
 		    struct radeon_fence *fence);
 	uint32_t (*get_engine_clock)(struct radeon_device *rdev);
 	void (*set_engine_clock)(struct radeon_device *rdev, uint32_t eng_clock);
@@ -1253,45 +1252,10 @@ int radeon_device_init(struct radeon_device *rdev,
 void radeon_device_fini(struct radeon_device *rdev);
 int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
-static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
-{
-	if (reg < rdev->rmmio_size)
-		return readl((rdev->rmmio) + reg);
-	else {
-		writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
-		return readl((rdev->rmmio) + RADEON_MM_DATA);
-	}
-}
-
-static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
-{
-	if (reg < rdev->rmmio_size)
-		writel(v, (rdev->rmmio) + reg);
-	else {
-		writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
-		writel(v, (rdev->rmmio) + RADEON_MM_DATA);
-	}
-}
-
-static inline u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
-{
-	if (reg < rdev->rio_mem_size)
-		return ioread32(rdev->rio_mem + reg);
-	else {
-		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
-		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
-	}
-}
-
-static inline void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	if (reg < rdev->rio_mem_size)
-		iowrite32(v, rdev->rio_mem + reg);
-	else {
-		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
-		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
-	}
-}
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 
 /*
  * Cast helper
@@ -1414,19 +1378,19 @@ void radeon_atombios_fini(struct radeon_device *rdev);
 /*
  * RING helpers.
  */
+
+#if DRM_DEBUG_CODE == 0
 static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 {
-#if DRM_DEBUG_CODE
-	if (rdev->cp.count_dw <= 0) {
-		DRM_ERROR("radeon: writting more dword to ring than expected !\n");
-	}
-#endif
 	rdev->cp.ring[rdev->cp.wptr++] = v;
 	rdev->cp.wptr &= rdev->cp.ptr_mask;
 	rdev->cp.count_dw--;
 	rdev->cp.ring_free_dw--;
 }
-
+#else
+/* With debugging this is just too big to inline */
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v);
+#endif
 
 /*
  * ASICs macro.
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index df8218bb83a6..e2944566ffea 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -765,9 +765,9 @@ static struct radeon_asic evergreen_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
@@ -812,9 +812,9 @@ static struct radeon_asic sumo_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = NULL,
@@ -859,9 +859,9 @@ static struct radeon_asic btc_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
@@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 3d7a0d7c6a9a..85f14f0337e4 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -75,7 +75,7 @@ uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg);
 int r100_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
-		   unsigned num_pages,
+		   unsigned num_gpu_pages,
 		   struct radeon_fence *fence);
 int r100_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
@@ -143,7 +143,7 @@ extern void r100_post_page_flip(struct radeon_device *rdev, int crtc);
 extern int r200_copy_dma(struct radeon_device *rdev,
 			 uint64_t src_offset,
 			 uint64_t dst_offset,
-			 unsigned num_pages,
+			 unsigned num_gpu_pages,
 			 struct radeon_fence *fence);
 void r200_set_safe_registers(struct radeon_device *rdev);
 
@@ -311,7 +311,7 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
-		   unsigned num_pages, struct radeon_fence *fence);
+		   unsigned num_gpu_pages, struct radeon_fence *fence);
 void r600_hpd_init(struct radeon_device *rdev);
 void r600_hpd_fini(struct radeon_device *rdev);
 bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -364,11 +364,11 @@ void r600_hdmi_init(struct drm_encoder *encoder);
 int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder);
 void r600_hdmi_update_audio_settings(struct drm_encoder *encoder);
 /* r600 blit */
-int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
+int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages);
 void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
 void r600_kms_blit_copy(struct radeon_device *rdev,
 			u64 src_gpu_addr, u64 dst_gpu_addr,
-			int size_bytes);
+			unsigned num_gpu_pages);
 
 /*
  * rv770,rv730,rv710,rv740
@@ -401,9 +401,6 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev);
 int evergreen_asic_reset(struct radeon_device *rdev);
 void evergreen_bandwidth_update(struct radeon_device *rdev);
 void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
-int evergreen_copy_blit(struct radeon_device *rdev,
-			uint64_t src_offset, uint64_t dst_offset,
-			unsigned num_pages, struct radeon_fence *fence);
 void evergreen_hpd_init(struct radeon_device *rdev);
 void evergreen_hpd_fini(struct radeon_device *rdev);
 bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -421,13 +418,6 @@ extern u32 evergreen_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_ba
 extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc);
 void evergreen_disable_interrupt_state(struct radeon_device *rdev);
 int evergreen_blit_init(struct radeon_device *rdev);
-void evergreen_blit_fini(struct radeon_device *rdev);
-/* evergreen blit */
-int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
-void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
-void evergreen_kms_blit_copy(struct radeon_device *rdev,
-			     u64 src_gpu_addr, u64 dst_gpu_addr,
-			     int size_bytes);
 
 /*
  * cayman
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bf2b61584cdb..08d0b94332e6 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -62,7 +62,7 @@ union atom_supported_devices {
 	struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 info_2d1;
 };
 
-static inline struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
+static struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
 							       uint8_t id)
 {
 	struct atom_context *ctx = rdev->mode_info.atom_context;
@@ -228,7 +228,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev)
 	}
 }
 
-static inline struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
+static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
 							u8 id)
 {
 	struct atom_context *ctx = rdev->mode_info.atom_context;
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
index 10191d9372d8..5cafc90de7f8 100644
--- a/drivers/gpu/drm/radeon/radeon_benchmark.c
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -26,21 +26,81 @@
 #include "radeon_reg.h"
 #include "radeon.h"
 
-void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
-			   unsigned sdomain, unsigned ddomain)
+#define RADEON_BENCHMARK_COPY_BLIT 1
+#define RADEON_BENCHMARK_COPY_DMA  0
+
+#define RADEON_BENCHMARK_ITERATIONS 1024
+#define RADEON_BENCHMARK_COMMON_MODES_N 17
+
+static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size,
+				    uint64_t saddr, uint64_t daddr,
+				    int flag, int n)
+{
+	unsigned long start_jiffies;
+	unsigned long end_jiffies;
+	struct radeon_fence *fence = NULL;
+	int i, r;
+
+	start_jiffies = jiffies;
+	for (i = 0; i < n; i++) {
+		r = radeon_fence_create(rdev, &fence);
+		if (r)
+			return r;
+
+		switch (flag) {
+		case RADEON_BENCHMARK_COPY_DMA:
+			r = radeon_copy_dma(rdev, saddr, daddr,
+					    size / RADEON_GPU_PAGE_SIZE,
+					    fence);
+			break;
+		case RADEON_BENCHMARK_COPY_BLIT:
+			r = radeon_copy_blit(rdev, saddr, daddr,
+					     size / RADEON_GPU_PAGE_SIZE,
+					     fence);
+			break;
+		default:
+			DRM_ERROR("Unknown copy method\n");
+			r = -EINVAL;
+		}
+		if (r)
+			goto exit_do_move;
+		r = radeon_fence_wait(fence, false);
+		if (r)
+			goto exit_do_move;
+		radeon_fence_unref(&fence);
+	}
+	end_jiffies = jiffies;
+	r = jiffies_to_msecs(end_jiffies - start_jiffies);
+
+exit_do_move:
+	if (fence)
+		radeon_fence_unref(&fence);
+	return r;
+}
+
+
+static void radeon_benchmark_log_results(int n, unsigned size,
+					 unsigned int time,
+					 unsigned sdomain, unsigned ddomain,
+					 char *kind)
+{
+	unsigned int throughput = (n * (size >> 10)) / time;
+	DRM_INFO("radeon: %s %u bo moves of %u kB from"
+		 " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n",
+		 kind, n, size >> 10, sdomain, ddomain, time,
+		 throughput * 8, throughput);
+}
+
+static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
+				  unsigned sdomain, unsigned ddomain)
 {
 	struct radeon_bo *dobj = NULL;
 	struct radeon_bo *sobj = NULL;
-	struct radeon_fence *fence = NULL;
 	uint64_t saddr, daddr;
-	unsigned long start_jiffies;
-	unsigned long end_jiffies;
-	unsigned long time;
-	unsigned i, n, size;
-	int r;
+	int r, n;
+	unsigned int time;
 
-	size = bsize;
-	n = 1024;
+	n = RADEON_BENCHMARK_ITERATIONS;
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, &sobj);
 	if (r) {
 		goto out_cleanup;
@@ -67,65 +127,26 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
 	}
 
 	/* r100 doesn't have dma engine so skip the test */
-	if (rdev->asic->copy_dma) {
-
-		start_jiffies = jiffies;
-		for (i = 0; i < n; i++) {
-			r = radeon_fence_create(rdev, &fence);
-			if (r) {
-				goto out_cleanup;
-			}
-
-			r = radeon_copy_dma(rdev, saddr, daddr,
-					size / RADEON_GPU_PAGE_SIZE, fence);
-
-			if (r) {
-				goto out_cleanup;
-			}
-			r = radeon_fence_wait(fence, false);
-			if (r) {
-				goto out_cleanup;
-			}
-			radeon_fence_unref(&fence);
-		}
-		end_jiffies = jiffies;
-		time = end_jiffies - start_jiffies;
-		time = jiffies_to_msecs(time);
-		if (time > 0) {
-			i = ((n * size) >> 10) / time;
-			printk(KERN_INFO "radeon: dma %u bo moves of %ukb from"
-					" %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n",
-					n, size >> 10,
-					sdomain, ddomain, time,
-					i, i * 1000, (i * 1000) / 1024);
-		}
-	}
-
-	start_jiffies = jiffies;
-	for (i = 0; i < n; i++) {
-		r = radeon_fence_create(rdev, &fence);
-		if (r) {
-			goto out_cleanup;
-		}
-		r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence);
-		if (r) {
-			goto out_cleanup;
-		}
-		r = radeon_fence_wait(fence, false);
-		if (r) {
+	/* also, VRAM-to-VRAM test doesn't make much sense for DMA */
+	/* skip it as well if domains are the same */
+	if ((rdev->asic->copy_dma) && (sdomain != ddomain)) {
+		time = radeon_benchmark_do_move(rdev, size, saddr, daddr,
+						RADEON_BENCHMARK_COPY_DMA, n);
+		if (time < 0)
 			goto out_cleanup;
-		}
-		radeon_fence_unref(&fence);
-	}
-	end_jiffies = jiffies;
-	time = end_jiffies - start_jiffies;
-	time = jiffies_to_msecs(time);
-	if (time > 0) {
-		i = ((n * size) >> 10) / time;
-		printk(KERN_INFO "radeon: blit %u bo moves of %ukb from %d to %d"
-		       " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10,
-		       sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024);
+		if (time > 0)
+			radeon_benchmark_log_results(n, size, time,
+						     sdomain, ddomain, "dma");
 	}
+
+	time = radeon_benchmark_do_move(rdev, size, saddr, daddr,
+					RADEON_BENCHMARK_COPY_BLIT, n);
+	if (time < 0)
+		goto out_cleanup;
+	if (time > 0)
+		radeon_benchmark_log_results(n, size, time,
+					     sdomain, ddomain, "blit");
+
 out_cleanup:
 	if (sobj) {
 		r = radeon_bo_reserve(sobj, false);
@@ -143,18 +164,92 @@ out_cleanup:
 		}
 		radeon_bo_unref(&dobj);
 	}
-	if (fence) {
-		radeon_fence_unref(&fence);
-	}
+
 	if (r) {
-		printk(KERN_WARNING "Error while benchmarking BO move.\n");
+		DRM_ERROR("Error while benchmarking BO move.\n");
 	}
 }
 
-void radeon_benchmark(struct radeon_device *rdev)
+void radeon_benchmark(struct radeon_device *rdev, int test_number)
 {
-	radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT,
-			      RADEON_GEM_DOMAIN_VRAM);
-	radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
-			      RADEON_GEM_DOMAIN_GTT);
+	int i;
+	int common_modes[RADEON_BENCHMARK_COMMON_MODES_N] = {
+		640 * 480 * 4,
+		720 * 480 * 4,
+		800 * 600 * 4,
+		848 * 480 * 4,
+		1024 * 768 * 4,
+		1152 * 768 * 4,
+		1280 * 720 * 4,
+		1280 * 800 * 4,
+		1280 * 854 * 4,
+		1280 * 960 * 4,
+		1280 * 1024 * 4,
+		1440 * 900 * 4,
+		1400 * 1050 * 4,
+		1680 * 1050 * 4,
+		1600 * 1200 * 4,
+		1920 * 1080 * 4,
+		1920 * 1200 * 4
+	};
+
+	switch (test_number) {
+	case 1:
+		/* simple test, VRAM to GTT and GTT to VRAM */
+		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT,
+				      RADEON_GEM_DOMAIN_VRAM);
+		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
+				      RADEON_GEM_DOMAIN_GTT);
+		break;
+	case 2:
+		/* simple test, VRAM to VRAM */
+		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
+				      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 3:
+		/* GTT to VRAM, buffer size sweep, powers of 2 */
+		for (i = 1; i <= 65536; i <<= 1)
+			radeon_benchmark_move(rdev, i*1024,
+					      RADEON_GEM_DOMAIN_GTT,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 4:
+		/* VRAM to GTT, buffer size sweep, powers of 2 */
+		for (i = 1; i <= 65536; i <<= 1)
+			radeon_benchmark_move(rdev, i*1024,
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_GTT);
+		break;
+	case 5:
+		/* VRAM to VRAM, buffer size sweep, powers of 2 */
+		for (i = 1; i <= 65536; i <<= 1)
+			radeon_benchmark_move(rdev, i*1024,
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 6:
+		/* GTT to VRAM, buffer size sweep, common modes */
+		for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+			radeon_benchmark_move(rdev, common_modes[i],
+					      RADEON_GEM_DOMAIN_GTT,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 7:
+		/* VRAM to GTT, buffer size sweep, common modes */
+		for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+			radeon_benchmark_move(rdev, common_modes[i],
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_GTT);
+		break;
+	case 8:
+		/* VRAM to VRAM, buffer size sweep, common modes */
+		for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+			radeon_benchmark_move(rdev, common_modes[i],
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+
+	default:
+		DRM_ERROR("Unknown benchmark\n");
+	}
 }
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 63675241c7ff..8bf83c4b4147 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -620,8 +620,8 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde
 		i2c.y_data_mask = 0x80;
 	} else {
 		/* default masks for ddc pads */
-		i2c.mask_clk_mask = RADEON_GPIO_EN_1;
-		i2c.mask_data_mask = RADEON_GPIO_EN_0;
+		i2c.mask_clk_mask = RADEON_GPIO_MASK_1;
+		i2c.mask_data_mask = RADEON_GPIO_MASK_0;
 		i2c.a_clk_mask = RADEON_GPIO_A_1;
 		i2c.a_data_mask = RADEON_GPIO_A_0;
 		i2c.en_clk_mask = RADEON_GPIO_EN_1;
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index c4b8741dbf58..dec6cbe6a0a6 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -68,11 +68,11 @@ void radeon_connector_hotplug(struct drm_connector *connector)
 	if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
 		int saved_dpms = connector->dpms;
 
-		if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) &&
-		    radeon_dp_needs_link_train(radeon_connector))
-			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
-		else
+		/* Only turn off the display it it's physically disconnected */
+		if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd))
 			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+		else if (radeon_dp_needs_link_train(radeon_connector))
+			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
 		connector->dpms = saved_dpms;
 	}
 }
@@ -724,6 +724,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
 		dret = radeon_ddc_probe(radeon_connector,
 					radeon_connector->requires_extended_probe);
 	if (dret) {
+		radeon_connector->detected_by_load = false;
 		if (radeon_connector->edid) {
 			kfree(radeon_connector->edid);
 			radeon_connector->edid = NULL;
@@ -750,12 +751,21 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
 	} else {
 
 		/* if we aren't forcing don't do destructive polling */
-		if (!force)
-			return connector->status;
+		if (!force) {
+			/* only return the previous status if we last
+			 * detected a monitor via load.
+			 */
+			if (radeon_connector->detected_by_load)
+				return connector->status;
+			else
+				return ret;
+		}
 
 		if (radeon_connector->dac_load_detect && encoder) {
 			encoder_funcs = encoder->helper_private;
 			ret = encoder_funcs->detect(encoder, connector);
+			if (ret == connector_status_connected)
+				radeon_connector->detected_by_load = true;
 		}
 	}
 
@@ -897,6 +907,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 		dret = radeon_ddc_probe(radeon_connector,
 					radeon_connector->requires_extended_probe);
 	if (dret) {
+		radeon_connector->detected_by_load = false;
 		if (radeon_connector->edid) {
 			kfree(radeon_connector->edid);
 			radeon_connector->edid = NULL;
@@ -959,8 +970,18 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 	if ((ret == connector_status_connected) && (radeon_connector->use_digital == true))
 		goto out;
 
+	/* DVI-D and HDMI-A are digital only */
+	if ((connector->connector_type == DRM_MODE_CONNECTOR_DVID) ||
+	    (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA))
+		goto out;
+
+	/* if we aren't forcing don't do destructive polling */
 	if (!force) {
-		ret = connector->status;
+		/* only return the previous status if we last
+		 * detected a monitor via load.
+		 */
+		if (radeon_connector->detected_by_load)
+			ret = connector->status;
 		goto out;
 	}
 
@@ -984,6 +1005,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 					ret = encoder_funcs->detect(encoder, connector);
 					if (ret == connector_status_connected) {
 						radeon_connector->use_digital = false;
+						radeon_connector->detected_by_load = true;
 					}
 				}
 				break;
@@ -1303,23 +1325,14 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
 		/* get the DPCD from the bridge */
 		radeon_dp_getdpcd(radeon_connector);
 
-		if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd))
-			ret = connector_status_connected;
-		else {
-			/* need to setup ddc on the bridge */
-			if (encoder)
-				radeon_atom_ext_encoder_setup_ddc(encoder);
+		if (encoder) {
+			/* setup ddc on the bridge */
+			radeon_atom_ext_encoder_setup_ddc(encoder);
 			if (radeon_ddc_probe(radeon_connector,
-					     radeon_connector->requires_extended_probe))
+					     radeon_connector->requires_extended_probe)) /* try DDC */
 				ret = connector_status_connected;
-		}
-
-		if ((ret == connector_status_disconnected) &&
-		    radeon_connector->dac_load_detect) {
-			struct drm_encoder *encoder = radeon_best_single_encoder(connector);
-			struct drm_encoder_helper_funcs *encoder_funcs;
-			if (encoder) {
-				encoder_funcs = encoder->helper_private;
+			else if (radeon_connector->dac_load_detect) { /* try load detection */
+				struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
 				ret = encoder_funcs->detect(encoder, connector);
 			}
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 3189a7efb2e9..fde25c0d65a0 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -208,23 +208,25 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
 	int xorigin = 0, yorigin = 0;
 	int w = radeon_crtc->cursor_width;
 
-	if (x < 0)
-		xorigin = -x + 1;
-	if (y < 0)
-		yorigin = -y + 1;
-	if (xorigin >= CURSOR_WIDTH)
-		xorigin = CURSOR_WIDTH - 1;
-	if (yorigin >= CURSOR_HEIGHT)
-		yorigin = CURSOR_HEIGHT - 1;
-
 	if (ASIC_IS_AVIVO(rdev)) {
-		int i = 0;
-		struct drm_crtc *crtc_p;
-
 		/* avivo cursor are offset into the total surface */
 		x += crtc->x;
 		y += crtc->y;
-		DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+	}
+	DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
+	if (x < 0) {
+		xorigin = min(-x, CURSOR_WIDTH - 1);
+		x = 0;
+	}
+	if (y < 0) {
+		yorigin = min(-y, CURSOR_HEIGHT - 1);
+		y = 0;
+	}
+
+	if (ASIC_IS_AVIVO(rdev)) {
+		int i = 0;
+		struct drm_crtc *crtc_p;
 
 		/* avivo cursor image can't end on 128 pixel boundary or
 		 * go past the end of the frame if both crtcs are enabled
@@ -253,16 +255,12 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
 
 	radeon_lock_cursor(crtc, true);
 	if (ASIC_IS_DCE4(rdev)) {
-		WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset,
-		       ((xorigin ? 0 : x) << 16) |
-		       (yorigin ? 0 : y));
+		WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y);
 		WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin);
 		WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset,
 		       ((w - 1) << 16) | (radeon_crtc->cursor_height - 1));
 	} else if (ASIC_IS_AVIVO(rdev)) {
-		WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset,
-			     ((xorigin ? 0 : x) << 16) |
-			     (yorigin ? 0 : y));
+		WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y);
 		WREG32(AVIVO_D1CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin);
 		WREG32(AVIVO_D1CUR_SIZE + radeon_crtc->crtc_offset,
 		       ((w - 1) << 16) | (radeon_crtc->cursor_height - 1));
@@ -276,8 +274,8 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
 			| yorigin));
 		WREG32(RADEON_CUR_HORZ_VERT_POSN + radeon_crtc->crtc_offset,
 		       (RADEON_CUR_LOCK
-			| ((xorigin ? 0 : x) << 16)
-			| (yorigin ? 0 : y)));
+			| (x << 16)
+			| y));
 		/* offset is from DISP(2)_BASE_ADDRESS */
 		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset +
 								      (yorigin * 256)));
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index b51e15725c6e..c33bc914d93d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -750,14 +750,15 @@ int radeon_device_init(struct radeon_device *rdev,
 
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
-	 * IGP - can handle 40-bits (in theory)
+	 * IGP - can handle 40-bits
 	 * AGP - generally dma32 is safest
-	 * PCI - only dma32
+	 * PCI - dma32 for legacy pci gart, 40 bits on newer asics
 	 */
 	rdev->need_dma32 = false;
 	if (rdev->flags & RADEON_IS_AGP)
 		rdev->need_dma32 = true;
-	if (rdev->flags & RADEON_IS_PCI)
+	if ((rdev->flags & RADEON_IS_PCI) &&
+	    (rdev->family < CHIP_RS400))
 		rdev->need_dma32 = true;
 
 	dma_bits = rdev->need_dma32 ? 32 : 40;
@@ -817,7 +818,7 @@ int radeon_device_init(struct radeon_device *rdev,
 		radeon_test_moves(rdev);
 	}
 	if (radeon_benchmarking) {
-		radeon_benchmark(rdev);
+		radeon_benchmark(rdev, radeon_benchmarking);
 	}
 	return 0;
 }
@@ -981,7 +982,7 @@ struct radeon_debugfs {
 	struct drm_info_list	*files;
 	unsigned		num_files;
 };
-static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_NUM_FILES];
+static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_COMPONENTS];
 static unsigned _radeon_debugfs_count = 0;
 
 int radeon_debugfs_add_files(struct radeon_device *rdev,
@@ -996,14 +997,17 @@ int radeon_debugfs_add_files(struct radeon_device *rdev,
 			return 0;
 		}
 	}
-	if ((_radeon_debugfs_count + nfiles) > RADEON_DEBUGFS_MAX_NUM_FILES) {
-		DRM_ERROR("Reached maximum number of debugfs files.\n");
-		DRM_ERROR("Report so we increase RADEON_DEBUGFS_MAX_NUM_FILES.\n");
+
+	i = _radeon_debugfs_count + 1;
+	if (i > RADEON_DEBUGFS_MAX_COMPONENTS) {
+		DRM_ERROR("Reached maximum number of debugfs components.\n");
+		DRM_ERROR("Report so we increase "
+		          "RADEON_DEBUGFS_MAX_COMPONENTS.\n");
 		return -EINVAL;
 	}
 	_radeon_debugfs[_radeon_debugfs_count].files = files;
 	_radeon_debugfs[_radeon_debugfs_count].num_files = nfiles;
-	_radeon_debugfs_count++;
+	_radeon_debugfs_count = i;
 #if defined(CONFIG_DEBUG_FS)
 	drm_debugfs_create_files(files, nfiles,
 				 rdev->ddev->control->debugfs_root,
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 6cc17fb96a57..6adb3e58affd 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -473,8 +473,8 @@ pflip_cleanup:
 	spin_lock_irqsave(&dev->event_lock, flags);
 	radeon_crtc->unpin_work = NULL;
 unlock_free:
-	drm_gem_object_unreference_unlocked(old_radeon_fb->obj);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
+	drm_gem_object_unreference_unlocked(old_radeon_fb->obj);
 	radeon_fence_unref(&work->fence);
 	kfree(work);
 
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index 319d85d7e759..8a171b21b453 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -1507,7 +1507,14 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode)
 		switch (mode) {
 		case DRM_MODE_DPMS_ON:
 			args.ucAction = ATOM_ENABLE;
-			atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+			/* workaround for DVOOutputControl on some RS690 systems */
+			if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_DDI) {
+				u32 reg = RREG32(RADEON_BIOS_3_SCRATCH);
+				WREG32(RADEON_BIOS_3_SCRATCH, reg & ~ATOM_S3_DFP2I_ACTIVE);
+				atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+				WREG32(RADEON_BIOS_3_SCRATCH, reg);
+			} else
+				atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
 				args.ucAction = ATOM_LCD_BLON;
 				atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
@@ -1748,9 +1755,12 @@ static int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder)
 	/* DCE4/5 */
 	if (ASIC_IS_DCE4(rdev)) {
 		dig = radeon_encoder->enc_priv;
-		if (ASIC_IS_DCE41(rdev))
-			return radeon_crtc->crtc_id;
-		else {
+		if (ASIC_IS_DCE41(rdev)) {
+			if (dig->linkb)
+				return 1;
+			else
+				return 0;
+		} else {
 			switch (radeon_encoder->encoder_id) {
 			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
 				if (dig->linkb)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 7fd4e3e5ad5f..a488b502ec1f 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -263,7 +263,7 @@ retry:
 		 */
 		if (seq == rdev->fence_drv.last_seq && radeon_gpu_is_lockup(rdev)) {
 			/* good news we believe it's a lockup */
-			WARN(1, "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
+			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
 			     fence->seq, seq);
 			/* FIXME: what should we do ? marking everyone
 			 * as signaled for now
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 6c111c1fa3f9..02cb7da4124d 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -81,8 +81,9 @@ bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool requires_e
 
 /* bit banging i2c */
 
-static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state)
+static int pre_xfer(struct i2c_adapter *i2c_adap)
 {
+	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
 	struct radeon_device *rdev = i2c->dev->dev_private;
 	struct radeon_i2c_bus_rec *rec = &i2c->rec;
 	uint32_t temp;
@@ -137,19 +138,30 @@ static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state)
 	WREG32(rec->en_data_reg, temp);
 
 	/* mask the gpio pins for software use */
-	temp = RREG32(rec->mask_clk_reg);
-	if (lock_state)
-		temp |= rec->mask_clk_mask;
-	else
-		temp &= ~rec->mask_clk_mask;
+	temp = RREG32(rec->mask_clk_reg) | rec->mask_clk_mask;
 	WREG32(rec->mask_clk_reg, temp);
 	temp = RREG32(rec->mask_clk_reg);
 
+	temp = RREG32(rec->mask_data_reg) | rec->mask_data_mask;
+	WREG32(rec->mask_data_reg, temp);
 	temp = RREG32(rec->mask_data_reg);
-	if (lock_state)
-		temp |= rec->mask_data_mask;
-	else
-		temp &= ~rec->mask_data_mask;
+
+	return 0;
+}
+
+static void post_xfer(struct i2c_adapter *i2c_adap)
+{
+	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+	struct radeon_device *rdev = i2c->dev->dev_private;
+	struct radeon_i2c_bus_rec *rec = &i2c->rec;
+	uint32_t temp;
+
+	/* unmask the gpio pins for software use */
+	temp = RREG32(rec->mask_clk_reg) & ~rec->mask_clk_mask;
+	WREG32(rec->mask_clk_reg, temp);
+	temp = RREG32(rec->mask_clk_reg);
+
+	temp = RREG32(rec->mask_data_reg) & ~rec->mask_data_mask;
 	WREG32(rec->mask_data_reg, temp);
 	temp = RREG32(rec->mask_data_reg);
 }
@@ -209,22 +221,6 @@ static void set_data(void *i2c_priv, int data)
 	WREG32(rec->en_data_reg, val);
 }
 
-static int pre_xfer(struct i2c_adapter *i2c_adap)
-{
-	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
-
-	radeon_i2c_do_lock(i2c, 1);
-
-	return 0;
-}
-
-static void post_xfer(struct i2c_adapter *i2c_adap)
-{
-	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
-
-	radeon_i2c_do_lock(i2c, 0);
-}
-
 /* hw i2c */
 
 static u32 radeon_get_i2c_prescale(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c
index 465746bd51b7..00da38424dfc 100644
--- a/drivers/gpu/drm/radeon/radeon_irq.c
+++ b/drivers/gpu/drm/radeon/radeon_irq.c
@@ -129,7 +129,7 @@ void radeon_disable_vblank(struct drm_device *dev, int crtc)
 	}
 }
 
-static inline u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
+static u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
 {
 	u32 irqs = RADEON_READ(RADEON_GEN_INT_STATUS);
 	u32 irq_mask = RADEON_SW_INT_TEST;
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
index c7b6cb428d09..b37ec0f1413a 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
@@ -864,7 +864,7 @@ void radeon_legacy_tv_adjust_crtc_reg(struct drm_encoder *encoder,
 	*v_sync_strt_wid = tmp;
 }
 
-static inline int get_post_div(int value)
+static int get_post_div(int value)
 {
 	int post_div;
 	switch (value) {
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 68820f5f6303..ed0178f03235 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -447,6 +447,7 @@ struct radeon_connector {
 	struct edid *edid;
 	void *con_priv;
 	bool dac_load_detect;
+	bool detected_by_load; /* if the connection status was determined by load */
 	uint16_t connector_object_id;
 	struct radeon_hpd hpd;
 	struct radeon_router router;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 976c3b1b1b6e..138839312e8b 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -515,3 +515,45 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 	return 0;
 }
+
+int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait,
+		   enum ttm_buffer_usage usage)
+{
+	int r;
+
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+	if (unlikely(r != 0))
+		return r;
+	spin_lock(&bo->tbo.bdev->fence_lock);
+	if (mem_type)
+		*mem_type = bo->tbo.mem.mem_type;
+	if (bo->tbo.sync_obj)
+		r = ttm_bo_wait(&bo->tbo, true, true, no_wait, usage);
+	spin_unlock(&bo->tbo.bdev->fence_lock);
+	ttm_bo_unreserve(&bo->tbo);
+	return r;
+}
+
+
+/**
+ * radeon_bo_reserve - reserve bo
+ * @bo:		bo structure
+ * @no_wait:		don't sleep while trying to reserve (return -EBUSY)
+ *
+ * Returns:
+ * -EBUSY: buffer is busy and @no_wait is true
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
+{
+	int r;
+
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
+		return r;
+	}
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index a057a8e5a6e6..c6c8e43e6d9a 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -52,28 +52,7 @@ static inline unsigned radeon_mem_type_to_domain(u32 mem_type)
 	return 0;
 }
 
-/**
- * radeon_bo_reserve - reserve bo
- * @bo:		bo structure
- * @no_wait:		don't sleep while trying to reserve (return -EBUSY)
- *
- * Returns:
- * -EBUSY: buffer is busy and @no_wait is true
- * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
- * a signal. Release all buffer reservations and return to user-space.
- */
-static inline int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
-{
-	int r;
-
-	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
-	if (unlikely(r != 0)) {
-		if (r != -ERESTARTSYS)
-			dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
-		return r;
-	}
-	return 0;
-}
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait);
 
 static inline void radeon_bo_unreserve(struct radeon_bo *bo)
 {
@@ -118,23 +97,8 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
 	return bo->tbo.addr_space_offset;
 }
 
-static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
-				 bool no_wait, enum ttm_buffer_usage usage)
-{
-	int r;
-
-	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
-	if (unlikely(r != 0))
-		return r;
-	spin_lock(&bo->tbo.bdev->fence_lock);
-	if (mem_type)
-		*mem_type = bo->tbo.mem.mem_type;
-	if (bo->tbo.sync_obj)
-		r = ttm_bo_wait(&bo->tbo, true, true, no_wait, usage);
-	spin_unlock(&bo->tbo.bdev->fence_lock);
-	ttm_bo_unreserve(&bo->tbo);
-	return r;
-}
+extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
+			  bool no_wait, enum ttm_buffer_usage usage);
 
 extern int radeon_bo_create(struct radeon_device *rdev,
 				unsigned long size, int byte_align,
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 08c0233db1b8..49d58202202c 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -35,6 +35,44 @@
 
 int radeon_debugfs_ib_init(struct radeon_device *rdev);
 
+u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
+{
+	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
+	u32 pg_idx, pg_offset;
+	u32 idx_value = 0;
+	int new_page;
+
+	pg_idx = (idx * 4) / PAGE_SIZE;
+	pg_offset = (idx * 4) % PAGE_SIZE;
+
+	if (ibc->kpage_idx[0] == pg_idx)
+		return ibc->kpage[0][pg_offset/4];
+	if (ibc->kpage_idx[1] == pg_idx)
+		return ibc->kpage[1][pg_offset/4];
+
+	new_page = radeon_cs_update_pages(p, pg_idx);
+	if (new_page < 0) {
+		p->parser_error = new_page;
+		return 0;
+	}
+
+	idx_value = ibc->kpage[new_page][pg_offset/4];
+	return idx_value;
+}
+
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
+{
+#if DRM_DEBUG_CODE
+	if (rdev->cp.count_dw <= 0) {
+		DRM_ERROR("radeon: writting more dword to ring than expected !\n");
+	}
+#endif
+	rdev->cp.ring[rdev->cp.wptr++] = v;
+	rdev->cp.wptr &= rdev->cp.ptr_mask;
+	rdev->cp.count_dw--;
+	rdev->cp.ring_free_dw--;
+}
+
 void radeon_ib_bogus_cleanup(struct radeon_device *rdev)
 {
 	struct radeon_ib *ib, *n;
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 92e7ea73b7c5..e8422ae7fe74 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -272,12 +272,12 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
 	return 0;
 }
 
-static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
-						     dev_priv,
-						     struct drm_file *file_priv,
-						     drm_radeon_kcmd_buffer_t *
-						     cmdbuf,
-						     unsigned int *cmdsz)
+static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
+					  dev_priv,
+					  struct drm_file *file_priv,
+					  drm_radeon_kcmd_buffer_t *
+					  cmdbuf,
+					  unsigned int *cmdsz)
 {
 	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 	u32 offset, narrays;
@@ -446,8 +446,8 @@ static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
  * CP hardware state programming functions
  */
 
-static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
-					     struct drm_clip_rect * box)
+static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
+				  struct drm_clip_rect * box)
 {
 	RING_LOCALS;
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 9b86fb0e4122..0b5468bfaf54 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -277,7 +277,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 		DRM_ERROR("Trying to move memory with CP turned off.\n");
 		return -EINVAL;
 	}
-	r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages, fence);
+
+	BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
+
+	r = radeon_copy(rdev, old_start, new_start,
+			new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
+			fence);
 	/* FIXME: handle copy error */
 	r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
 				      evict, no_wait_reserve, no_wait_gpu, new_mem);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 80928f9ff80f..87cc1feee3ac 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -539,55 +539,6 @@ static u32 r700_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
 	return backend_map;
 }
 
-static void rv770_program_channel_remap(struct radeon_device *rdev)
-{
-	u32 tcp_chan_steer, mc_shared_chremap, tmp;
-	bool force_no_swizzle;
-
-	switch (rdev->family) {
-	case CHIP_RV770:
-	case CHIP_RV730:
-		force_no_swizzle = false;
-		break;
-	case CHIP_RV710:
-	case CHIP_RV740:
-	default:
-		force_no_swizzle = true;
-		break;
-	}
-
-	tmp = RREG32(MC_SHARED_CHMAP);
-	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
-	case 0:
-	case 1:
-	default:
-		/* default mapping */
-		mc_shared_chremap = 0x00fac688;
-		break;
-	case 2:
-	case 3:
-		if (force_no_swizzle)
-			mc_shared_chremap = 0x00fac688;
-		else
-			mc_shared_chremap = 0x00bbc298;
-		break;
-	}
-
-	if (rdev->family == CHIP_RV740)
-		tcp_chan_steer = 0x00ef2a60;
-	else
-		tcp_chan_steer = 0x00fac688;
-
-	/* RV770 CE has special chremap setup */
-	if (rdev->pdev->device == 0x944e) {
-		tcp_chan_steer = 0x00b08b08;
-		mc_shared_chremap = 0x00b08b08;
-	}
-
-	WREG32(TCP_CHAN_STEER, tcp_chan_steer);
-	WREG32(MC_SHARED_CHREMAP, mc_shared_chremap);
-}
-
 static void rv770_gpu_init(struct radeon_device *rdev)
 {
 	int i, j, num_qd_pipes;
@@ -788,8 +739,6 @@ static void rv770_gpu_init(struct radeon_device *rdev)
 	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
 	WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
 
-	rv770_program_channel_remap(rdev);
-
 	WREG32(CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
 	WREG32(CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
 	WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
@@ -1238,8 +1187,6 @@ int rv770_resume(struct radeon_device *rdev)
 
 int rv770_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	r600_audio_fini(rdev);
 	/* FIXME: we should wait for ring to be empty */
 	r700_cp_stop(rdev);
@@ -1247,14 +1194,8 @@ int rv770_suspend(struct radeon_device *rdev)
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	rv770_pcie_gart_disable(rdev);
-	/* unpin shaders bo */
-	if (rdev->r600_blit.shader_obj) {
-		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-		if (likely(r == 0)) {
-			radeon_bo_unpin(rdev->r600_blit.shader_obj);
-			radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-		}
-	}
+	r600_blit_suspend(rdev);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index b824d9bdd87c..50fc8e4c9a31 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -394,7 +394,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 
 	if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
 		if (bo->ttm == NULL) {
-			ret = ttm_bo_add_ttm(bo, false);
+			bool zero = !(old_man->flags & TTM_MEMTYPE_FLAG_FIXED);
+			ret = ttm_bo_add_ttm(bo, zero);
 			if (ret)
 				goto out_err;
 		}
@@ -1295,6 +1296,7 @@ int ttm_bo_create(struct ttm_bo_device *bdev,
 
 	return ret;
 }
+EXPORT_SYMBOL(ttm_bo_create);
 
 static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 					unsigned mem_type, bool allow_errors)
diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index 30ad13344f7b..794ff67c5701 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig
@@ -7,7 +7,8 @@ config DRM_VMWGFX
 	select FB_CFB_IMAGEBLIT
 	select DRM_TTM
 	help
-	  KMS enabled DRM driver for SVGA2 virtual hardware.
-
-	  If unsure say n. The compiled module will be
-	  called vmwgfx.ko
+	  Choose this option if you would like to run 3D acceleration
+	  in a VMware virtual machine.
+	  This is a KMS enabled DRM driver for the VMware SVGA2
+	  virtual hardware.
+	  The compiled module will be called "vmwgfx.ko".
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 7d8e9d5d498c..586869c8c11f 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -5,6 +5,6 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
 	    vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \
 	    vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \
 	    vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o \
-	    vmwgfx_fence.o
+	    vmwgfx_fence.o vmwgfx_dmabuf.o vmwgfx_scrn.o
 
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/svga3d_reg.h b/drivers/gpu/drm/vmwgfx/svga3d_reg.h
index 77cb45331000..d0e085ee8249 100644
--- a/drivers/gpu/drm/vmwgfx/svga3d_reg.h
+++ b/drivers/gpu/drm/vmwgfx/svga3d_reg.h
@@ -57,7 +57,8 @@ typedef enum {
    SVGA3D_HWVERSION_WS6_B1    = SVGA3D_MAKE_HWVERSION(1, 1),
    SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4),
    SVGA3D_HWVERSION_WS65_B1   = SVGA3D_MAKE_HWVERSION(2, 0),
-   SVGA3D_HWVERSION_CURRENT   = SVGA3D_HWVERSION_WS65_B1,
+   SVGA3D_HWVERSION_WS8_B1    = SVGA3D_MAKE_HWVERSION(2, 1),
+   SVGA3D_HWVERSION_CURRENT   = SVGA3D_HWVERSION_WS8_B1,
 } SVGA3dHardwareVersion;
 
 /*
@@ -67,7 +68,8 @@ typedef enum {
 typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
 #define SVGA3D_NUM_CLIPPLANES                   6
 #define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS  8
-
+#define SVGA3D_MAX_CONTEXT_IDS                  256
+#define SVGA3D_MAX_SURFACE_IDS                  (32 * 1024)
 
 /*
  * Surface formats.
@@ -79,76 +81,91 @@ typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
  */
 
 typedef enum SVGA3dSurfaceFormat {
-   SVGA3D_FORMAT_INVALID = 0,
+   SVGA3D_FORMAT_INVALID               = 0,
 
-   SVGA3D_X8R8G8B8       = 1,
-   SVGA3D_A8R8G8B8       = 2,
+   SVGA3D_X8R8G8B8                     = 1,
+   SVGA3D_A8R8G8B8                     = 2,
 
-   SVGA3D_R5G6B5         = 3,
-   SVGA3D_X1R5G5B5       = 4,
-   SVGA3D_A1R5G5B5       = 5,
-   SVGA3D_A4R4G4B4       = 6,
+   SVGA3D_R5G6B5                       = 3,
+   SVGA3D_X1R5G5B5                     = 4,
+   SVGA3D_A1R5G5B5                     = 5,
+   SVGA3D_A4R4G4B4                     = 6,
 
-   SVGA3D_Z_D32          = 7,
-   SVGA3D_Z_D16          = 8,
-   SVGA3D_Z_D24S8        = 9,
-   SVGA3D_Z_D15S1        = 10,
+   SVGA3D_Z_D32                        = 7,
+   SVGA3D_Z_D16                        = 8,
+   SVGA3D_Z_D24S8                      = 9,
+   SVGA3D_Z_D15S1                      = 10,
 
-   SVGA3D_LUMINANCE8            = 11,
-   SVGA3D_LUMINANCE4_ALPHA4     = 12,
-   SVGA3D_LUMINANCE16           = 13,
-   SVGA3D_LUMINANCE8_ALPHA8     = 14,
+   SVGA3D_LUMINANCE8                   = 11,
+   SVGA3D_LUMINANCE4_ALPHA4            = 12,
+   SVGA3D_LUMINANCE16                  = 13,
+   SVGA3D_LUMINANCE8_ALPHA8            = 14,
 
-   SVGA3D_DXT1           = 15,
-   SVGA3D_DXT2           = 16,
-   SVGA3D_DXT3           = 17,
-   SVGA3D_DXT4           = 18,
-   SVGA3D_DXT5           = 19,
+   SVGA3D_DXT1                         = 15,
+   SVGA3D_DXT2                         = 16,
+   SVGA3D_DXT3                         = 17,
+   SVGA3D_DXT4                         = 18,
+   SVGA3D_DXT5                         = 19,
 
-   SVGA3D_BUMPU8V8       = 20,
-   SVGA3D_BUMPL6V5U5     = 21,
-   SVGA3D_BUMPX8L8V8U8   = 22,
-   SVGA3D_BUMPL8V8U8     = 23,
+   SVGA3D_BUMPU8V8                     = 20,
+   SVGA3D_BUMPL6V5U5                   = 21,
+   SVGA3D_BUMPX8L8V8U8                 = 22,
+   SVGA3D_BUMPL8V8U8                   = 23,
 
-   SVGA3D_ARGB_S10E5     = 24,   /* 16-bit floating-point ARGB */
-   SVGA3D_ARGB_S23E8     = 25,   /* 32-bit floating-point ARGB */
+   SVGA3D_ARGB_S10E5                   = 24,   /* 16-bit floating-point ARGB */
+   SVGA3D_ARGB_S23E8                   = 25,   /* 32-bit floating-point ARGB */
 
-   SVGA3D_A2R10G10B10    = 26,
+   SVGA3D_A2R10G10B10                  = 26,
 
    /* signed formats */
-   SVGA3D_V8U8           = 27,
-   SVGA3D_Q8W8V8U8       = 28,
-   SVGA3D_CxV8U8         = 29,
+   SVGA3D_V8U8                         = 27,
+   SVGA3D_Q8W8V8U8                     = 28,
+   SVGA3D_CxV8U8                       = 29,
 
    /* mixed formats */
-   SVGA3D_X8L8V8U8       = 30,
-   SVGA3D_A2W10V10U10    = 31,
+   SVGA3D_X8L8V8U8                     = 30,
+   SVGA3D_A2W10V10U10                  = 31,
 
-   SVGA3D_ALPHA8         = 32,
+   SVGA3D_ALPHA8                       = 32,
 
    /* Single- and dual-component floating point formats */
-   SVGA3D_R_S10E5        = 33,
-   SVGA3D_R_S23E8        = 34,
-   SVGA3D_RG_S10E5       = 35,
-   SVGA3D_RG_S23E8       = 36,
+   SVGA3D_R_S10E5                      = 33,
+   SVGA3D_R_S23E8                      = 34,
+   SVGA3D_RG_S10E5                     = 35,
+   SVGA3D_RG_S23E8                     = 36,
 
    /*
     * Any surface can be used as a buffer object, but SVGA3D_BUFFER is
     * the most efficient format to use when creating new surfaces
     * expressly for index or vertex data.
     */
-   SVGA3D_BUFFER         = 37,
 
-   SVGA3D_Z_D24X8        = 38,
+   SVGA3D_BUFFER                       = 37,
+
+   SVGA3D_Z_D24X8                      = 38,
 
-   SVGA3D_V16U16         = 39,
+   SVGA3D_V16U16                       = 39,
 
-   SVGA3D_G16R16         = 40,
-   SVGA3D_A16B16G16R16   = 41,
+   SVGA3D_G16R16                       = 40,
+   SVGA3D_A16B16G16R16                 = 41,
 
    /* Packed Video formats */
-   SVGA3D_UYVY           = 42,
-   SVGA3D_YUY2           = 43,
+   SVGA3D_UYVY                         = 42,
+   SVGA3D_YUY2                         = 43,
+
+   /* Planar video formats */
+   SVGA3D_NV12                         = 44,
+
+   /* Video format with alpha */
+   SVGA3D_AYUV                         = 45,
+
+   SVGA3D_BC4_UNORM                    = 108,
+   SVGA3D_BC5_UNORM                    = 111,
+
+   /* Advanced D3D9 depth formats. */
+   SVGA3D_Z_DF16                       = 118,
+   SVGA3D_Z_DF24                       = 119,
+   SVGA3D_Z_D24S8_INT                  = 120,
 
    SVGA3D_FORMAT_MAX
 } SVGA3dSurfaceFormat;
@@ -414,10 +431,20 @@ typedef enum {
    SVGA3D_RS_SRCBLENDALPHA             = 94,    /* SVGA3dBlendOp */
    SVGA3D_RS_DSTBLENDALPHA             = 95,    /* SVGA3dBlendOp */
    SVGA3D_RS_BLENDEQUATIONALPHA        = 96,    /* SVGA3dBlendEquation */
+   SVGA3D_RS_TRANSPARENCYANTIALIAS     = 97,    /* SVGA3dTransparencyAntialiasType */
+   SVGA3D_RS_LINEAA                    = 98,    /* SVGA3dBool */
+   SVGA3D_RS_LINEWIDTH                 = 99,    /* float */
    SVGA3D_RS_MAX
 } SVGA3dRenderStateName;
 
 typedef enum {
+   SVGA3D_TRANSPARENCYANTIALIAS_NORMAL            = 0,
+   SVGA3D_TRANSPARENCYANTIALIAS_ALPHATOCOVERAGE   = 1,
+   SVGA3D_TRANSPARENCYANTIALIAS_SUPERSAMPLE       = 2,
+   SVGA3D_TRANSPARENCYANTIALIAS_MAX
+} SVGA3dTransparencyAntialiasType;
+
+typedef enum {
    SVGA3D_VERTEXMATERIAL_NONE     = 0,    /* Use the value in the current material */
    SVGA3D_VERTEXMATERIAL_DIFFUSE  = 1,    /* Use the value in the diffuse component */
    SVGA3D_VERTEXMATERIAL_SPECULAR = 2,    /* Use the value in the specular component */
@@ -728,10 +755,10 @@ typedef enum {
    SVGA3D_TEX_FILTER_NEAREST        = 1,
    SVGA3D_TEX_FILTER_LINEAR         = 2,
    SVGA3D_TEX_FILTER_ANISOTROPIC    = 3,
-   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, // Deprecated, not implemented
-   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, // Deprecated, not implemented
-   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, // Not currently implemented
-   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, // Not currently implemented
+   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, /* Deprecated, not implemented */
+   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, /* Deprecated, not implemented */
+   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, /* Not currently implemented */
+   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, /* Not currently implemented */
    SVGA3D_TEX_FILTER_MAX
 } SVGA3dTextureFilter;
 
@@ -799,19 +826,19 @@ typedef enum {
 
 typedef enum {
    SVGA3D_DECLUSAGE_POSITION     = 0,
-   SVGA3D_DECLUSAGE_BLENDWEIGHT,       //  1
-   SVGA3D_DECLUSAGE_BLENDINDICES,      //  2
-   SVGA3D_DECLUSAGE_NORMAL,            //  3
-   SVGA3D_DECLUSAGE_PSIZE,             //  4
-   SVGA3D_DECLUSAGE_TEXCOORD,          //  5
-   SVGA3D_DECLUSAGE_TANGENT,           //  6
-   SVGA3D_DECLUSAGE_BINORMAL,          //  7
-   SVGA3D_DECLUSAGE_TESSFACTOR,        //  8
-   SVGA3D_DECLUSAGE_POSITIONT,         //  9
-   SVGA3D_DECLUSAGE_COLOR,             // 10
-   SVGA3D_DECLUSAGE_FOG,               // 11
-   SVGA3D_DECLUSAGE_DEPTH,             // 12
-   SVGA3D_DECLUSAGE_SAMPLE,            // 13
+   SVGA3D_DECLUSAGE_BLENDWEIGHT,       /*  1 */
+   SVGA3D_DECLUSAGE_BLENDINDICES,      /*  2 */
+   SVGA3D_DECLUSAGE_NORMAL,            /*  3 */
+   SVGA3D_DECLUSAGE_PSIZE,             /*  4 */
+   SVGA3D_DECLUSAGE_TEXCOORD,          /*  5 */
+   SVGA3D_DECLUSAGE_TANGENT,           /*  6 */
+   SVGA3D_DECLUSAGE_BINORMAL,          /*  7 */
+   SVGA3D_DECLUSAGE_TESSFACTOR,        /*  8 */
+   SVGA3D_DECLUSAGE_POSITIONT,         /*  9 */
+   SVGA3D_DECLUSAGE_COLOR,             /* 10 */
+   SVGA3D_DECLUSAGE_FOG,               /* 11 */
+   SVGA3D_DECLUSAGE_DEPTH,             /* 12 */
+   SVGA3D_DECLUSAGE_SAMPLE,            /* 13 */
    SVGA3D_DECLUSAGE_MAX
 } SVGA3dDeclUsage;
 
@@ -819,10 +846,10 @@ typedef enum {
    SVGA3D_DECLMETHOD_DEFAULT     = 0,
    SVGA3D_DECLMETHOD_PARTIALU,
    SVGA3D_DECLMETHOD_PARTIALV,
-   SVGA3D_DECLMETHOD_CROSSUV,          // Normal
+   SVGA3D_DECLMETHOD_CROSSUV,          /* Normal */
    SVGA3D_DECLMETHOD_UV,
-   SVGA3D_DECLMETHOD_LOOKUP,           // Lookup a displacement map
-   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map
+   SVGA3D_DECLMETHOD_LOOKUP,           /* Lookup a displacement map */
+   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled displacement map */
 } SVGA3dDeclMethod;
 
 typedef enum {
@@ -930,7 +957,6 @@ typedef enum {
 } SVGA3dCubeFace;
 
 typedef enum {
-   SVGA3D_SHADERTYPE_COMPILED_DX8               = 0,
    SVGA3D_SHADERTYPE_VS                         = 1,
    SVGA3D_SHADERTYPE_PS                         = 2,
    SVGA3D_SHADERTYPE_MAX
@@ -968,12 +994,18 @@ typedef enum {
 } SVGA3dTransferType;
 
 /*
- * The maximum number vertex arrays we're guaranteed to support in
+ * The maximum number of vertex arrays we're guaranteed to support in
  * SVGA_3D_CMD_DRAWPRIMITIVES.
  */
 #define SVGA3D_MAX_VERTEX_ARRAYS   32
 
 /*
+ * The maximum number of primitive ranges we're guaranteed to support
+ * in SVGA_3D_CMD_DRAWPRIMITIVES.
+ */
+#define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32
+
+/*
  * Identifiers for commands in the command FIFO.
  *
  * IDs between 1000 and 1039 (inclusive) were used by obsolete versions of
@@ -990,7 +1022,7 @@ typedef enum {
 #define SVGA_3D_CMD_LEGACY_BASE            1000
 #define SVGA_3D_CMD_BASE                   1040
 
-#define SVGA_3D_CMD_SURFACE_DEFINE         SVGA_3D_CMD_BASE + 0
+#define SVGA_3D_CMD_SURFACE_DEFINE         SVGA_3D_CMD_BASE + 0     /* Deprecated */
 #define SVGA_3D_CMD_SURFACE_DESTROY        SVGA_3D_CMD_BASE + 1
 #define SVGA_3D_CMD_SURFACE_COPY           SVGA_3D_CMD_BASE + 2
 #define SVGA_3D_CMD_SURFACE_STRETCHBLT     SVGA_3D_CMD_BASE + 3
@@ -1008,7 +1040,7 @@ typedef enum {
 #define SVGA_3D_CMD_SETVIEWPORT            SVGA_3D_CMD_BASE + 15
 #define SVGA_3D_CMD_SETCLIPPLANE           SVGA_3D_CMD_BASE + 16
 #define SVGA_3D_CMD_CLEAR                  SVGA_3D_CMD_BASE + 17
-#define SVGA_3D_CMD_PRESENT                SVGA_3D_CMD_BASE + 18    // Deprecated
+#define SVGA_3D_CMD_PRESENT                SVGA_3D_CMD_BASE + 18    /* Deprecated */
 #define SVGA_3D_CMD_SHADER_DEFINE          SVGA_3D_CMD_BASE + 19
 #define SVGA_3D_CMD_SHADER_DESTROY         SVGA_3D_CMD_BASE + 20
 #define SVGA_3D_CMD_SET_SHADER             SVGA_3D_CMD_BASE + 21
@@ -1018,9 +1050,13 @@ typedef enum {
 #define SVGA_3D_CMD_BEGIN_QUERY            SVGA_3D_CMD_BASE + 25
 #define SVGA_3D_CMD_END_QUERY              SVGA_3D_CMD_BASE + 26
 #define SVGA_3D_CMD_WAIT_FOR_QUERY         SVGA_3D_CMD_BASE + 27
-#define SVGA_3D_CMD_PRESENT_READBACK       SVGA_3D_CMD_BASE + 28    // Deprecated
+#define SVGA_3D_CMD_PRESENT_READBACK       SVGA_3D_CMD_BASE + 28    /* Deprecated */
 #define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN SVGA_3D_CMD_BASE + 29
-#define SVGA_3D_CMD_MAX                    SVGA_3D_CMD_BASE + 30
+#define SVGA_3D_CMD_SURFACE_DEFINE_V2      SVGA_3D_CMD_BASE + 30
+#define SVGA_3D_CMD_GENERATE_MIPMAPS       SVGA_3D_CMD_BASE + 31
+#define SVGA_3D_CMD_ACTIVATE_SURFACE       SVGA_3D_CMD_BASE + 40
+#define SVGA_3D_CMD_DEACTIVATE_SURFACE     SVGA_3D_CMD_BASE + 41
+#define SVGA_3D_CMD_MAX                    SVGA_3D_CMD_BASE + 42
 
 #define SVGA_3D_CMD_FUTURE_MAX             2000
 
@@ -1031,9 +1067,9 @@ typedef enum {
 typedef struct {
    union {
       struct {
-         uint16  function;       // SVGA3dFogFunction
-         uint8   type;           // SVGA3dFogType
-         uint8   base;           // SVGA3dFogBase
+         uint16  function;       /* SVGA3dFogFunction */
+         uint8   type;           /* SVGA3dFogType */
+         uint8   base;           /* SVGA3dFogBase */
       };
       uint32     uintValue;
    };
@@ -1109,6 +1145,8 @@ typedef enum {
    SVGA3D_SURFACE_HINT_RENDERTARGET    = (1 << 6),
    SVGA3D_SURFACE_HINT_DEPTHSTENCIL    = (1 << 7),
    SVGA3D_SURFACE_HINT_WRITEONLY       = (1 << 8),
+   SVGA3D_SURFACE_MASKABLE_ANTIALIAS   = (1 << 9),
+   SVGA3D_SURFACE_AUTOGENMIPMAPS       = (1 << 10),
 } SVGA3dSurfaceFlags;
 
 typedef
@@ -1121,6 +1159,12 @@ struct {
    uint32                      sid;
    SVGA3dSurfaceFlags          surfaceFlags;
    SVGA3dSurfaceFormat         format;
+   /*
+    * If surfaceFlags has SVGA3D_SURFACE_CUBEMAP bit set, all SVGA3dSurfaceFace
+    * structures must have the same value of numMipLevels field.
+    * Otherwise, all but the first SVGA3dSurfaceFace structures must have the
+    * numMipLevels set to 0.
+    */
    SVGA3dSurfaceFace           face[SVGA3D_MAX_SURFACE_FACES];
    /*
     * Followed by an SVGA3dSize structure for each mip level in each face.
@@ -1135,6 +1179,31 @@ struct {
 
 typedef
 struct {
+   uint32                      sid;
+   SVGA3dSurfaceFlags          surfaceFlags;
+   SVGA3dSurfaceFormat         format;
+   /*
+    * If surfaceFlags has SVGA3D_SURFACE_CUBEMAP bit set, all SVGA3dSurfaceFace
+    * structures must have the same value of numMipLevels field.
+    * Otherwise, all but the first SVGA3dSurfaceFace structures must have the
+    * numMipLevels set to 0.
+    */
+   SVGA3dSurfaceFace           face[SVGA3D_MAX_SURFACE_FACES];
+   uint32                      multisampleCount;
+   SVGA3dTextureFilter         autogenFilter;
+   /*
+    * Followed by an SVGA3dSize structure for each mip level in each face.
+    *
+    * A note on surface sizes: Sizes are always specified in pixels,
+    * even if the true surface size is not a multiple of the minimum
+    * block size of the surface's format. For example, a 3x3x1 DXT1
+    * compressed texture would actually be stored as a 4x4x1 image in
+    * memory.
+    */
+} SVGA3dCmdDefineSurface_v2;     /* SVGA_3D_CMD_SURFACE_DEFINE_V2 */
+
+typedef
+struct {
    uint32               sid;
 } SVGA3dCmdDestroySurface;      /* SVGA_3D_CMD_SURFACE_DESTROY */
 
@@ -1474,10 +1543,12 @@ struct {
     * SVGA3dCmdDrawPrimitives structure. In order,
     * they are:
     *
-    * 1. SVGA3dVertexDecl, quantity 'numVertexDecls'
-    * 2. SVGA3dPrimitiveRange, quantity 'numRanges'
+    * 1. SVGA3dVertexDecl, quantity 'numVertexDecls', but no more than
+    *    SVGA3D_MAX_VERTEX_ARRAYS;
+    * 2. SVGA3dPrimitiveRange, quantity 'numRanges', but no more than
+    *    SVGA3D_MAX_DRAW_PRIMITIVE_RANGES;
     * 3. Optionally, SVGA3dVertexDivisor, quantity 'numVertexDecls' (contains
-    *    the frequency divisor for this the corresponding vertex decl)
+    *    the frequency divisor for the corresponding vertex decl).
     */
 } SVGA3dCmdDrawPrimitives;      /* SVGA_3D_CMD_DRAWPRIMITIVES */
 
@@ -1671,6 +1742,12 @@ struct {
    /* Clipping: zero or more SVGASignedRects follow */
 } SVGA3dCmdBlitSurfaceToScreen;         /* SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN */
 
+typedef
+struct {
+   uint32               sid;
+   SVGA3dTextureFilter  filter;
+} SVGA3dCmdGenerateMipmaps;             /* SVGA_3D_CMD_GENERATE_MIPMAPS */
+
 
 /*
  * Capability query index.
@@ -1774,6 +1851,32 @@ typedef enum {
    SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16           = 67,
    SVGA3D_DEVCAP_SURFACEFMT_UYVY                   = 68,
    SVGA3D_DEVCAP_SURFACEFMT_YUY2                   = 69,
+   SVGA3D_DEVCAP_MULTISAMPLE_NONMASKABLESAMPLES    = 70,
+   SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES       = 71,
+   SVGA3D_DEVCAP_ALPHATOCOVERAGE                   = 72,
+   SVGA3D_DEVCAP_SUPERSAMPLE                       = 73,
+   SVGA3D_DEVCAP_AUTOGENMIPMAPS                    = 74,
+   SVGA3D_DEVCAP_SURFACEFMT_NV12                   = 75,
+   SVGA3D_DEVCAP_SURFACEFMT_AYUV                   = 76,
+
+   /*
+    * This is the maximum number of SVGA context IDs that the guest
+    * can define using SVGA_3D_CMD_CONTEXT_DEFINE.
+    */
+   SVGA3D_DEVCAP_MAX_CONTEXT_IDS                   = 77,
+
+   /*
+    * This is the maximum number of SVGA surface IDs that the guest
+    * can define using SVGA_3D_CMD_SURFACE_DEFINE*.
+    */
+   SVGA3D_DEVCAP_MAX_SURFACE_IDS                   = 78,
+
+   SVGA3D_DEVCAP_SURFACEFMT_Z_DF16                 = 79,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_DF24                 = 80,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT            = 81,
+
+   SVGA3D_DEVCAP_SURFACEFMT_BC4_UNORM              = 82,
+   SVGA3D_DEVCAP_SURFACEFMT_BC5_UNORM              = 83,
 
    /*
     * Don't add new caps into the previous section; the values in this
diff --git a/drivers/gpu/drm/vmwgfx/svga_escape.h b/drivers/gpu/drm/vmwgfx/svga_escape.h
index 7b85e9b8c854..8e8d9682e018 100644
--- a/drivers/gpu/drm/vmwgfx/svga_escape.h
+++ b/drivers/gpu/drm/vmwgfx/svga_escape.h
@@ -75,7 +75,7 @@
  */
 
 #define SVGA_ESCAPE_VMWARE_HINT               0x00030000
-#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  // Deprecated
+#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  /* Deprecated */
 
 typedef
 struct {
diff --git a/drivers/gpu/drm/vmwgfx/svga_overlay.h b/drivers/gpu/drm/vmwgfx/svga_overlay.h
index f753d73c14b4..f38416fcb046 100644
--- a/drivers/gpu/drm/vmwgfx/svga_overlay.h
+++ b/drivers/gpu/drm/vmwgfx/svga_overlay.h
@@ -38,9 +38,9 @@
  * Video formats we support
  */
 
-#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2'
-#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2'
-#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y'
+#define VMWARE_FOURCC_YV12 0x32315659 /* 'Y' 'V' '1' '2' */
+#define VMWARE_FOURCC_YUY2 0x32595559 /* 'Y' 'U' 'Y' '2' */
+#define VMWARE_FOURCC_UYVY 0x59565955 /* 'U' 'Y' 'V' 'Y' */
 
 typedef enum {
    SVGA_OVERLAY_FORMAT_INVALID = 0,
@@ -68,7 +68,7 @@ struct SVGAEscapeVideoSetRegs {
       uint32 streamId;
    } header;
 
-   // May include zero or more items.
+   /* May include zero or more items. */
    struct {
       uint32 registerId;
       uint32 value;
@@ -134,12 +134,12 @@ struct {
  */
 
 static inline bool
-VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
-                         uint32 *width,                     // IN / OUT
-                         uint32 *height,                    // IN / OUT
-                         uint32 *size,                      // OUT
-                         uint32 *pitches,                   // OUT (optional)
-                         uint32 *offsets)                   // OUT (optional)
+VMwareVideoGetAttributes(const SVGAOverlayFormat format,    /* IN */
+                         uint32 *width,                     /* IN / OUT */
+                         uint32 *height,                    /* IN / OUT */
+                         uint32 *size,                      /* OUT */
+                         uint32 *pitches,                   /* OUT (optional) */
+                         uint32 *offsets)                   /* OUT (optional) */
 {
     int tmp;
 
@@ -198,4 +198,4 @@ VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
     return true;
 }
 
-#endif // _SVGA_OVERLAY_H_
+#endif /* _SVGA_OVERLAY_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/svga_reg.h b/drivers/gpu/drm/vmwgfx/svga_reg.h
index ec5aad9b6ed3..01f63cb49678 100644
--- a/drivers/gpu/drm/vmwgfx/svga_reg.h
+++ b/drivers/gpu/drm/vmwgfx/svga_reg.h
@@ -276,7 +276,7 @@ enum {
  * possible.
  */
 #define SVGA_GMR_NULL         ((uint32) -1)
-#define SVGA_GMR_FRAMEBUFFER  ((uint32) -2)  // Guest Framebuffer (GFB)
+#define SVGA_GMR_FRAMEBUFFER  ((uint32) -2)  /* Guest Framebuffer (GFB) */
 
 typedef
 struct SVGAGuestMemDescriptor {
@@ -317,13 +317,35 @@ struct SVGAGMRImageFormat {
       struct {
          uint32 bitsPerPixel : 8;
          uint32 colorDepth   : 8;
-         uint32 reserved     : 16;  // Must be zero
+         uint32 reserved     : 16;  /* Must be zero */
       };
 
       uint32 value;
    };
 } SVGAGMRImageFormat;
 
+typedef
+struct SVGAGuestImage {
+   SVGAGuestPtr         ptr;
+
+   /*
+    * A note on interpretation of pitch: This value of pitch is the
+    * number of bytes between vertically adjacent image
+    * blocks. Normally this is the number of bytes between the first
+    * pixel of two adjacent scanlines. With compressed textures,
+    * however, this may represent the number of bytes between
+    * compression blocks rather than between rows of pixels.
+    *
+    * XXX: Compressed textures currently must be tightly packed in guest memory.
+    *
+    * If the image is 1-dimensional, pitch is ignored.
+    *
+    * If 'pitch' is zero, the SVGA3D device calculates a pitch value
+    * assuming each row of blocks is tightly packed.
+    */
+   uint32 pitch;
+} SVGAGuestImage;
+
 /*
  * SVGAColorBGRX --
  *
@@ -339,7 +361,7 @@ struct SVGAColorBGRX {
          uint32 b : 8;
          uint32 g : 8;
          uint32 r : 8;
-         uint32 x : 8;  // Unused
+         uint32 x : 8;  /* Unused */
       };
 
       uint32 value;
@@ -395,16 +417,16 @@ struct SVGASignedPoint {
 #define SVGA_CAP_NONE               0x00000000
 #define SVGA_CAP_RECT_COPY          0x00000002
 #define SVGA_CAP_CURSOR             0x00000020
-#define SVGA_CAP_CURSOR_BYPASS      0x00000040   // Legacy (Use Cursor Bypass 3 instead)
-#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080   // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_CURSOR_BYPASS      0x00000040   /* Legacy (Use Cursor Bypass 3 instead) */
+#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080   /* Legacy (Use Cursor Bypass 3 instead) */
 #define SVGA_CAP_8BIT_EMULATION     0x00000100
 #define SVGA_CAP_ALPHA_CURSOR       0x00000200
 #define SVGA_CAP_3D                 0x00004000
 #define SVGA_CAP_EXTENDED_FIFO      0x00008000
-#define SVGA_CAP_MULTIMON           0x00010000   // Legacy multi-monitor support
+#define SVGA_CAP_MULTIMON           0x00010000   /* Legacy multi-monitor support */
 #define SVGA_CAP_PITCHLOCK          0x00020000
 #define SVGA_CAP_IRQMASK            0x00040000
-#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   // Legacy multi-monitor support
+#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   /* Legacy multi-monitor support */
 #define SVGA_CAP_GMR                0x00100000
 #define SVGA_CAP_TRACES             0x00200000
 #define SVGA_CAP_GMR2               0x00400000
@@ -453,7 +475,7 @@ enum {
 
    SVGA_FIFO_CAPABILITIES = 4,
    SVGA_FIFO_FLAGS,
-   // Valid with SVGA_FIFO_CAP_FENCE:
+   /* Valid with SVGA_FIFO_CAP_FENCE: */
    SVGA_FIFO_FENCE,
 
    /*
@@ -466,33 +488,47 @@ enum {
     * extended FIFO.
     */
 
-   // Valid if exists (i.e. if extended FIFO enabled):
+   /* Valid if exists (i.e. if extended FIFO enabled): */
    SVGA_FIFO_3D_HWVERSION,       /* See SVGA3dHardwareVersion in svga3d_reg.h */
-   // Valid with SVGA_FIFO_CAP_PITCHLOCK:
+   /* Valid with SVGA_FIFO_CAP_PITCHLOCK: */
    SVGA_FIFO_PITCHLOCK,
 
-   // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3:
+   /* Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: */
    SVGA_FIFO_CURSOR_ON,          /* Cursor bypass 3 show/hide register */
    SVGA_FIFO_CURSOR_X,           /* Cursor bypass 3 x register */
    SVGA_FIFO_CURSOR_Y,           /* Cursor bypass 3 y register */
    SVGA_FIFO_CURSOR_COUNT,       /* Incremented when any of the other 3 change */
    SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */
 
-   // Valid with SVGA_FIFO_CAP_RESERVE:
+   /* Valid with SVGA_FIFO_CAP_RESERVE: */
    SVGA_FIFO_RESERVED,           /* Bytes past NEXT_CMD with real contents */
 
    /*
-    * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT:
+    * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2:
     *
     * By default this is SVGA_ID_INVALID, to indicate that the cursor
     * coordinates are specified relative to the virtual root. If this
     * is set to a specific screen ID, cursor position is reinterpreted
-    * as a signed offset relative to that screen's origin. This is the
-    * only way to place the cursor on a non-rooted screen.
+    * as a signed offset relative to that screen's origin.
     */
    SVGA_FIFO_CURSOR_SCREEN_ID,
 
    /*
+    * Valid with SVGA_FIFO_CAP_DEAD
+    *
+    * An arbitrary value written by the host, drivers should not use it.
+    */
+   SVGA_FIFO_DEAD,
+
+   /*
+    * Valid with SVGA_FIFO_CAP_3D_HWVERSION_REVISED:
+    *
+    * Contains 3D HWVERSION (see SVGA3dHardwareVersion in svga3d_reg.h)
+    * on platforms that can enforce graphics resource limits.
+    */
+   SVGA_FIFO_3D_HWVERSION_REVISED,
+
+   /*
     * XXX: The gap here, up until SVGA_FIFO_3D_CAPS, can be used for new
     * registers, but this must be done carefully and with judicious use of
     * capability bits, since comparisons based on SVGA_FIFO_MIN aren't
@@ -530,7 +566,7 @@ enum {
     * sets SVGA_FIFO_MIN high enough to leave room for them.
     */
 
-   // Valid if register exists:
+   /* Valid if register exists: */
    SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */
    SVGA_FIFO_FENCE_GOAL,         /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */
    SVGA_FIFO_BUSY,               /* See "FIFO Synchronization Registers" */
@@ -731,6 +767,37 @@ enum {
  *
  *       - When a screen is resized, either using Screen Object commands or
  *         legacy multimon registers, its contents are preserved.
+ *
+ * SVGA_FIFO_CAP_GMR2 --
+ *
+ *    Provides new commands to define and remap guest memory regions (GMR).
+ *
+ *    New 2D commands:
+ *       DEFINE_GMR2, REMAP_GMR2.
+ *
+ * SVGA_FIFO_CAP_3D_HWVERSION_REVISED --
+ *
+ *    Indicates new register SVGA_FIFO_3D_HWVERSION_REVISED exists.
+ *    This register may replace SVGA_FIFO_3D_HWVERSION on platforms
+ *    that enforce graphics resource limits.  This allows the platform
+ *    to clear SVGA_FIFO_3D_HWVERSION and disable 3D in legacy guest
+ *    drivers that do not limit their resources.
+ *
+ *    Note this is an alias to SVGA_FIFO_CAP_GMR2 because these indicators
+ *    are codependent (and thus we use a single capability bit).
+ *
+ * SVGA_FIFO_CAP_SCREEN_OBJECT_2 --
+ *
+ *    Modifies the DEFINE_SCREEN command to include a guest provided
+ *    backing store in GMR memory and the bytesPerLine for the backing
+ *    store.  This capability requires the use of a backing store when
+ *    creating screen objects.  However if SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    is present then backing stores are optional.
+ *
+ * SVGA_FIFO_CAP_DEAD --
+ *
+ *    Drivers should not use this cap bit.  This cap bit can not be
+ *    reused since some hosts already expose it.
  */
 
 #define SVGA_FIFO_CAP_NONE                  0
@@ -742,6 +809,10 @@ enum {
 #define SVGA_FIFO_CAP_ESCAPE            (1<<5)
 #define SVGA_FIFO_CAP_RESERVE           (1<<6)
 #define SVGA_FIFO_CAP_SCREEN_OBJECT     (1<<7)
+#define SVGA_FIFO_CAP_GMR2              (1<<8)
+#define SVGA_FIFO_CAP_3D_HWVERSION_REVISED  SVGA_FIFO_CAP_GMR2
+#define SVGA_FIFO_CAP_SCREEN_OBJECT_2   (1<<9)
+#define SVGA_FIFO_CAP_DEAD              (1<<10)
 
 
 /*
@@ -752,7 +823,7 @@ enum {
 
 #define SVGA_FIFO_FLAG_NONE                 0
 #define SVGA_FIFO_FLAG_ACCELFRONT       (1<<0)
-#define SVGA_FIFO_FLAG_RESERVED        (1<<31) // Internal use only
+#define SVGA_FIFO_FLAG_RESERVED        (1<<31) /* Internal use only */
 
 /*
  * FIFO reservation sentinel value
@@ -785,22 +856,22 @@ enum {
    SVGA_VIDEO_DATA_OFFSET,
    SVGA_VIDEO_FORMAT,
    SVGA_VIDEO_COLORKEY,
-   SVGA_VIDEO_SIZE,          // Deprecated
+   SVGA_VIDEO_SIZE,          /* Deprecated */
    SVGA_VIDEO_WIDTH,
    SVGA_VIDEO_HEIGHT,
    SVGA_VIDEO_SRC_X,
    SVGA_VIDEO_SRC_Y,
    SVGA_VIDEO_SRC_WIDTH,
    SVGA_VIDEO_SRC_HEIGHT,
-   SVGA_VIDEO_DST_X,         // Signed int32
-   SVGA_VIDEO_DST_Y,         // Signed int32
+   SVGA_VIDEO_DST_X,         /* Signed int32 */
+   SVGA_VIDEO_DST_Y,         /* Signed int32 */
    SVGA_VIDEO_DST_WIDTH,
    SVGA_VIDEO_DST_HEIGHT,
    SVGA_VIDEO_PITCH_1,
    SVGA_VIDEO_PITCH_2,
    SVGA_VIDEO_PITCH_3,
-   SVGA_VIDEO_DATA_GMRID,    // Optional, defaults to SVGA_GMR_FRAMEBUFFER
-   SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID)
+   SVGA_VIDEO_DATA_GMRID,    /* Optional, defaults to SVGA_GMR_FRAMEBUFFER */
+   SVGA_VIDEO_DST_SCREEN_ID, /* Optional, defaults to virtual coords (SVGA_ID_INVALID) */
    SVGA_VIDEO_NUM_REGS
 };
 
@@ -851,15 +922,51 @@ typedef struct SVGAOverlayUnit {
  *    compatibility. New flags can be added, and the struct may grow,
  *    but existing fields must retain their meaning.
  *
+ *    Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2 are required fields of
+ *    a SVGAGuestPtr that is used to back the screen contents.  This
+ *    memory must come from the GFB.  The guest is not allowed to
+ *    access the memory and doing so will have undefined results.  The
+ *    backing store is required to be page aligned and the size is
+ *    padded to the next page boundry.  The number of pages is:
+ *       (bytesPerLine * size.width * 4 + PAGE_SIZE - 1) / PAGE_SIZE
+ *
+ *    The pitch in the backingStore is required to be at least large
+ *    enough to hold a 32bbp scanline.  It is recommended that the
+ *    driver pad bytesPerLine for a potential performance win.
+ *
+ *    The cloneCount field is treated as a hint from the guest that
+ *    the user wants this display to be cloned, countCount times.  A
+ *    value of zero means no cloning should happen.
  */
 
-#define SVGA_SCREEN_HAS_ROOT    (1 << 0)  // Screen is present in the virtual coord space
-#define SVGA_SCREEN_IS_PRIMARY  (1 << 1)  // Guest considers this screen to be 'primary'
-#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)   // Guest is running a fullscreen app here
+#define SVGA_SCREEN_MUST_BE_SET     (1 << 0) /* Must be set or results undefined */
+#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET /* Deprecated */
+#define SVGA_SCREEN_IS_PRIMARY      (1 << 1) /* Guest considers this screen to be 'primary' */
+#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) /* Guest is running a fullscreen app here */
+
+/*
+ * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2.  When the screen is
+ * deactivated the base layer is defined to lose all contents and
+ * become black.  When a screen is deactivated the backing store is
+ * optional.  When set backingPtr and bytesPerLine will be ignored.
+ */
+#define SVGA_SCREEN_DEACTIVATE  (1 << 3)
+
+/*
+ * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2.  When this flag is set
+ * the screen contents will be outputted as all black to the user
+ * though the base layer contents is preserved.  The screen base layer
+ * can still be read and written to like normal though the no visible
+ * effect will be seen by the user.  When the flag is changed the
+ * screen will be blanked or redrawn to the current contents as needed
+ * without any extra commands from the driver.  This flag only has an
+ * effect when the screen is not deactivated.
+ */
+#define SVGA_SCREEN_BLANKING (1 << 4)
 
 typedef
 struct SVGAScreenObject {
-   uint32 structSize;   // sizeof(SVGAScreenObject)
+   uint32 structSize;   /* sizeof(SVGAScreenObject) */
    uint32 id;
    uint32 flags;
    struct {
@@ -869,7 +976,14 @@ struct SVGAScreenObject {
    struct {
       int32 x;
       int32 y;
-   } root;              // Only used if SVGA_SCREEN_HAS_ROOT is set.
+   } root;
+
+   /*
+    * Added and required by SVGA_FIFO_CAP_SCREEN_OBJECT_2, optional
+    * with SVGA_FIFO_CAP_SCREEN_OBJECT.
+    */
+   SVGAGuestImage backingStore;
+   uint32 cloneCount;
 } SVGAScreenObject;
 
 
@@ -944,7 +1058,7 @@ typedef enum {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdUpdate {
    uint32 x;
    uint32 y;
    uint32 width;
@@ -963,7 +1077,7 @@ struct {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdRectCopy {
    uint32 srcX;
    uint32 srcY;
    uint32 destX;
@@ -987,14 +1101,14 @@ struct {
  */
 
 typedef
-struct {
-   uint32 id;             // Reserved, must be zero.
+struct SVGAFifoCmdDefineCursor {
+   uint32 id;             /* Reserved, must be zero. */
    uint32 hotspotX;
    uint32 hotspotY;
    uint32 width;
    uint32 height;
-   uint32 andMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
-   uint32 xorMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
+   uint32 andMaskDepth;   /* Value must be 1 or equal to BITS_PER_PIXEL */
+   uint32 xorMaskDepth;   /* Value must be 1 or equal to BITS_PER_PIXEL */
    /*
     * Followed by scanline data for AND mask, then XOR mask.
     * Each scanline is padded to a 32-bit boundary.
@@ -1016,8 +1130,8 @@ struct {
  */
 
 typedef
-struct {
-   uint32 id;             // Reserved, must be zero.
+struct SVGAFifoCmdDefineAlphaCursor {
+   uint32 id;             /* Reserved, must be zero. */
    uint32 hotspotX;
    uint32 hotspotY;
    uint32 width;
@@ -1039,7 +1153,7 @@ struct {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdUpdateVerbose {
    uint32 x;
    uint32 y;
    uint32 width;
@@ -1064,13 +1178,13 @@ struct {
 #define  SVGA_ROP_COPY                    0x03
 
 typedef
-struct {
-   uint32 color;     // In the same format as the GFB
+struct SVGAFifoCmdFrontRopFill {
+   uint32 color;     /* In the same format as the GFB */
    uint32 x;
    uint32 y;
    uint32 width;
    uint32 height;
-   uint32 rop;       // Must be SVGA_ROP_COPY
+   uint32 rop;       /* Must be SVGA_ROP_COPY */
 } SVGAFifoCmdFrontRopFill;
 
 
@@ -1107,7 +1221,7 @@ struct {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdEscape {
    uint32 nsid;
    uint32 size;
    /* followed by 'size' bytes of data */
@@ -1137,12 +1251,12 @@ struct {
  *    registers (SVGA_REG_NUM_GUEST_DISPLAYS, SVGA_REG_DISPLAY_*).
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
 struct {
-   SVGAScreenObject screen;   // Variable-length according to version
+   SVGAScreenObject screen;   /* Variable-length according to version */
 } SVGAFifoCmdDefineScreen;
 
 
@@ -1153,7 +1267,7 @@ struct {
  *    re-use.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1206,7 +1320,7 @@ struct {
  *    GMRFB.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1243,7 +1357,7 @@ struct {
  *    SVGA_CMD_ANNOTATION_* commands for details.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1291,7 +1405,7 @@ struct {
  *    the time any subsequent FENCE commands are reached.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1326,7 +1440,7 @@ struct {
  *    user's display is being remoted over a network connection.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1358,7 +1472,7 @@ struct {
  *    undefined.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1381,8 +1495,7 @@ typedef
 struct {
    uint32 gmrId;
    uint32 numPages;
-}
-SVGAFifoCmdDefineGMR2;
+} SVGAFifoCmdDefineGMR2;
 
 
 /*
@@ -1424,8 +1537,8 @@ typedef
 struct {
    uint32 gmrId;
    SVGARemapGMR2Flags flags;
-	uint32 offsetPages; /* offset in pages to begin remap */
-	uint32 numPages; /* number of pages to remap */
+   uint32 offsetPages; /* offset in pages to begin remap */
+   uint32 numPages; /* number of pages to remap */
    /*
     * Followed by additional data depending on SVGARemapGMR2Flags.
     *
@@ -1434,7 +1547,6 @@ struct {
     * (according to flag SVGA_REMAP_GMR2_PPN64) follows.  If flag
     * SVGA_REMAP_GMR2_SINGLE_PPN is set, array contains a single entry.
     */
-}
-SVGAFifoCmdRemapGMR2;
+} SVGAFifoCmdRemapGMR2;
 
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 5d665ce8cbe4..5a72ed908232 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -42,6 +42,10 @@ static uint32_t sys_placement_flags = TTM_PL_FLAG_SYSTEM |
 static uint32_t gmr_placement_flags = VMW_PL_FLAG_GMR |
 	TTM_PL_FLAG_CACHED;
 
+static uint32_t gmr_ne_placement_flags = VMW_PL_FLAG_GMR |
+	TTM_PL_FLAG_CACHED |
+	TTM_PL_FLAG_NO_EVICT;
+
 struct ttm_placement vmw_vram_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -56,6 +60,11 @@ static uint32_t vram_gmr_placement_flags[] = {
 	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
 };
 
+static uint32_t gmr_vram_placement_flags[] = {
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED,
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+};
+
 struct ttm_placement vmw_vram_gmr_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -65,6 +74,20 @@ struct ttm_placement vmw_vram_gmr_placement = {
 	.busy_placement = &gmr_placement_flags
 };
 
+static uint32_t vram_gmr_ne_placement_flags[] = {
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT,
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
+
+struct ttm_placement vmw_vram_gmr_ne_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 2,
+	.placement = vram_gmr_ne_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &gmr_ne_placement_flags
+};
+
 struct ttm_placement vmw_vram_sys_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -92,6 +115,30 @@ struct ttm_placement vmw_sys_placement = {
 	.busy_placement = &sys_placement_flags
 };
 
+static uint32_t evictable_placement_flags[] = {
+	TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED,
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED,
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+};
+
+struct ttm_placement vmw_evictable_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 3,
+	.placement = evictable_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
+struct ttm_placement vmw_srf_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 1,
+	.num_busy_placement = 2,
+	.placement = &gmr_placement_flags,
+	.busy_placement = gmr_vram_placement_flags
+};
+
 struct vmw_ttm_backend {
 	struct ttm_backend backend;
 	struct page **pages;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
new file mode 100644
index 000000000000..3fa884db08ab
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -0,0 +1,322 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "ttm/ttm_placement.h"
+
+#include "drmP.h"
+#include "vmwgfx_drv.h"
+
+
+/**
+ * vmw_dmabuf_to_placement - Validate a buffer to placement.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo to avoid failures.
+ *
+ * Returns
+ *  -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_placement(struct vmw_private *dev_priv,
+			    struct vmw_dma_buffer *buf,
+			    struct ttm_placement *placement,
+			    bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	int ret;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+
+	ttm_bo_unreserve(bo);
+
+err:
+	ttm_write_unlock(&vmaster->lock);
+	return ret;
+}
+
+/**
+ * vmw_dmabuf_to_vram_or_gmr - Move a buffer to vram or gmr.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo if @pin == true to avoid failures.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
+			      struct vmw_dma_buffer *buf,
+			      bool pin, bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	struct ttm_placement *placement;
+	int ret;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (pin)
+		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err;
+
+	/**
+	 * Put BO in VRAM if there is space, otherwise as a GMR.
+	 * If there is no space in VRAM and GMR ids are all used up,
+	 * start evicting GMRs to make room. If the DMA buffer can't be
+	 * used as a GMR, this will return -ENOMEM.
+	 */
+
+	if (pin)
+		placement = &vmw_vram_gmr_ne_placement;
+	else
+		placement = &vmw_vram_gmr_placement;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	if (likely(ret == 0) || ret == -ERESTARTSYS)
+		goto err_unreserve;
+
+
+	/**
+	 * If that failed, try VRAM again, this time evicting
+	 * previous contents.
+	 */
+
+	if (pin)
+		placement = &vmw_vram_ne_placement;
+	else
+		placement = &vmw_vram_placement;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+
+err_unreserve:
+	ttm_bo_unreserve(bo);
+err:
+	ttm_write_unlock(&vmaster->lock);
+	return ret;
+}
+
+/**
+ * vmw_dmabuf_to_vram - Move a buffer to vram.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer in vram if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_vram(struct vmw_private *dev_priv,
+		       struct vmw_dma_buffer *buf,
+		       bool pin, bool interruptible)
+{
+	struct ttm_placement *placement;
+
+	if (pin)
+		placement = &vmw_vram_ne_placement;
+	else
+		placement = &vmw_vram_placement;
+
+	return vmw_dmabuf_to_placement(dev_priv, buf,
+				       placement,
+				       interruptible);
+}
+
+/**
+ * vmw_dmabuf_to_start_of_vram - Move a buffer to start of vram.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo if @pin == true to avoid failures.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer in vram if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_start_of_vram(struct vmw_private *dev_priv,
+				struct vmw_dma_buffer *buf,
+				bool pin, bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	struct ttm_placement placement;
+	int ret = 0;
+
+	if (pin)
+		placement = vmw_vram_ne_placement;
+	else
+		placement = vmw_vram_placement;
+	placement.lpfn = bo->num_pages;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (pin)
+		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err_unlock;
+
+	/* Is this buffer already in vram but not at the start of it? */
+	if (bo->mem.mem_type == TTM_PL_VRAM &&
+	    bo->mem.start < bo->num_pages &&
+	    bo->mem.start > 0)
+		(void) ttm_bo_validate(bo, &vmw_sys_placement, false,
+				       false, false);
+
+	ret = ttm_bo_validate(bo, &placement, interruptible, false, false);
+
+	/* For some reason we didn't up at the start of vram */
+	WARN_ON(ret == 0 && bo->offset != 0);
+
+	ttm_bo_unreserve(bo);
+err_unlock:
+	ttm_write_unlock(&vmaster->lock);
+
+	return ret;
+}
+
+
+/**
+ * vmw_dmabuf_upin - Unpin the buffer given buffer, does not move the buffer.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to unpin.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_unpin(struct vmw_private *dev_priv,
+		     struct vmw_dma_buffer *buf,
+		     bool interruptible)
+{
+	/*
+	 * We could in theory early out if the buffer is
+	 * unpinned but we need to lock and reserve the buffer
+	 * anyways so we don't gain much by that.
+	 */
+	return vmw_dmabuf_to_placement(dev_priv, buf,
+				       &vmw_evictable_placement,
+				       interruptible);
+}
+
+
+/**
+ * vmw_bo_get_guest_ptr - Get the guest ptr representing the current placement
+ * of a buffer.
+ *
+ * @bo: Pointer to a struct ttm_buffer_object. Must be pinned or reserved.
+ * @ptr: SVGAGuestPtr returning the result.
+ */
+void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *bo,
+			  SVGAGuestPtr *ptr)
+{
+	if (bo->mem.mem_type == TTM_PL_VRAM) {
+		ptr->gmrId = SVGA_GMR_FRAMEBUFFER;
+		ptr->offset = bo->offset;
+	} else {
+		ptr->gmrId = bo->mem.start;
+		ptr->offset = 0;
+	}
+}
+
+
+/**
+ * vmw_bo_pin - Pin or unpin a buffer object without moving it.
+ *
+ * @bo: The buffer object. Must be reserved, and present either in VRAM
+ * or GMR memory.
+ * @pin: Whether to pin or unpin.
+ *
+ */
+void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin)
+{
+	uint32_t pl_flags;
+	struct ttm_placement placement;
+	uint32_t old_mem_type = bo->mem.mem_type;
+	int ret;
+
+	BUG_ON(!atomic_read(&bo->reserved));
+	BUG_ON(old_mem_type != TTM_PL_VRAM &&
+	       old_mem_type != VMW_PL_FLAG_GMR);
+
+	pl_flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED;
+	if (pin)
+		pl_flags |= TTM_PL_FLAG_NO_EVICT;
+
+	memset(&placement, 0, sizeof(placement));
+	placement.num_placement = 1;
+	placement.placement = &pl_flags;
+
+	ret = ttm_bo_validate(bo, &placement, false, true, true);
+
+	BUG_ON(ret != 0 || bo->mem.mem_type != old_mem_type);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index d4829cbf326d..b8eb8cdcfb78 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -94,6 +94,15 @@
 #define DRM_IOCTL_VMW_FENCE_UNREF				\
 	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_FENCE_UNREF,		\
 		 struct drm_vmw_fence_arg)
+#define DRM_IOCTL_VMW_FENCE_EVENT				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_FENCE_EVENT,		\
+		 struct drm_vmw_fence_event_arg)
+#define DRM_IOCTL_VMW_PRESENT					\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_PRESENT,		\
+		 struct drm_vmw_present_arg)
+#define DRM_IOCTL_VMW_PRESENT_READBACK				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_PRESENT_READBACK,	\
+		 struct drm_vmw_present_readback_arg)
 
 /**
  * The core DRM version of this macro doesn't account for
@@ -144,8 +153,18 @@ static struct drm_ioctl_desc vmw_ioctls[] = {
 		      DRM_AUTH | DRM_UNLOCKED),
 	VMW_IOCTL_DEF(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_FENCE_EVENT,
+		      vmw_fence_event_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
 	VMW_IOCTL_DEF(VMW_GET_3D_CAP, vmw_get_cap_3d_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
+
+	/* these allow direct access to the framebuffers mark as master only */
+	VMW_IOCTL_DEF(VMW_PRESENT, vmw_present_ioctl,
+		      DRM_MASTER | DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_PRESENT_READBACK,
+		      vmw_present_readback_ioctl,
+		      DRM_MASTER | DRM_AUTH | DRM_UNLOCKED),
 };
 
 static struct pci_device_id vmw_pci_id_list[] = {
@@ -200,6 +219,72 @@ static void vmw_print_capabilities(uint32_t capabilities)
 		DRM_INFO("  Screen Object 2.\n");
 }
 
+
+/**
+ * vmw_execbuf_prepare_dummy_query - Initialize a query result structure at
+ * the start of a buffer object.
+ *
+ * @dev_priv: The device private structure.
+ *
+ * This function will idle the buffer using an uninterruptible wait, then
+ * map the first page and initialize a pending occlusion query result structure,
+ * Finally it will unmap the buffer.
+ *
+ * TODO: Since we're only mapping a single page, we should optimize the map
+ * to use kmap_atomic / iomap_atomic.
+ */
+static void vmw_dummy_query_bo_prepare(struct vmw_private *dev_priv)
+{
+	struct ttm_bo_kmap_obj map;
+	volatile SVGA3dQueryResult *result;
+	bool dummy;
+	int ret;
+	struct ttm_bo_device *bdev = &dev_priv->bdev;
+	struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
+
+	ttm_bo_reserve(bo, false, false, false, 0);
+	spin_lock(&bdev->fence_lock);
+	ret = ttm_bo_wait(bo, false, false, false, TTM_USAGE_READWRITE);
+	spin_unlock(&bdev->fence_lock);
+	if (unlikely(ret != 0))
+		(void) vmw_fallback_wait(dev_priv, false, true, 0, false,
+					 10*HZ);
+
+	ret = ttm_bo_kmap(bo, 0, 1, &map);
+	if (likely(ret == 0)) {
+		result = ttm_kmap_obj_virtual(&map, &dummy);
+		result->totalSize = sizeof(*result);
+		result->state = SVGA3D_QUERYSTATE_PENDING;
+		result->result32 = 0xff;
+		ttm_bo_kunmap(&map);
+	} else
+		DRM_ERROR("Dummy query buffer map failed.\n");
+	ttm_bo_unreserve(bo);
+}
+
+
+/**
+ * vmw_dummy_query_bo_create - create a bo to hold a dummy query result
+ *
+ * @dev_priv: A device private structure.
+ *
+ * This function creates a small buffer object that holds the query
+ * result for dummy queries emitted as query barriers.
+ * No interruptible waits are done within this function.
+ *
+ * Returns an error if bo creation fails.
+ */
+static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv)
+{
+	return ttm_bo_create(&dev_priv->bdev,
+			     PAGE_SIZE,
+			     ttm_bo_type_device,
+			     &vmw_vram_sys_placement,
+			     0, 0, false, NULL,
+			     &dev_priv->dummy_query_bo);
+}
+
+
 static int vmw_request_device(struct vmw_private *dev_priv)
 {
 	int ret;
@@ -210,12 +295,29 @@ static int vmw_request_device(struct vmw_private *dev_priv)
 		return ret;
 	}
 	vmw_fence_fifo_up(dev_priv->fman);
+	ret = vmw_dummy_query_bo_create(dev_priv);
+	if (unlikely(ret != 0))
+		goto out_no_query_bo;
+	vmw_dummy_query_bo_prepare(dev_priv);
 
 	return 0;
+
+out_no_query_bo:
+	vmw_fence_fifo_down(dev_priv->fman);
+	vmw_fifo_release(dev_priv, &dev_priv->fifo);
+	return ret;
 }
 
 static void vmw_release_device(struct vmw_private *dev_priv)
 {
+	/*
+	 * Previous destructions should've released
+	 * the pinned bo.
+	 */
+
+	BUG_ON(dev_priv->pinned_bo != NULL);
+
+	ttm_bo_unref(&dev_priv->dummy_query_bo);
 	vmw_fence_fifo_down(dev_priv->fman);
 	vmw_fifo_release(dev_priv, &dev_priv->fifo);
 }
@@ -306,6 +408,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	init_waitqueue_head(&dev_priv->fifo_queue);
 	dev_priv->fence_queue_waiters = 0;
 	atomic_set(&dev_priv->fifo_queue_waiters, 0);
+	INIT_LIST_HEAD(&dev_priv->surface_lru);
+	dev_priv->used_memory_size = 0;
 
 	dev_priv->io_start = pci_resource_start(dev->pdev, 0);
 	dev_priv->vram_start = pci_resource_start(dev->pdev, 1);
@@ -326,6 +430,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 
 	dev_priv->capabilities = vmw_read(dev_priv, SVGA_REG_CAPABILITIES);
 
+	dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE);
+	dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE);
+	dev_priv->fb_max_width = vmw_read(dev_priv, SVGA_REG_MAX_WIDTH);
+	dev_priv->fb_max_height = vmw_read(dev_priv, SVGA_REG_MAX_HEIGHT);
 	if (dev_priv->capabilities & SVGA_CAP_GMR) {
 		dev_priv->max_gmr_descriptors =
 			vmw_read(dev_priv,
@@ -338,13 +446,15 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 			vmw_read(dev_priv, SVGA_REG_GMRS_MAX_PAGES);
 		dev_priv->memory_size =
 			vmw_read(dev_priv, SVGA_REG_MEMORY_SIZE);
+		dev_priv->memory_size -= dev_priv->vram_size;
+	} else {
+		/*
+		 * An arbitrary limit of 512MiB on surface
+		 * memory. But all HWV8 hardware supports GMR2.
+		 */
+		dev_priv->memory_size = 512*1024*1024;
 	}
 
-	dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE);
-	dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE);
-	dev_priv->fb_max_width = vmw_read(dev_priv, SVGA_REG_MAX_WIDTH);
-	dev_priv->fb_max_height = vmw_read(dev_priv, SVGA_REG_MAX_HEIGHT);
-
 	mutex_unlock(&dev_priv->hw_mutex);
 
 	vmw_print_capabilities(dev_priv->capabilities);
@@ -358,8 +468,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	if (dev_priv->capabilities & SVGA_CAP_GMR2) {
 		DRM_INFO("Max number of GMR pages is %u\n",
 			 (unsigned)dev_priv->max_gmr_pages);
-		DRM_INFO("Max dedicated hypervisor graphics memory is %u\n",
-			 (unsigned)dev_priv->memory_size);
+		DRM_INFO("Max dedicated hypervisor surface memory is %u kiB\n",
+			 (unsigned)dev_priv->memory_size / 1024);
 	}
 	DRM_INFO("VRAM at 0x%08x size is %u kiB\n",
 		 dev_priv->vram_start, dev_priv->vram_size / 1024);
@@ -451,22 +561,30 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	dev_priv->fman = vmw_fence_manager_init(dev_priv);
 	if (unlikely(dev_priv->fman == NULL))
 		goto out_no_fman;
+
+	/* Need to start the fifo to check if we can do screen objects */
+	ret = vmw_3d_resource_inc(dev_priv, true);
+	if (unlikely(ret != 0))
+		goto out_no_fifo;
+	vmw_kms_save_vga(dev_priv);
+
+	/* Start kms and overlay systems, needs fifo. */
 	ret = vmw_kms_init(dev_priv);
 	if (unlikely(ret != 0))
 		goto out_no_kms;
 	vmw_overlay_init(dev_priv);
+
+	/* 3D Depends on Screen Objects being used. */
+	DRM_INFO("Detected %sdevice 3D availability.\n",
+		 vmw_fifo_have_3d(dev_priv) ?
+		 "" : "no ");
+
+	/* We might be done with the fifo now */
 	if (dev_priv->enable_fb) {
-		ret = vmw_3d_resource_inc(dev_priv, false);
-		if (unlikely(ret != 0))
-			goto out_no_fifo;
-		vmw_kms_save_vga(dev_priv);
 		vmw_fb_init(dev_priv);
-		DRM_INFO("%s", vmw_fifo_have_3d(dev_priv) ?
-			 "Detected device 3D availability.\n" :
-			 "Detected no device 3D availability.\n");
 	} else {
-		DRM_INFO("Delayed 3D detection since we're not "
-			 "running the device in SVGA mode yet.\n");
+		vmw_kms_restore_vga(dev_priv);
+		vmw_3d_resource_dec(dev_priv, true);
 	}
 
 	if (dev_priv->capabilities & SVGA_CAP_IRQMASK) {
@@ -483,15 +601,17 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	return 0;
 
 out_no_irq:
-	if (dev_priv->enable_fb) {
+	if (dev_priv->enable_fb)
 		vmw_fb_close(dev_priv);
+	vmw_overlay_close(dev_priv);
+	vmw_kms_close(dev_priv);
+out_no_kms:
+	/* We still have a 3D resource reference held */
+	if (dev_priv->enable_fb) {
 		vmw_kms_restore_vga(dev_priv);
 		vmw_3d_resource_dec(dev_priv, false);
 	}
 out_no_fifo:
-	vmw_overlay_close(dev_priv);
-	vmw_kms_close(dev_priv);
-out_no_kms:
 	vmw_fence_manager_takedown(dev_priv->fman);
 out_no_fman:
 	if (dev_priv->stealth)
@@ -771,7 +891,7 @@ static void vmw_master_drop(struct drm_device *dev,
 
 	vmw_fp->locked_master = drm_master_get(file_priv->master);
 	ret = ttm_vt_lock(&vmaster->lock, false, vmw_fp->tfile);
-	vmw_kms_idle_workqueues(vmaster);
+	vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
 
 	if (unlikely((ret != 0))) {
 		DRM_ERROR("Unable to lock TTM at VT switch.\n");
@@ -823,6 +943,7 @@ static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
 		 * This empties VRAM and unbinds all GMR bindings.
 		 * Buffer contents is moved to swappable memory.
 		 */
+		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
 		ttm_bo_swapout_all(&dev_priv->bdev);
 
 		break;
@@ -948,6 +1069,8 @@ static struct drm_driver driver = {
 	.irq_uninstall = vmw_irq_uninstall,
 	.irq_handler = vmw_irq_handler,
 	.get_vblank_counter = vmw_get_vblank_counter,
+	.enable_vblank = vmw_enable_vblank,
+	.disable_vblank = vmw_disable_vblank,
 	.reclaim_buffers_locked = NULL,
 	.ioctls = vmw_ioctls,
 	.num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls),
@@ -964,7 +1087,8 @@ static struct drm_driver driver = {
 		 .release = drm_release,
 		 .unlocked_ioctl = vmw_unlocked_ioctl,
 		 .mmap = vmw_mmap,
-		 .poll = drm_poll,
+		 .poll = vmw_fops_poll,
+		 .read = vmw_fops_read,
 		 .fasync = drm_fasync,
 #if defined(CONFIG_COMPAT)
 		 .compat_ioctl = drm_compat_ioctl,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 564a81582111..30589d0aecd9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -40,9 +40,9 @@
 #include "ttm/ttm_module.h"
 #include "vmwgfx_fence.h"
 
-#define VMWGFX_DRIVER_DATE "20110901"
+#define VMWGFX_DRIVER_DATE "20111008"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 0
+#define VMWGFX_DRIVER_MINOR 2
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
@@ -79,9 +79,11 @@ struct vmw_resource {
 	int id;
 	enum ttm_object_type res_type;
 	bool avail;
+	void (*remove_from_lists) (struct vmw_resource *res);
 	void (*hw_destroy) (struct vmw_resource *res);
 	void (*res_free) (struct vmw_resource *res);
-	bool on_validate_list;
+	struct list_head validate_head;
+	struct list_head query_head; /* Protected by the cmdbuf mutex */
 	/* TODO is a generic snooper needed? */
 #if 0
 	void (*snoop)(struct vmw_resource *res,
@@ -97,8 +99,12 @@ struct vmw_cursor_snooper {
 	uint32_t *image;
 };
 
+struct vmw_framebuffer;
+struct vmw_surface_offset;
+
 struct vmw_surface {
 	struct vmw_resource res;
+	struct list_head lru_head; /* Protected by the resource lock */
 	uint32_t flags;
 	uint32_t format;
 	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
@@ -109,6 +115,9 @@ struct vmw_surface {
 
 	/* TODO so far just a extra pointer */
 	struct vmw_cursor_snooper snooper;
+	struct ttm_buffer_object *backup;
+	struct vmw_surface_offset *offsets;
+	uint32_t backup_size;
 };
 
 struct vmw_marker_queue {
@@ -139,6 +148,8 @@ struct vmw_sw_context{
 	struct ida bo_list;
 	uint32_t last_cid;
 	bool cid_valid;
+	bool kernel; /**< is the called made from the kernel */
+	struct vmw_resource *cur_ctx;
 	uint32_t last_sid;
 	uint32_t sid_translation;
 	bool sid_valid;
@@ -150,8 +161,12 @@ struct vmw_sw_context{
 	uint32_t cur_val_buf;
 	uint32_t *cmd_bounce;
 	uint32_t cmd_bounce_size;
-	struct vmw_resource *resources[VMWGFX_MAX_VALIDATIONS];
-	uint32_t num_ref_resources;
+	struct list_head resource_list;
+	uint32_t fence_flags;
+	struct list_head query_list;
+	struct ttm_buffer_object *cur_query_bo;
+	uint32_t cur_query_cid;
+	bool query_cid_valid;
 };
 
 struct vmw_legacy_display;
@@ -216,6 +231,7 @@ struct vmw_private {
 
 	void *fb_info;
 	struct vmw_legacy_display *ldu_priv;
+	struct vmw_screen_object_display *sou_priv;
 	struct vmw_overlay *overlay_priv;
 
 	/*
@@ -248,10 +264,12 @@ struct vmw_private {
 	wait_queue_head_t fence_queue;
 	wait_queue_head_t fifo_queue;
 	int fence_queue_waiters; /* Protected by hw_mutex */
+	int goal_queue_waiters; /* Protected by hw_mutex */
 	atomic_t fifo_queue_waiters;
 	uint32_t last_read_seqno;
 	spinlock_t irq_lock;
 	struct vmw_fence_manager *fman;
+	uint32_t irq_mask;
 
 	/*
 	 * Device state
@@ -290,6 +308,26 @@ struct vmw_private {
 
 	struct mutex release_mutex;
 	uint32_t num_3d_resources;
+
+	/*
+	 * Query processing. These members
+	 * are protected by the cmdbuf mutex.
+	 */
+
+	struct ttm_buffer_object *dummy_query_bo;
+	struct ttm_buffer_object *pinned_bo;
+	uint32_t query_cid;
+	bool dummy_query_bo_pinned;
+
+	/*
+	 * Surface swapping. The "surface_lru" list is protected by the
+	 * resource lock in order to be able to destroy a surface and take
+	 * it off the lru atomically. "used_memory_size" is currently
+	 * protected by the cmdbuf mutex for simplicity.
+	 */
+
+	struct list_head surface_lru;
+	uint32_t used_memory_size;
 };
 
 static inline struct vmw_private *vmw_priv(struct drm_device *dev)
@@ -369,6 +407,8 @@ extern int vmw_surface_reference_ioctl(struct drm_device *dev, void *data,
 extern int vmw_surface_check(struct vmw_private *dev_priv,
 			     struct ttm_object_file *tfile,
 			     uint32_t handle, int *id);
+extern int vmw_surface_validate(struct vmw_private *dev_priv,
+				struct vmw_surface *srf);
 extern void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo);
 extern int vmw_dmabuf_init(struct vmw_private *dev_priv,
 			   struct vmw_dma_buffer *vmw_bo,
@@ -384,10 +424,6 @@ extern uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo,
 extern void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo);
 extern int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile,
 				  uint32_t id, struct vmw_dma_buffer **out);
-extern int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
-				       struct vmw_dma_buffer *bo);
-extern int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
-				struct vmw_dma_buffer *bo);
 extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file_priv);
 extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data,
@@ -396,7 +432,30 @@ extern int vmw_user_stream_lookup(struct vmw_private *dev_priv,
 				  struct ttm_object_file *tfile,
 				  uint32_t *inout_id,
 				  struct vmw_resource **out);
+extern void vmw_resource_unreserve(struct list_head *list);
 
+/**
+ * DMA buffer helper routines - vmwgfx_dmabuf.c
+ */
+extern int vmw_dmabuf_to_placement(struct vmw_private *vmw_priv,
+				   struct vmw_dma_buffer *bo,
+				   struct ttm_placement *placement,
+				   bool interruptible);
+extern int vmw_dmabuf_to_vram(struct vmw_private *dev_priv,
+			      struct vmw_dma_buffer *buf,
+			      bool pin, bool interruptible);
+extern int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
+				     struct vmw_dma_buffer *buf,
+				     bool pin, bool interruptible);
+extern int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
+				       struct vmw_dma_buffer *bo,
+				       bool pin, bool interruptible);
+extern int vmw_dmabuf_unpin(struct vmw_private *vmw_priv,
+			    struct vmw_dma_buffer *bo,
+			    bool interruptible);
+extern void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *buf,
+				 SVGAGuestPtr *ptr);
+extern void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin);
 
 /**
  * Misc Ioctl functionality - vmwgfx_ioctl.c
@@ -406,6 +465,14 @@ extern int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 extern int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
+extern int vmw_present_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv);
+extern int vmw_present_readback_ioctl(struct drm_device *dev, void *data,
+				      struct drm_file *file_priv);
+extern unsigned int vmw_fops_poll(struct file *filp,
+				  struct poll_table_struct *wait);
+extern ssize_t vmw_fops_read(struct file *filp, char __user *buffer,
+			     size_t count, loff_t *offset);
 
 /**
  * Fifo utilities - vmwgfx_fifo.c
@@ -422,6 +489,8 @@ extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
 extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
 extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
 extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
+extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
+				     uint32_t cid);
 
 /**
  * TTM glue - vmwgfx_ttm_glue.c
@@ -439,7 +508,10 @@ extern struct ttm_placement vmw_vram_placement;
 extern struct ttm_placement vmw_vram_ne_placement;
 extern struct ttm_placement vmw_vram_sys_placement;
 extern struct ttm_placement vmw_vram_gmr_placement;
+extern struct ttm_placement vmw_vram_gmr_ne_placement;
 extern struct ttm_placement vmw_sys_placement;
+extern struct ttm_placement vmw_evictable_placement;
+extern struct ttm_placement vmw_srf_placement;
 extern struct ttm_bo_driver vmw_bo_driver;
 extern int vmw_dma_quiescent(struct drm_device *dev);
 
@@ -449,6 +521,30 @@ extern int vmw_dma_quiescent(struct drm_device *dev);
 
 extern int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv);
+extern int vmw_execbuf_process(struct drm_file *file_priv,
+			       struct vmw_private *dev_priv,
+			       void __user *user_commands,
+			       void *kernel_commands,
+			       uint32_t command_size,
+			       uint64_t throttle_us,
+			       struct drm_vmw_fence_rep __user
+			       *user_fence_rep);
+
+extern void
+vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
+			      bool only_on_cid_match, uint32_t cid);
+
+extern int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+				      struct vmw_private *dev_priv,
+				      struct vmw_fence_obj **p_fence,
+				      uint32_t *p_handle);
+extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+					struct vmw_fpriv *vmw_fp,
+					int ret,
+					struct drm_vmw_fence_rep __user
+					*user_fence_rep,
+					struct vmw_fence_obj *fence,
+					uint32_t fence_handle);
 
 /**
  * IRQs and wating - vmwgfx_irq.c
@@ -473,6 +569,8 @@ extern void vmw_update_seqno(struct vmw_private *dev_priv,
 				struct vmw_fifo_state *fifo_state);
 extern void vmw_seqno_waiter_add(struct vmw_private *dev_priv);
 extern void vmw_seqno_waiter_remove(struct vmw_private *dev_priv);
+extern void vmw_goal_waiter_add(struct vmw_private *dev_priv);
+extern void vmw_goal_waiter_remove(struct vmw_private *dev_priv);
 
 /**
  * Rudimentary fence-like objects currently used only for throttling -
@@ -520,6 +618,21 @@ bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
 				uint32_t pitch,
 				uint32_t height);
 u32 vmw_get_vblank_counter(struct drm_device *dev, int crtc);
+int vmw_enable_vblank(struct drm_device *dev, int crtc);
+void vmw_disable_vblank(struct drm_device *dev, int crtc);
+int vmw_kms_present(struct vmw_private *dev_priv,
+		    struct drm_file *file_priv,
+		    struct vmw_framebuffer *vfb,
+		    struct vmw_surface *surface,
+		    uint32_t sid, int32_t destX, int32_t destY,
+		    struct drm_vmw_rect *clips,
+		    uint32_t num_clips);
+int vmw_kms_readback(struct vmw_private *dev_priv,
+		     struct drm_file *file_priv,
+		     struct vmw_framebuffer *vfb,
+		     struct drm_vmw_fence_rep __user *user_fence_rep,
+		     struct drm_vmw_rect *clips,
+		     uint32_t num_clips);
 
 /**
  * Overlay control - vmwgfx_overlay.c
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index fa26e647f488..28e1c35aec6f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -44,28 +44,64 @@ static int vmw_cmd_ok(struct vmw_private *dev_priv,
 	return 0;
 }
 
-
-static int vmw_resource_to_validate_list(struct vmw_sw_context *sw_context,
-					 struct vmw_resource **p_res)
+static void vmw_resource_to_validate_list(struct vmw_sw_context *sw_context,
+					  struct vmw_resource **p_res)
 {
-	int ret = 0;
 	struct vmw_resource *res = *p_res;
 
-	if (!res->on_validate_list) {
-		if (sw_context->num_ref_resources >= VMWGFX_MAX_VALIDATIONS) {
-			DRM_ERROR("Too many resources referenced in "
-				  "command stream.\n");
-			ret = -ENOMEM;
-			goto out;
-		}
-		sw_context->resources[sw_context->num_ref_resources++] = res;
-		res->on_validate_list = true;
-		return 0;
+	if (list_empty(&res->validate_head)) {
+		list_add_tail(&res->validate_head, &sw_context->resource_list);
+		*p_res = NULL;
+	} else
+		vmw_resource_unreference(p_res);
+}
+
+/**
+ * vmw_bo_to_validate_list - add a bo to a validate list
+ *
+ * @sw_context: The software context used for this command submission batch.
+ * @bo: The buffer object to add.
+ * @fence_flags: Fence flags to be or'ed with any other fence flags for
+ * this buffer on this submission batch.
+ * @p_val_node: If non-NULL Will be updated with the validate node number
+ * on return.
+ *
+ * Returns -EINVAL if the limit of number of buffer objects per command
+ * submission is reached.
+ */
+static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context,
+				   struct ttm_buffer_object *bo,
+				   uint32_t fence_flags,
+				   uint32_t *p_val_node)
+{
+	uint32_t val_node;
+	struct ttm_validate_buffer *val_buf;
+
+	val_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
+
+	if (unlikely(val_node >= VMWGFX_MAX_VALIDATIONS)) {
+		DRM_ERROR("Max number of DMA buffers per submission"
+			  " exceeded.\n");
+		return -EINVAL;
 	}
 
-out:
-	vmw_resource_unreference(p_res);
-	return ret;
+	val_buf = &sw_context->val_bufs[val_node];
+	if (unlikely(val_node == sw_context->cur_val_buf)) {
+		val_buf->new_sync_obj_arg = NULL;
+		val_buf->bo = ttm_bo_reference(bo);
+		val_buf->usage = TTM_USAGE_READWRITE;
+		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
+		++sw_context->cur_val_buf;
+	}
+
+	val_buf->new_sync_obj_arg = (void *)
+		((unsigned long) val_buf->new_sync_obj_arg | fence_flags);
+	sw_context->fence_flags |= fence_flags;
+
+	if (p_val_node)
+		*p_val_node = val_node;
+
+	return 0;
 }
 
 static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
@@ -94,7 +130,10 @@ static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
 
 	sw_context->last_cid = cmd->cid;
 	sw_context->cid_valid = true;
-	return vmw_resource_to_validate_list(sw_context, &ctx);
+	sw_context->cur_ctx = ctx;
+	vmw_resource_to_validate_list(sw_context, &ctx);
+
+	return 0;
 }
 
 static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
@@ -114,7 +153,8 @@ static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
 		return 0;
 	}
 
-	ret = vmw_user_surface_lookup_handle(dev_priv, sw_context->tfile,
+	ret = vmw_user_surface_lookup_handle(dev_priv,
+					     sw_context->tfile,
 					     *sid, &srf);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could ot find or use surface 0x%08x "
@@ -124,13 +164,23 @@ static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
 		return ret;
 	}
 
+	ret = vmw_surface_validate(dev_priv, srf);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Could not validate surface.\n");
+		vmw_surface_unreference(&srf);
+		return ret;
+	}
+
 	sw_context->last_sid = *sid;
 	sw_context->sid_valid = true;
 	sw_context->sid_translation = srf->res.id;
 	*sid = sw_context->sid_translation;
 
 	res = &srf->res;
-	return vmw_resource_to_validate_list(sw_context, &res);
+	vmw_resource_to_validate_list(sw_context, &res);
+
+	return 0;
 }
 
 
@@ -197,6 +247,12 @@ static int vmw_cmd_blt_surf_screen_check(struct vmw_private *dev_priv,
 	} *cmd;
 
 	cmd = container_of(header, struct vmw_sid_cmd, header);
+
+	if (unlikely(!sw_context->kernel)) {
+		DRM_ERROR("Kernel only SVGA3d command: %u.\n", cmd->header.id);
+		return -EPERM;
+	}
+
 	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.srcImage.sid);
 }
 
@@ -209,10 +265,179 @@ static int vmw_cmd_present_check(struct vmw_private *dev_priv,
 		SVGA3dCmdPresent body;
 	} *cmd;
 
+
 	cmd = container_of(header, struct vmw_sid_cmd, header);
+
+	if (unlikely(!sw_context->kernel)) {
+		DRM_ERROR("Kernel only SVGA3d command: %u.\n", cmd->header.id);
+		return -EPERM;
+	}
+
 	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.sid);
 }
 
+/**
+ * vmw_query_bo_switch_prepare - Prepare to switch pinned buffer for queries.
+ *
+ * @dev_priv: The device private structure.
+ * @cid: The hardware context for the next query.
+ * @new_query_bo: The new buffer holding query results.
+ * @sw_context: The software context used for this command submission.
+ *
+ * This function checks whether @new_query_bo is suitable for holding
+ * query results, and if another buffer currently is pinned for query
+ * results. If so, the function prepares the state of @sw_context for
+ * switching pinned buffers after successful submission of the current
+ * command batch. It also checks whether we're using a new query context.
+ * In that case, it makes sure we emit a query barrier for the old
+ * context before the current query buffer is fenced.
+ */
+static int vmw_query_bo_switch_prepare(struct vmw_private *dev_priv,
+				       uint32_t cid,
+				       struct ttm_buffer_object *new_query_bo,
+				       struct vmw_sw_context *sw_context)
+{
+	int ret;
+	bool add_cid = false;
+	uint32_t cid_to_add;
+
+	if (unlikely(new_query_bo != sw_context->cur_query_bo)) {
+
+		if (unlikely(new_query_bo->num_pages > 4)) {
+			DRM_ERROR("Query buffer too large.\n");
+			return -EINVAL;
+		}
+
+		if (unlikely(sw_context->cur_query_bo != NULL)) {
+			BUG_ON(!sw_context->query_cid_valid);
+			add_cid = true;
+			cid_to_add = sw_context->cur_query_cid;
+			ret = vmw_bo_to_validate_list(sw_context,
+						      sw_context->cur_query_bo,
+						      DRM_VMW_FENCE_FLAG_EXEC,
+						      NULL);
+			if (unlikely(ret != 0))
+				return ret;
+		}
+		sw_context->cur_query_bo = new_query_bo;
+
+		ret = vmw_bo_to_validate_list(sw_context,
+					      dev_priv->dummy_query_bo,
+					      DRM_VMW_FENCE_FLAG_EXEC,
+					      NULL);
+		if (unlikely(ret != 0))
+			return ret;
+
+	}
+
+	if (unlikely(cid != sw_context->cur_query_cid &&
+		     sw_context->query_cid_valid)) {
+		add_cid = true;
+		cid_to_add = sw_context->cur_query_cid;
+	}
+
+	sw_context->cur_query_cid = cid;
+	sw_context->query_cid_valid = true;
+
+	if (add_cid) {
+		struct vmw_resource *ctx = sw_context->cur_ctx;
+
+		if (list_empty(&ctx->query_head))
+			list_add_tail(&ctx->query_head,
+				      &sw_context->query_list);
+		ret = vmw_bo_to_validate_list(sw_context,
+					      dev_priv->dummy_query_bo,
+					      DRM_VMW_FENCE_FLAG_EXEC,
+					      NULL);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+	return 0;
+}
+
+
+/**
+ * vmw_query_bo_switch_commit - Finalize switching pinned query buffer
+ *
+ * @dev_priv: The device private structure.
+ * @sw_context: The software context used for this command submission batch.
+ *
+ * This function will check if we're switching query buffers, and will then,
+ * if no other query waits are issued this command submission batch,
+ * issue a dummy occlusion query wait used as a query barrier. When the fence
+ * object following that query wait has signaled, we are sure that all
+ * preseding queries have finished, and the old query buffer can be unpinned.
+ * However, since both the new query buffer and the old one are fenced with
+ * that fence, we can do an asynchronus unpin now, and be sure that the
+ * old query buffer won't be moved until the fence has signaled.
+ *
+ * As mentioned above, both the new - and old query buffers need to be fenced
+ * using a sequence emitted *after* calling this function.
+ */
+static void vmw_query_bo_switch_commit(struct vmw_private *dev_priv,
+				     struct vmw_sw_context *sw_context)
+{
+
+	struct vmw_resource *ctx, *next_ctx;
+	int ret;
+
+	/*
+	 * The validate list should still hold references to all
+	 * contexts here.
+	 */
+
+	list_for_each_entry_safe(ctx, next_ctx, &sw_context->query_list,
+				 query_head) {
+		list_del_init(&ctx->query_head);
+
+		BUG_ON(list_empty(&ctx->validate_head));
+
+		ret = vmw_fifo_emit_dummy_query(dev_priv, ctx->id);
+
+		if (unlikely(ret != 0))
+			DRM_ERROR("Out of fifo space for dummy query.\n");
+	}
+
+	if (dev_priv->pinned_bo != sw_context->cur_query_bo) {
+		if (dev_priv->pinned_bo) {
+			vmw_bo_pin(dev_priv->pinned_bo, false);
+			ttm_bo_unref(&dev_priv->pinned_bo);
+		}
+
+		vmw_bo_pin(sw_context->cur_query_bo, true);
+
+		/*
+		 * We pin also the dummy_query_bo buffer so that we
+		 * don't need to validate it when emitting
+		 * dummy queries in context destroy paths.
+		 */
+
+		vmw_bo_pin(dev_priv->dummy_query_bo, true);
+		dev_priv->dummy_query_bo_pinned = true;
+
+		dev_priv->query_cid = sw_context->cur_query_cid;
+		dev_priv->pinned_bo =
+			ttm_bo_reference(sw_context->cur_query_bo);
+	}
+}
+
+/**
+ * vmw_query_switch_backoff - clear query barrier list
+ * @sw_context: The sw context used for this submission batch.
+ *
+ * This function is used as part of an error path, where a previously
+ * set up list of query barriers needs to be cleared.
+ *
+ */
+static void vmw_query_switch_backoff(struct vmw_sw_context *sw_context)
+{
+	struct list_head *list, *next;
+
+	list_for_each_safe(list, next, &sw_context->query_list) {
+		list_del_init(list);
+	}
+}
+
 static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 				   struct vmw_sw_context *sw_context,
 				   SVGAGuestPtr *ptr,
@@ -222,8 +447,6 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 	struct ttm_buffer_object *bo;
 	uint32_t handle = ptr->gmrId;
 	struct vmw_relocation *reloc;
-	uint32_t cur_validate_node;
-	struct ttm_validate_buffer *val_buf;
 	int ret;
 
 	ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo);
@@ -243,23 +466,11 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 	reloc = &sw_context->relocs[sw_context->cur_reloc++];
 	reloc->location = ptr;
 
-	cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
-	if (unlikely(cur_validate_node >= VMWGFX_MAX_VALIDATIONS)) {
-		DRM_ERROR("Max number of DMA buffers per submission"
-			  " exceeded.\n");
-		ret = -EINVAL;
+	ret = vmw_bo_to_validate_list(sw_context, bo, DRM_VMW_FENCE_FLAG_EXEC,
+				      &reloc->index);
+	if (unlikely(ret != 0))
 		goto out_no_reloc;
-	}
 
-	reloc->index = cur_validate_node;
-	if (unlikely(cur_validate_node == sw_context->cur_val_buf)) {
-		val_buf = &sw_context->val_bufs[cur_validate_node];
-		val_buf->bo = ttm_bo_reference(bo);
-		val_buf->usage = TTM_USAGE_READWRITE;
-		val_buf->new_sync_obj_arg = (void *) DRM_VMW_FENCE_FLAG_EXEC;
-		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
-		++sw_context->cur_val_buf;
-	}
 	*vmw_bo_p = vmw_bo;
 	return 0;
 
@@ -291,8 +502,11 @@ static int vmw_cmd_end_query(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		return ret;
 
+	ret = vmw_query_bo_switch_prepare(dev_priv, cmd->q.cid,
+					  &vmw_bo->base, sw_context);
+
 	vmw_dmabuf_unreference(&vmw_bo);
-	return 0;
+	return ret;
 }
 
 static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
@@ -305,6 +519,7 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
 		SVGA3dCmdWaitForQuery q;
 	} *cmd;
 	int ret;
+	struct vmw_resource *ctx;
 
 	cmd = container_of(header, struct vmw_query_cmd, header);
 	ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
@@ -318,6 +533,16 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
 		return ret;
 
 	vmw_dmabuf_unreference(&vmw_bo);
+
+	/*
+	 * This wait will act as a barrier for previous waits for this
+	 * context.
+	 */
+
+	ctx = sw_context->cur_ctx;
+	if (!list_empty(&ctx->query_head))
+		list_del_init(&ctx->query_head);
+
 	return 0;
 }
 
@@ -350,6 +575,13 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		goto out_no_reloc;
 	}
 
+	ret = vmw_surface_validate(dev_priv, srf);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Culd not validate surface.\n");
+		goto out_no_validate;
+	}
+
 	/*
 	 * Patch command stream with device SID.
 	 */
@@ -359,8 +591,12 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 	vmw_dmabuf_unreference(&vmw_bo);
 
 	res = &srf->res;
-	return vmw_resource_to_validate_list(sw_context, &res);
+	vmw_resource_to_validate_list(sw_context, &res);
+
+	return 0;
 
+out_no_validate:
+	vmw_surface_unreference(&srf);
 out_no_reloc:
 	vmw_dmabuf_unreference(&vmw_bo);
 	return ret;
@@ -450,6 +686,71 @@ static int vmw_cmd_tex_state(struct vmw_private *dev_priv,
 	return 0;
 }
 
+static int vmw_cmd_check_define_gmrfb(struct vmw_private *dev_priv,
+				      struct vmw_sw_context *sw_context,
+				      void *buf)
+{
+	struct vmw_dma_buffer *vmw_bo;
+	int ret;
+
+	struct {
+		uint32_t header;
+		SVGAFifoCmdDefineGMRFB body;
+	} *cmd = buf;
+
+	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+				      &cmd->body.ptr,
+				      &vmw_bo);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_dmabuf_unreference(&vmw_bo);
+
+	return ret;
+}
+
+static int vmw_cmd_check_not_3d(struct vmw_private *dev_priv,
+				struct vmw_sw_context *sw_context,
+				void *buf, uint32_t *size)
+{
+	uint32_t size_remaining = *size;
+	uint32_t cmd_id;
+
+	cmd_id = le32_to_cpu(((uint32_t *)buf)[0]);
+	switch (cmd_id) {
+	case SVGA_CMD_UPDATE:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdUpdate);
+		break;
+	case SVGA_CMD_DEFINE_GMRFB:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdDefineGMRFB);
+		break;
+	case SVGA_CMD_BLIT_GMRFB_TO_SCREEN:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdBlitGMRFBToScreen);
+		break;
+	case SVGA_CMD_BLIT_SCREEN_TO_GMRFB:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdBlitGMRFBToScreen);
+		break;
+	default:
+		DRM_ERROR("Unsupported SVGA command: %u.\n", cmd_id);
+		return -EINVAL;
+	}
+
+	if (*size > size_remaining) {
+		DRM_ERROR("Invalid SVGA command (size mismatch):"
+			  " %u.\n", cmd_id);
+		return -EINVAL;
+	}
+
+	if (unlikely(!sw_context->kernel)) {
+		DRM_ERROR("Kernel only SVGA command: %u.\n", cmd_id);
+		return -EPERM;
+	}
+
+	if (cmd_id == SVGA_CMD_DEFINE_GMRFB)
+		return vmw_cmd_check_define_gmrfb(dev_priv, sw_context, buf);
+
+	return 0;
+}
 
 typedef int (*vmw_cmd_func) (struct vmw_private *,
 			     struct vmw_sw_context *,
@@ -502,11 +803,11 @@ static int vmw_cmd_check(struct vmw_private *dev_priv,
 	SVGA3dCmdHeader *header = (SVGA3dCmdHeader *) buf;
 	int ret;
 
-	cmd_id = ((uint32_t *)buf)[0];
-	if (cmd_id == SVGA_CMD_UPDATE) {
-		*size = 5 << 2;
-		return 0;
-	}
+	cmd_id = le32_to_cpu(((uint32_t *)buf)[0]);
+	/* Handle any none 3D commands */
+	if (unlikely(cmd_id < SVGA_CMD_MAX))
+		return vmw_cmd_check_not_3d(dev_priv, sw_context, buf, size);
+
 
 	cmd_id = le32_to_cpu(header->id);
 	*size = le32_to_cpu(header->size) + sizeof(SVGA3dCmdHeader);
@@ -531,9 +832,9 @@ out_err:
 
 static int vmw_cmd_check_all(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
+			     void *buf,
 			     uint32_t size)
 {
-	void *buf = sw_context->cmd_bounce;
 	int32_t cur_size = size;
 	int ret;
 
@@ -582,7 +883,7 @@ static void vmw_apply_relocations(struct vmw_sw_context *sw_context)
 static void vmw_clear_validations(struct vmw_sw_context *sw_context)
 {
 	struct ttm_validate_buffer *entry, *next;
-	uint32_t i = sw_context->num_ref_resources;
+	struct vmw_resource *res, *res_next;
 
 	/*
 	 * Drop references to DMA buffers held during command submission.
@@ -599,9 +900,11 @@ static void vmw_clear_validations(struct vmw_sw_context *sw_context)
 	/*
 	 * Drop references to resources held during command submission.
 	 */
-	while (i-- > 0) {
-		sw_context->resources[i]->on_validate_list = false;
-		vmw_resource_unreference(&sw_context->resources[i]);
+	vmw_resource_unreserve(&sw_context->resource_list);
+	list_for_each_entry_safe(res, res_next, &sw_context->resource_list,
+				 validate_head) {
+		list_del_init(&res->validate_head);
+		vmw_resource_unreference(&res);
 	}
 }
 
@@ -610,6 +913,16 @@ static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
 {
 	int ret;
 
+
+	/*
+	 * Don't validate pinned buffers.
+	 */
+
+	if (bo == dev_priv->pinned_bo ||
+	    (bo == dev_priv->dummy_query_bo &&
+	     dev_priv->dummy_query_bo_pinned))
+		return 0;
+
 	/**
 	 * Put BO in VRAM if there is space, otherwise as a GMR.
 	 * If there is no space in VRAM and GMR ids are all used up,
@@ -681,6 +994,9 @@ static int vmw_resize_cmd_bounce(struct vmw_sw_context *sw_context,
  * Creates a fence object and submits a command stream marker.
  * If this fails for some reason, We sync the fifo and return NULL.
  * It is then safe to fence buffers with a NULL pointer.
+ *
+ * If @p_handle is not NULL @file_priv must also not be NULL. Creates
+ * a userspace handle if @p_handle is not NULL, otherwise not.
  */
 
 int vmw_execbuf_fence_commands(struct drm_file *file_priv,
@@ -692,6 +1008,8 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
 	int ret;
 	bool synced = false;
 
+	/* p_handle implies file_priv. */
+	BUG_ON(p_handle != NULL && file_priv == NULL);
 
 	ret = vmw_fifo_send_fence(dev_priv, &sequence);
 	if (unlikely(ret != 0)) {
@@ -719,69 +1037,127 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
 	return 0;
 }
 
-int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv)
+/**
+ * vmw_execbuf_copy_fence_user - copy fence object information to
+ * user-space.
+ *
+ * @dev_priv: Pointer to a vmw_private struct.
+ * @vmw_fp: Pointer to the struct vmw_fpriv representing the calling file.
+ * @ret: Return value from fence object creation.
+ * @user_fence_rep: User space address of a struct drm_vmw_fence_rep to
+ * which the information should be copied.
+ * @fence: Pointer to the fenc object.
+ * @fence_handle: User-space fence handle.
+ *
+ * This function copies fence information to user-space. If copying fails,
+ * The user-space struct drm_vmw_fence_rep::error member is hopefully
+ * left untouched, and if it's preloaded with an -EFAULT by user-space,
+ * the error will hopefully be detected.
+ * Also if copying fails, user-space will be unable to signal the fence
+ * object so we wait for it immediately, and then unreference the
+ * user-space reference.
+ */
+void
+vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+			    struct vmw_fpriv *vmw_fp,
+			    int ret,
+			    struct drm_vmw_fence_rep __user *user_fence_rep,
+			    struct vmw_fence_obj *fence,
+			    uint32_t fence_handle)
 {
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct drm_vmw_execbuf_arg *arg = (struct drm_vmw_execbuf_arg *)data;
 	struct drm_vmw_fence_rep fence_rep;
-	struct drm_vmw_fence_rep __user *user_fence_rep;
-	int ret;
-	void *user_cmd;
-	void *cmd;
-	struct vmw_sw_context *sw_context = &dev_priv->ctx;
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-	struct vmw_fence_obj *fence;
-	uint32_t handle;
+
+	if (user_fence_rep == NULL)
+		return;
+
+	memset(&fence_rep, 0, sizeof(fence_rep));
+
+	fence_rep.error = ret;
+	if (ret == 0) {
+		BUG_ON(fence == NULL);
+
+		fence_rep.handle = fence_handle;
+		fence_rep.seqno = fence->seqno;
+		vmw_update_seqno(dev_priv, &dev_priv->fifo);
+		fence_rep.passed_seqno = dev_priv->last_read_seqno;
+	}
 
 	/*
-	 * This will allow us to extend the ioctl argument while
-	 * maintaining backwards compatibility:
-	 * We take different code paths depending on the value of
-	 * arg->version.
+	 * copy_to_user errors will be detected by user space not
+	 * seeing fence_rep::error filled in. Typically
+	 * user-space would have pre-set that member to -EFAULT.
 	 */
+	ret = copy_to_user(user_fence_rep, &fence_rep,
+			   sizeof(fence_rep));
 
-	if (unlikely(arg->version != DRM_VMW_EXECBUF_VERSION)) {
-		DRM_ERROR("Incorrect execbuf version.\n");
-		DRM_ERROR("You're running outdated experimental "
-			  "vmwgfx user-space drivers.");
-		return -EINVAL;
+	/*
+	 * User-space lost the fence object. We need to sync
+	 * and unreference the handle.
+	 */
+	if (unlikely(ret != 0) && (fence_rep.error == 0)) {
+		ttm_ref_object_base_unref(vmw_fp->tfile,
+					  fence_handle, TTM_REF_USAGE);
+		DRM_ERROR("Fence copy error. Syncing.\n");
+		(void) vmw_fence_obj_wait(fence, fence->signal_mask,
+					  false, false,
+					  VMW_FENCE_WAIT_TIMEOUT);
 	}
+}
 
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		return ret;
+int vmw_execbuf_process(struct drm_file *file_priv,
+			struct vmw_private *dev_priv,
+			void __user *user_commands,
+			void *kernel_commands,
+			uint32_t command_size,
+			uint64_t throttle_us,
+			struct drm_vmw_fence_rep __user *user_fence_rep)
+{
+	struct vmw_sw_context *sw_context = &dev_priv->ctx;
+	struct vmw_fence_obj *fence;
+	uint32_t handle;
+	void *cmd;
+	int ret;
 
 	ret = mutex_lock_interruptible(&dev_priv->cmdbuf_mutex);
-	if (unlikely(ret != 0)) {
-		ret = -ERESTARTSYS;
-		goto out_no_cmd_mutex;
-	}
-
-	ret = vmw_resize_cmd_bounce(sw_context, arg->command_size);
 	if (unlikely(ret != 0))
-		goto out_unlock;
+		return -ERESTARTSYS;
 
-	user_cmd = (void __user *)(unsigned long)arg->commands;
-	ret = copy_from_user(sw_context->cmd_bounce,
-			     user_cmd, arg->command_size);
+	if (kernel_commands == NULL) {
+		sw_context->kernel = false;
 
-	if (unlikely(ret != 0)) {
-		ret = -EFAULT;
-		DRM_ERROR("Failed copying commands.\n");
-		goto out_unlock;
-	}
+		ret = vmw_resize_cmd_bounce(sw_context, command_size);
+		if (unlikely(ret != 0))
+			goto out_unlock;
+
+
+		ret = copy_from_user(sw_context->cmd_bounce,
+				     user_commands, command_size);
+
+		if (unlikely(ret != 0)) {
+			ret = -EFAULT;
+			DRM_ERROR("Failed copying commands.\n");
+			goto out_unlock;
+		}
+		kernel_commands = sw_context->cmd_bounce;
+	} else
+		sw_context->kernel = true;
 
 	sw_context->tfile = vmw_fpriv(file_priv)->tfile;
 	sw_context->cid_valid = false;
 	sw_context->sid_valid = false;
 	sw_context->cur_reloc = 0;
 	sw_context->cur_val_buf = 0;
-	sw_context->num_ref_resources = 0;
+	sw_context->fence_flags = 0;
+	INIT_LIST_HEAD(&sw_context->query_list);
+	INIT_LIST_HEAD(&sw_context->resource_list);
+	sw_context->cur_query_bo = dev_priv->pinned_bo;
+	sw_context->cur_query_cid = dev_priv->query_cid;
+	sw_context->query_cid_valid = (dev_priv->pinned_bo != NULL);
 
 	INIT_LIST_HEAD(&sw_context->validate_nodes);
 
-	ret = vmw_cmd_check_all(dev_priv, sw_context, arg->command_size);
+	ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands,
+				command_size);
 	if (unlikely(ret != 0))
 		goto out_err;
 
@@ -795,26 +1171,25 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 
 	vmw_apply_relocations(sw_context);
 
-	if (arg->throttle_us) {
+	if (throttle_us) {
 		ret = vmw_wait_lag(dev_priv, &dev_priv->fifo.marker_queue,
-				   arg->throttle_us);
+				   throttle_us);
 
 		if (unlikely(ret != 0))
 			goto out_throttle;
 	}
 
-	cmd = vmw_fifo_reserve(dev_priv, arg->command_size);
+	cmd = vmw_fifo_reserve(dev_priv, command_size);
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Failed reserving fifo space for commands.\n");
 		ret = -ENOMEM;
-		goto out_err;
+		goto out_throttle;
 	}
 
-	memcpy(cmd, sw_context->cmd_bounce, arg->command_size);
-	vmw_fifo_commit(dev_priv, arg->command_size);
+	memcpy(cmd, kernel_commands, command_size);
+	vmw_fifo_commit(dev_priv, command_size);
 
-	user_fence_rep = (struct drm_vmw_fence_rep __user *)
-		(unsigned long)arg->fence_rep;
+	vmw_query_bo_switch_commit(dev_priv, sw_context);
 	ret = vmw_execbuf_fence_commands(file_priv, dev_priv,
 					 &fence,
 					 (user_fence_rep) ? &handle : NULL);
@@ -831,54 +1206,171 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 				    (void *) fence);
 
 	vmw_clear_validations(sw_context);
-	mutex_unlock(&dev_priv->cmdbuf_mutex);
-
-	if (user_fence_rep) {
-		fence_rep.error = ret;
-		fence_rep.handle = handle;
-		fence_rep.seqno = fence->seqno;
-		vmw_update_seqno(dev_priv, &dev_priv->fifo);
-		fence_rep.passed_seqno = dev_priv->last_read_seqno;
-
-		/*
-		 * copy_to_user errors will be detected by user space not
-		 * seeing fence_rep::error filled in. Typically
-		 * user-space would have pre-set that member to -EFAULT.
-		 */
-		ret = copy_to_user(user_fence_rep, &fence_rep,
-				   sizeof(fence_rep));
-
-		/*
-		 * User-space lost the fence object. We need to sync
-		 * and unreference the handle.
-		 */
-		if (unlikely(ret != 0) && (fence_rep.error == 0)) {
-			BUG_ON(fence == NULL);
-
-			ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
-						  handle, TTM_REF_USAGE);
-			DRM_ERROR("Fence copy error. Syncing.\n");
-			(void) vmw_fence_obj_wait(fence,
-						  fence->signal_mask,
-						  false, false,
-						  VMW_FENCE_WAIT_TIMEOUT);
-		}
-	}
+	vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
+				    user_fence_rep, fence, handle);
 
 	if (likely(fence != NULL))
 		vmw_fence_obj_unreference(&fence);
 
-	vmw_kms_cursor_post_execbuf(dev_priv);
-	ttm_read_unlock(&vmaster->lock);
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
 	return 0;
+
 out_err:
 	vmw_free_relocations(sw_context);
 out_throttle:
+	vmw_query_switch_backoff(sw_context);
 	ttm_eu_backoff_reservation(&sw_context->validate_nodes);
 	vmw_clear_validations(sw_context);
 out_unlock:
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
-out_no_cmd_mutex:
+	return ret;
+}
+
+/**
+ * vmw_execbuf_unpin_panic - Idle the fifo and unpin the query buffer.
+ *
+ * @dev_priv: The device private structure.
+ *
+ * This function is called to idle the fifo and unpin the query buffer
+ * if the normal way to do this hits an error, which should typically be
+ * extremely rare.
+ */
+static void vmw_execbuf_unpin_panic(struct vmw_private *dev_priv)
+{
+	DRM_ERROR("Can't unpin query buffer. Trying to recover.\n");
+
+	(void) vmw_fallback_wait(dev_priv, false, true, 0, false, 10*HZ);
+	vmw_bo_pin(dev_priv->pinned_bo, false);
+	vmw_bo_pin(dev_priv->dummy_query_bo, false);
+	dev_priv->dummy_query_bo_pinned = false;
+}
+
+
+/**
+ * vmw_execbuf_release_pinned_bo - Flush queries and unpin the pinned
+ * query bo.
+ *
+ * @dev_priv: The device private structure.
+ * @only_on_cid_match: Only flush and unpin if the current active query cid
+ * matches @cid.
+ * @cid: Optional context id to match.
+ *
+ * This function should be used to unpin the pinned query bo, or
+ * as a query barrier when we need to make sure that all queries have
+ * finished before the next fifo command. (For example on hardware
+ * context destructions where the hardware may otherwise leak unfinished
+ * queries).
+ *
+ * This function does not return any failure codes, but make attempts
+ * to do safe unpinning in case of errors.
+ *
+ * The function will synchronize on the previous query barrier, and will
+ * thus not finish until that barrier has executed.
+ */
+void vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
+				   bool only_on_cid_match, uint32_t cid)
+{
+	int ret = 0;
+	struct list_head validate_list;
+	struct ttm_validate_buffer pinned_val, query_val;
+	struct vmw_fence_obj *fence;
+
+	mutex_lock(&dev_priv->cmdbuf_mutex);
+
+	if (dev_priv->pinned_bo == NULL)
+		goto out_unlock;
+
+	if (only_on_cid_match && cid != dev_priv->query_cid)
+		goto out_unlock;
+
+	INIT_LIST_HEAD(&validate_list);
+
+	pinned_val.new_sync_obj_arg = (void *)(unsigned long)
+		DRM_VMW_FENCE_FLAG_EXEC;
+	pinned_val.bo = ttm_bo_reference(dev_priv->pinned_bo);
+	list_add_tail(&pinned_val.head, &validate_list);
+
+	query_val.new_sync_obj_arg = pinned_val.new_sync_obj_arg;
+	query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo);
+	list_add_tail(&query_val.head, &validate_list);
+
+	do {
+		ret = ttm_eu_reserve_buffers(&validate_list);
+	} while (ret == -ERESTARTSYS);
+
+	if (unlikely(ret != 0)) {
+		vmw_execbuf_unpin_panic(dev_priv);
+		goto out_no_reserve;
+	}
+
+	ret = vmw_fifo_emit_dummy_query(dev_priv, dev_priv->query_cid);
+	if (unlikely(ret != 0)) {
+		vmw_execbuf_unpin_panic(dev_priv);
+		goto out_no_emit;
+	}
+
+	vmw_bo_pin(dev_priv->pinned_bo, false);
+	vmw_bo_pin(dev_priv->dummy_query_bo, false);
+	dev_priv->dummy_query_bo_pinned = false;
+
+	(void) vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
+	ttm_eu_fence_buffer_objects(&validate_list, (void *) fence);
+
+	ttm_bo_unref(&query_val.bo);
+	ttm_bo_unref(&pinned_val.bo);
+	ttm_bo_unref(&dev_priv->pinned_bo);
+
+out_unlock:
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
+	return;
+
+out_no_emit:
+	ttm_eu_backoff_reservation(&validate_list);
+out_no_reserve:
+	ttm_bo_unref(&query_val.bo);
+	ttm_bo_unref(&pinned_val.bo);
+	ttm_bo_unref(&dev_priv->pinned_bo);
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
+}
+
+
+int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_execbuf_arg *arg = (struct drm_vmw_execbuf_arg *)data;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	int ret;
+
+	/*
+	 * This will allow us to extend the ioctl argument while
+	 * maintaining backwards compatibility:
+	 * We take different code paths depending on the value of
+	 * arg->version.
+	 */
+
+	if (unlikely(arg->version != DRM_VMW_EXECBUF_VERSION)) {
+		DRM_ERROR("Incorrect execbuf version.\n");
+		DRM_ERROR("You're running outdated experimental "
+			  "vmwgfx user-space drivers.");
+		return -EINVAL;
+	}
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = vmw_execbuf_process(file_priv, dev_priv,
+				  (void __user *)(unsigned long)arg->commands,
+				  NULL, arg->command_size, arg->throttle_us,
+				  (void __user *)(unsigned long)arg->fence_rep);
+
+	if (unlikely(ret != 0))
+		goto out_unlock;
+
+	vmw_kms_cursor_post_execbuf(dev_priv);
+
+out_unlock:
 	ttm_read_unlock(&vmaster->lock);
 	return ret;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index b1888e801e22..070797b7b03a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -592,58 +592,6 @@ int vmw_fb_close(struct vmw_private *vmw_priv)
 	return 0;
 }
 
-int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
-			 struct vmw_dma_buffer *vmw_bo)
-{
-	struct ttm_buffer_object *bo = &vmw_bo->base;
-	int ret = 0;
-
-	ret = ttm_bo_reserve(bo, false, false, false, 0);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_validate(bo, &vmw_sys_placement, false, false, false);
-	ttm_bo_unreserve(bo);
-
-	return ret;
-}
-
-int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
-				struct vmw_dma_buffer *vmw_bo)
-{
-	struct ttm_buffer_object *bo = &vmw_bo->base;
-	struct ttm_placement ne_placement = vmw_vram_ne_placement;
-	int ret = 0;
-
-	ne_placement.lpfn = bo->num_pages;
-
-	/* interuptable? */
-	ret = ttm_write_lock(&vmw_priv->active_master->lock, false);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_reserve(bo, false, false, false, 0);
-	if (unlikely(ret != 0))
-		goto err_unlock;
-
-	if (bo->mem.mem_type == TTM_PL_VRAM &&
-	    bo->mem.start < bo->num_pages &&
-	    bo->mem.start > 0)
-		(void) ttm_bo_validate(bo, &vmw_sys_placement, false,
-				       false, false);
-
-	ret = ttm_bo_validate(bo, &ne_placement, false, false, false);
-
-	/* Could probably bug on */
-	WARN_ON(bo->offset != 0);
-
-	ttm_bo_unreserve(bo);
-err_unlock:
-	ttm_write_unlock(&vmw_priv->active_master->lock);
-
-	return ret;
-}
-
 int vmw_fb_off(struct vmw_private *vmw_priv)
 {
 	struct fb_info *info;
@@ -665,7 +613,7 @@ int vmw_fb_off(struct vmw_private *vmw_priv)
 	par->bo_ptr = NULL;
 	ttm_bo_kunmap(&par->map);
 
-	vmw_dmabuf_from_vram(vmw_priv, par->vmw_bo);
+	vmw_dmabuf_unpin(vmw_priv, par->vmw_bo, false);
 
 	return 0;
 }
@@ -691,7 +639,7 @@ int vmw_fb_on(struct vmw_private *vmw_priv)
 	/* Make sure that all overlays are stoped when we take over */
 	vmw_overlay_stop_all(vmw_priv);
 
-	ret = vmw_dmabuf_to_start_of_vram(vmw_priv, par->vmw_bo);
+	ret = vmw_dmabuf_to_start_of_vram(vmw_priv, par->vmw_bo, true, false);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("could not move buffer to start of VRAM\n");
 		goto err_no_buffer;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 5065a140fdf8..15fb26088d68 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -34,13 +34,18 @@ struct vmw_fence_manager {
 	int num_fence_objects;
 	struct vmw_private *dev_priv;
 	spinlock_t lock;
-	u32 next_seqno;
 	struct list_head fence_list;
 	struct work_struct work;
 	u32 user_fence_size;
 	u32 fence_size;
+	u32 event_fence_action_size;
 	bool fifo_down;
 	struct list_head cleanup_list;
+	uint32_t pending_actions[VMW_ACTION_MAX];
+	struct mutex goal_irq_mutex;
+	bool goal_irq_on; /* Protected by @goal_irq_mutex */
+	bool seqno_valid; /* Protected by @lock, and may not be set to true
+			     without the @goal_irq_mutex held. */
 };
 
 struct vmw_user_fence {
@@ -49,8 +54,51 @@ struct vmw_user_fence {
 };
 
 /**
- * vmw_fence_destroy_locked
+ * struct vmw_event_fence_action - fence action that delivers a drm event.
  *
+ * @e: A struct drm_pending_event that controls the event delivery.
+ * @action: A struct vmw_fence_action to hook up to a fence.
+ * @fence: A referenced pointer to the fence to keep it alive while @action
+ * hangs on it.
+ * @dev: Pointer to a struct drm_device so we can access the event stuff.
+ * @kref: Both @e and @action has destructors, so we need to refcount.
+ * @size: Size accounted for this object.
+ * @tv_sec: If non-null, the variable pointed to will be assigned
+ * current time tv_sec val when the fence signals.
+ * @tv_usec: Must be set if @tv_sec is set, and the variable pointed to will
+ * be assigned the current time tv_usec val when the fence signals.
+ */
+struct vmw_event_fence_action {
+	struct drm_pending_event e;
+	struct vmw_fence_action action;
+	struct vmw_fence_obj *fence;
+	struct drm_device *dev;
+	struct kref kref;
+	uint32_t size;
+	uint32_t *tv_sec;
+	uint32_t *tv_usec;
+};
+
+/**
+ * Note on fencing subsystem usage of irqs:
+ * Typically the vmw_fences_update function is called
+ *
+ * a) When a new fence seqno has been submitted by the fifo code.
+ * b) On-demand when we have waiters. Sleeping waiters will switch on the
+ * ANY_FENCE irq and call vmw_fences_update function each time an ANY_FENCE
+ * irq is received. When the last fence waiter is gone, that IRQ is masked
+ * away.
+ *
+ * In situations where there are no waiters and we don't submit any new fences,
+ * fence objects may not be signaled. This is perfectly OK, since there are
+ * no consumers of the signaled data, but that is NOT ok when there are fence
+ * actions attached to a fence. The fencing subsystem then makes use of the
+ * FENCE_GOAL irq and sets the fence goal seqno to that of the next fence
+ * which has an action attached, and each time vmw_fences_update is called,
+ * the subsystem makes sure the fence goal seqno is updated.
+ *
+ * The fence goal seqno irq is on as long as there are unsignaled fence
+ * objects with actions attached to them.
  */
 
 static void vmw_fence_obj_destroy_locked(struct kref *kref)
@@ -85,24 +133,36 @@ static void vmw_fence_work_func(struct work_struct *work)
 		container_of(work, struct vmw_fence_manager, work);
 	struct list_head list;
 	struct vmw_fence_action *action, *next_action;
+	bool seqno_valid;
 
 	do {
 		INIT_LIST_HEAD(&list);
+		mutex_lock(&fman->goal_irq_mutex);
+
 		spin_lock_irq(&fman->lock);
 		list_splice_init(&fman->cleanup_list, &list);
+		seqno_valid = fman->seqno_valid;
 		spin_unlock_irq(&fman->lock);
 
+		if (!seqno_valid && fman->goal_irq_on) {
+			fman->goal_irq_on = false;
+			vmw_goal_waiter_remove(fman->dev_priv);
+		}
+		mutex_unlock(&fman->goal_irq_mutex);
+
 		if (list_empty(&list))
 			return;
 
 		/*
 		 * At this point, only we should be able to manipulate the
 		 * list heads of the actions we have on the private list.
+		 * hence fman::lock not held.
 		 */
 
 		list_for_each_entry_safe(action, next_action, &list, head) {
 			list_del_init(&action->head);
-			action->cleanup(action);
+			if (action->cleanup)
+				action->cleanup(action);
 		}
 	} while (1);
 }
@@ -122,6 +182,9 @@ struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv)
 	fman->fifo_down = true;
 	fman->user_fence_size = ttm_round_pot(sizeof(struct vmw_user_fence));
 	fman->fence_size = ttm_round_pot(sizeof(struct vmw_fence_obj));
+	fman->event_fence_action_size =
+		ttm_round_pot(sizeof(struct vmw_event_fence_action));
+	mutex_init(&fman->goal_irq_mutex);
 
 	return fman;
 }
@@ -177,6 +240,9 @@ out_unlock:
 
 struct vmw_fence_obj *vmw_fence_obj_reference(struct vmw_fence_obj *fence)
 {
+	if (unlikely(fence == NULL))
+		return NULL;
+
 	kref_get(&fence->kref);
 	return fence;
 }
@@ -191,8 +257,12 @@ struct vmw_fence_obj *vmw_fence_obj_reference(struct vmw_fence_obj *fence)
 void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
 {
 	struct vmw_fence_obj *fence = *fence_p;
-	struct vmw_fence_manager *fman = fence->fman;
+	struct vmw_fence_manager *fman;
 
+	if (unlikely(fence == NULL))
+		return;
+
+	fman = fence->fman;
 	*fence_p = NULL;
 	spin_lock_irq(&fman->lock);
 	BUG_ON(atomic_read(&fence->kref.refcount) == 0);
@@ -207,6 +277,7 @@ void vmw_fences_perform_actions(struct vmw_fence_manager *fman,
 
 	list_for_each_entry_safe(action, next_action, list, head) {
 		list_del_init(&action->head);
+		fman->pending_actions[action->type]--;
 		if (action->seq_passed != NULL)
 			action->seq_passed(action);
 
@@ -215,17 +286,101 @@ void vmw_fences_perform_actions(struct vmw_fence_manager *fman,
 		 * it will be performed by a worker task.
 		 */
 
-		if (action->cleanup != NULL)
-			list_add_tail(&action->head, &fman->cleanup_list);
+		list_add_tail(&action->head, &fman->cleanup_list);
 	}
 }
 
-void vmw_fences_update(struct vmw_fence_manager *fman, u32 seqno)
+/**
+ * vmw_fence_goal_new_locked - Figure out a new device fence goal
+ * seqno if needed.
+ *
+ * @fman: Pointer to a fence manager.
+ * @passed_seqno: The seqno the device currently signals as passed.
+ *
+ * This function should be called with the fence manager lock held.
+ * It is typically called when we have a new passed_seqno, and
+ * we might need to update the fence goal. It checks to see whether
+ * the current fence goal has already passed, and, in that case,
+ * scans through all unsignaled fences to get the next fence object with an
+ * action attached, and sets the seqno of that fence as a new fence goal.
+ *
+ * returns true if the device goal seqno was updated. False otherwise.
+ */
+static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
+				      u32 passed_seqno)
+{
+	u32 goal_seqno;
+	__le32 __iomem *fifo_mem;
+	struct vmw_fence_obj *fence;
+
+	if (likely(!fman->seqno_valid))
+		return false;
+
+	fifo_mem = fman->dev_priv->mmio_virt;
+	goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP))
+		return false;
+
+	fman->seqno_valid = false;
+	list_for_each_entry(fence, &fman->fence_list, head) {
+		if (!list_empty(&fence->seq_passed_actions)) {
+			fman->seqno_valid = true;
+			iowrite32(fence->seqno,
+				  fifo_mem + SVGA_FIFO_FENCE_GOAL);
+			break;
+		}
+	}
+
+	return true;
+}
+
+
+/**
+ * vmw_fence_goal_check_locked - Replace the device fence goal seqno if
+ * needed.
+ *
+ * @fence: Pointer to a struct vmw_fence_obj the seqno of which should be
+ * considered as a device fence goal.
+ *
+ * This function should be called with the fence manager lock held.
+ * It is typically called when an action has been attached to a fence to
+ * check whether the seqno of that fence should be used for a fence
+ * goal interrupt. This is typically needed if the current fence goal is
+ * invalid, or has a higher seqno than that of the current fence object.
+ *
+ * returns true if the device goal seqno was updated. False otherwise.
+ */
+static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
+{
+	u32 goal_seqno;
+	__le32 __iomem *fifo_mem;
+
+	if (fence->signaled & DRM_VMW_FENCE_FLAG_EXEC)
+		return false;
+
+	fifo_mem = fence->fman->dev_priv->mmio_virt;
+	goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	if (likely(fence->fman->seqno_valid &&
+		   goal_seqno - fence->seqno < VMW_FENCE_WRAP))
+		return false;
+
+	iowrite32(fence->seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	fence->fman->seqno_valid = true;
+
+	return true;
+}
+
+void vmw_fences_update(struct vmw_fence_manager *fman)
 {
 	unsigned long flags;
 	struct vmw_fence_obj *fence, *next_fence;
 	struct list_head action_list;
+	bool needs_rerun;
+	uint32_t seqno, new_seqno;
+	__le32 __iomem *fifo_mem = fman->dev_priv->mmio_virt;
 
+	seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+rerun:
 	spin_lock_irqsave(&fman->lock, flags);
 	list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
 		if (seqno - fence->seqno < VMW_FENCE_WRAP) {
@@ -236,14 +391,30 @@ void vmw_fences_update(struct vmw_fence_manager *fman, u32 seqno)
 					 &action_list);
 			vmw_fences_perform_actions(fman, &action_list);
 			wake_up_all(&fence->queue);
-		}
-
+		} else
+			break;
 	}
+
+	needs_rerun = vmw_fence_goal_new_locked(fman, seqno);
+
 	if (!list_empty(&fman->cleanup_list))
 		(void) schedule_work(&fman->work);
 	spin_unlock_irqrestore(&fman->lock, flags);
-}
 
+	/*
+	 * Rerun if the fence goal seqno was updated, and the
+	 * hardware might have raced with that update, so that
+	 * we missed a fence_goal irq.
+	 */
+
+	if (unlikely(needs_rerun)) {
+		new_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+		if (new_seqno != seqno) {
+			seqno = new_seqno;
+			goto rerun;
+		}
+	}
+}
 
 bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
 			    uint32_t flags)
@@ -260,14 +431,8 @@ bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
 	if ((signaled & flags) == flags)
 		return 1;
 
-	if ((signaled & DRM_VMW_FENCE_FLAG_EXEC) == 0) {
-		struct vmw_private *dev_priv = fman->dev_priv;
-		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
-		u32 seqno;
-
-		seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
-		vmw_fences_update(fman, seqno);
-	}
+	if ((signaled & DRM_VMW_FENCE_FLAG_EXEC) == 0)
+		vmw_fences_update(fman);
 
 	spin_lock_irqsave(&fman->lock, irq_flags);
 	signaled = fence->signaled;
@@ -617,3 +782,343 @@ int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
 					 arg->handle,
 					 TTM_REF_USAGE);
 }
+
+/**
+ * vmw_event_fence_action_destroy
+ *
+ * @kref: The struct kref embedded in a struct vmw_event_fence_action.
+ *
+ * The vmw_event_fence_action destructor that may be called either after
+ * the fence action cleanup, or when the event is delivered.
+ * It frees both the vmw_event_fence_action struct and the actual
+ * event structure copied to user-space.
+ */
+static void vmw_event_fence_action_destroy(struct kref *kref)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(kref, struct vmw_event_fence_action, kref);
+	struct ttm_mem_global *mem_glob =
+		vmw_mem_glob(vmw_priv(eaction->dev));
+	uint32_t size = eaction->size;
+
+	kfree(eaction->e.event);
+	kfree(eaction);
+	ttm_mem_global_free(mem_glob, size);
+}
+
+
+/**
+ * vmw_event_fence_action_delivered
+ *
+ * @e: The struct drm_pending_event embedded in a struct
+ * vmw_event_fence_action.
+ *
+ * The struct drm_pending_event destructor that is called by drm
+ * once the event is delivered. Since we don't know whether this function
+ * will be called before or after the fence action destructor, we
+ * free a refcount and destroy if it becomes zero.
+ */
+static void vmw_event_fence_action_delivered(struct drm_pending_event *e)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(e, struct vmw_event_fence_action, e);
+
+	kref_put(&eaction->kref, vmw_event_fence_action_destroy);
+}
+
+
+/**
+ * vmw_event_fence_action_seq_passed
+ *
+ * @action: The struct vmw_fence_action embedded in a struct
+ * vmw_event_fence_action.
+ *
+ * This function is called when the seqno of the fence where @action is
+ * attached has passed. It queues the event on the submitter's event list.
+ * This function is always called from atomic context, and may be called
+ * from irq context. It ups a refcount reflecting that we now have two
+ * destructors.
+ */
+static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(action, struct vmw_event_fence_action, action);
+	struct drm_device *dev = eaction->dev;
+	struct drm_file *file_priv = eaction->e.file_priv;
+	unsigned long irq_flags;
+
+	kref_get(&eaction->kref);
+	spin_lock_irqsave(&dev->event_lock, irq_flags);
+
+	if (likely(eaction->tv_sec != NULL)) {
+		struct timeval tv;
+
+		do_gettimeofday(&tv);
+		*eaction->tv_sec = tv.tv_sec;
+		*eaction->tv_usec = tv.tv_usec;
+	}
+
+	list_add_tail(&eaction->e.link, &file_priv->event_list);
+	wake_up_all(&file_priv->event_wait);
+	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+}
+
+/**
+ * vmw_event_fence_action_cleanup
+ *
+ * @action: The struct vmw_fence_action embedded in a struct
+ * vmw_event_fence_action.
+ *
+ * This function is the struct vmw_fence_action destructor. It's typically
+ * called from a workqueue.
+ */
+static void vmw_event_fence_action_cleanup(struct vmw_fence_action *action)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(action, struct vmw_event_fence_action, action);
+
+	vmw_fence_obj_unreference(&eaction->fence);
+	kref_put(&eaction->kref, vmw_event_fence_action_destroy);
+}
+
+
+/**
+ * vmw_fence_obj_add_action - Add an action to a fence object.
+ *
+ * @fence - The fence object.
+ * @action - The action to add.
+ *
+ * Note that the action callbacks may be executed before this function
+ * returns.
+ */
+void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
+			      struct vmw_fence_action *action)
+{
+	struct vmw_fence_manager *fman = fence->fman;
+	unsigned long irq_flags;
+	bool run_update = false;
+
+	mutex_lock(&fman->goal_irq_mutex);
+	spin_lock_irqsave(&fman->lock, irq_flags);
+
+	fman->pending_actions[action->type]++;
+	if (fence->signaled & DRM_VMW_FENCE_FLAG_EXEC) {
+		struct list_head action_list;
+
+		INIT_LIST_HEAD(&action_list);
+		list_add_tail(&action->head, &action_list);
+		vmw_fences_perform_actions(fman, &action_list);
+	} else {
+		list_add_tail(&action->head, &fence->seq_passed_actions);
+
+		/*
+		 * This function may set fman::seqno_valid, so it must
+		 * be run with the goal_irq_mutex held.
+		 */
+		run_update = vmw_fence_goal_check_locked(fence);
+	}
+
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	if (run_update) {
+		if (!fman->goal_irq_on) {
+			fman->goal_irq_on = true;
+			vmw_goal_waiter_add(fman->dev_priv);
+		}
+		vmw_fences_update(fman);
+	}
+	mutex_unlock(&fman->goal_irq_mutex);
+
+}
+
+/**
+ * vmw_event_fence_action_create - Post an event for sending when a fence
+ * object seqno has passed.
+ *
+ * @file_priv: The file connection on which the event should be posted.
+ * @fence: The fence object on which to post the event.
+ * @event: Event to be posted. This event should've been alloced
+ * using k[mz]alloc, and should've been completely initialized.
+ * @interruptible: Interruptible waits if possible.
+ *
+ * As a side effect, the object pointed to by @event may have been
+ * freed when this function returns. If this function returns with
+ * an error code, the caller needs to free that object.
+ */
+
+int vmw_event_fence_action_create(struct drm_file *file_priv,
+				  struct vmw_fence_obj *fence,
+				  struct drm_event *event,
+				  uint32_t *tv_sec,
+				  uint32_t *tv_usec,
+				  bool interruptible)
+{
+	struct vmw_event_fence_action *eaction;
+	struct ttm_mem_global *mem_glob =
+		vmw_mem_glob(fence->fman->dev_priv);
+	struct vmw_fence_manager *fman = fence->fman;
+	uint32_t size = fman->event_fence_action_size +
+		ttm_round_pot(event->length);
+	int ret;
+
+	/*
+	 * Account for internal structure size as well as the
+	 * event size itself.
+	 */
+
+	ret = ttm_mem_global_alloc(mem_glob, size, false, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	eaction = kzalloc(sizeof(*eaction), GFP_KERNEL);
+	if (unlikely(eaction == NULL)) {
+		ttm_mem_global_free(mem_glob, size);
+		return -ENOMEM;
+	}
+
+	eaction->e.event = event;
+	eaction->e.file_priv = file_priv;
+	eaction->e.destroy = vmw_event_fence_action_delivered;
+
+	eaction->action.seq_passed = vmw_event_fence_action_seq_passed;
+	eaction->action.cleanup = vmw_event_fence_action_cleanup;
+	eaction->action.type = VMW_ACTION_EVENT;
+
+	eaction->fence = vmw_fence_obj_reference(fence);
+	eaction->dev = fman->dev_priv->dev;
+	eaction->size = size;
+	eaction->tv_sec = tv_sec;
+	eaction->tv_usec = tv_usec;
+
+	kref_init(&eaction->kref);
+	vmw_fence_obj_add_action(fence, &eaction->action);
+
+	return 0;
+}
+
+int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_fence_event_arg *arg =
+		(struct drm_vmw_fence_event_arg *) data;
+	struct vmw_fence_obj *fence = NULL;
+	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
+	struct drm_vmw_fence_rep __user *user_fence_rep =
+		(struct drm_vmw_fence_rep __user *)(unsigned long)
+		arg->fence_rep;
+	uint32_t handle;
+	unsigned long irq_flags;
+	struct drm_vmw_event_fence *event;
+	int ret;
+
+	/*
+	 * Look up an existing fence object,
+	 * and if user-space wants a new reference,
+	 * add one.
+	 */
+	if (arg->handle) {
+		struct ttm_base_object *base =
+			ttm_base_object_lookup(vmw_fp->tfile, arg->handle);
+
+		if (unlikely(base == NULL)) {
+			DRM_ERROR("Fence event invalid fence object handle "
+				  "0x%08lx.\n",
+				  (unsigned long)arg->handle);
+			return -EINVAL;
+		}
+		fence = &(container_of(base, struct vmw_user_fence,
+				       base)->fence);
+		(void) vmw_fence_obj_reference(fence);
+
+		if (user_fence_rep != NULL) {
+			bool existed;
+
+			ret = ttm_ref_object_add(vmw_fp->tfile, base,
+						 TTM_REF_USAGE, &existed);
+			if (unlikely(ret != 0)) {
+				DRM_ERROR("Failed to reference a fence "
+					  "object.\n");
+				goto out_no_ref_obj;
+			}
+			handle = base->hash.key;
+		}
+		ttm_base_object_unref(&base);
+	}
+
+	/*
+	 * Create a new fence object.
+	 */
+	if (!fence) {
+		ret = vmw_execbuf_fence_commands(file_priv, dev_priv,
+						 &fence,
+						 (user_fence_rep) ?
+						 &handle : NULL);
+		if (unlikely(ret != 0)) {
+			DRM_ERROR("Fence event failed to create fence.\n");
+			return ret;
+		}
+	}
+
+	BUG_ON(fence == NULL);
+
+	spin_lock_irqsave(&dev->event_lock, irq_flags);
+
+	ret = (file_priv->event_space < sizeof(*event)) ? -EBUSY : 0;
+	if (likely(ret == 0))
+		file_priv->event_space -= sizeof(*event);
+
+	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Failed to allocate event space for this file.\n");
+		goto out_no_event_space;
+	}
+
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (unlikely(event == NULL)) {
+		DRM_ERROR("Failed to allocate an event.\n");
+		goto out_no_event;
+	}
+
+	event->base.type = DRM_VMW_EVENT_FENCE_SIGNALED;
+	event->base.length = sizeof(*event);
+	event->user_data = arg->user_data;
+
+	if (arg->flags & DRM_VMW_FE_FLAG_REQ_TIME)
+		ret = vmw_event_fence_action_create(file_priv, fence,
+						    &event->base,
+						    &event->tv_sec,
+						    &event->tv_usec,
+						    true);
+	else
+		ret = vmw_event_fence_action_create(file_priv, fence,
+						    &event->base,
+						    NULL,
+						    NULL,
+						    true);
+
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Failed to attach event to fence.\n");
+		goto out_no_attach;
+	}
+
+	vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence,
+				    handle);
+	vmw_fence_obj_unreference(&fence);
+	return 0;
+out_no_attach:
+	kfree(event);
+out_no_event:
+	spin_lock_irqsave(&dev->event_lock, irq_flags);
+	file_priv->event_space += sizeof(*event);
+	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+out_no_event_space:
+	if (user_fence_rep != NULL)
+		ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
+					  handle, TTM_REF_USAGE);
+out_no_ref_obj:
+	vmw_fence_obj_unreference(&fence);
+	return ret;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
index 93074064aaf3..0854a2096b55 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
@@ -37,8 +37,14 @@ struct vmw_fence_manager;
  *
  *
  */
+enum vmw_action_type {
+	VMW_ACTION_EVENT = 0,
+	VMW_ACTION_MAX
+};
+
 struct vmw_fence_action {
 	struct list_head head;
+	enum vmw_action_type type;
 	void (*seq_passed) (struct vmw_fence_action *action);
 	void (*cleanup) (struct vmw_fence_action *action);
 };
@@ -66,8 +72,7 @@ extern void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p);
 extern struct vmw_fence_obj *
 vmw_fence_obj_reference(struct vmw_fence_obj *fence);
 
-extern void vmw_fences_update(struct vmw_fence_manager *fman,
-			      u32 sequence);
+extern void vmw_fences_update(struct vmw_fence_manager *fman);
 
 extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
 				   uint32_t flags);
@@ -102,4 +107,7 @@ extern int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
 
 extern int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
 				     struct drm_file *file_priv);
+extern int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv);
+
 #endif /* _VMWGFX_FENCE_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 3ba9cac579e0..03bbc2a6f9a7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -45,7 +45,11 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
 	if (hwversion == 0)
 		return false;
 
-	if (hwversion < SVGA3D_HWVERSION_WS65_B1)
+	if (hwversion < SVGA3D_HWVERSION_WS8_B1)
+		return false;
+
+	/* Non-Screen Object path does not support surfaces */
+	if (!dev_priv->sou_priv)
 		return false;
 
 	return true;
@@ -243,9 +247,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
 		outl(SVGA_IRQFLAG_FIFO_PROGRESS,
 		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) |
-			  SVGA_IRQFLAG_FIFO_PROGRESS);
+		dev_priv->irq_mask |= SVGA_IRQFLAG_FIFO_PROGRESS;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
@@ -267,9 +270,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
 	mutex_lock(&dev_priv->hw_mutex);
 	if (atomic_dec_and_test(&dev_priv->fifo_queue_waiters)) {
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) &
-			  ~SVGA_IRQFLAG_FIFO_PROGRESS);
+		dev_priv->irq_mask &= ~SVGA_IRQFLAG_FIFO_PROGRESS;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
@@ -277,6 +279,16 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
 	return ret;
 }
 
+/**
+ * Reserve @bytes number of bytes in the fifo.
+ *
+ * This function will return NULL (error) on two conditions:
+ *  If it timeouts waiting for fifo space, or if @bytes is larger than the
+ *   available fifo space.
+ *
+ * Returns:
+ *   Pointer to the fifo, or null on error (possible hardware hang).
+ */
 void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes)
 {
 	struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
@@ -491,3 +503,60 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *seqno)
 out_err:
 	return ret;
 }
+
+/**
+ * vmw_fifo_emit_dummy_query - emits a dummy query to the fifo.
+ *
+ * @dev_priv: The device private structure.
+ * @cid: The hardware context id used for the query.
+ *
+ * This function is used to emit a dummy occlusion query with
+ * no primitives rendered between query begin and query end.
+ * It's used to provide a query barrier, in order to know that when
+ * this query is finished, all preceding queries are also finished.
+ *
+ * A Query results structure should have been initialized at the start
+ * of the dev_priv->dummy_query_bo buffer object. And that buffer object
+ * must also be either reserved or pinned when this function is called.
+ *
+ * Returns -ENOMEM on failure to reserve fifo space.
+ */
+int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
+			      uint32_t cid)
+{
+	/*
+	 * A query wait without a preceding query end will
+	 * actually finish all queries for this cid
+	 * without writing to the query result structure.
+	 */
+
+	struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdWaitForQuery body;
+	} *cmd;
+
+	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Out of fifo space for dummy query.\n");
+		return -ENOMEM;
+	}
+
+	cmd->header.id = SVGA_3D_CMD_WAIT_FOR_QUERY;
+	cmd->header.size = sizeof(cmd->body);
+	cmd->body.cid = cid;
+	cmd->body.type = SVGA3D_QUERYTYPE_OCCLUSION;
+
+	if (bo->mem.mem_type == TTM_PL_VRAM) {
+		cmd->body.guestResult.gmrId = SVGA_GMR_FRAMEBUFFER;
+		cmd->body.guestResult.offset = bo->offset;
+	} else {
+		cmd->body.guestResult.gmrId = bo->mem.start;
+		cmd->body.guestResult.offset = 0;
+	}
+
+	vmw_fifo_commit(dev_priv, sizeof(*cmd));
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 5ecf96660644..3f6343502d1f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -27,6 +27,7 @@
 
 #include "vmwgfx_drv.h"
 #include "vmwgfx_drm.h"
+#include "vmwgfx_kms.h"
 
 int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
@@ -110,3 +111,219 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 
 	return ret;
 }
+
+int vmw_present_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_present_arg *arg =
+		(struct drm_vmw_present_arg *)data;
+	struct vmw_surface *surface;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct drm_vmw_rect __user *clips_ptr;
+	struct drm_vmw_rect *clips = NULL;
+	struct drm_mode_object *obj;
+	struct vmw_framebuffer *vfb;
+	uint32_t num_clips;
+	int ret;
+
+	num_clips = arg->num_clips;
+	clips_ptr = (struct drm_vmw_rect *)(unsigned long)arg->clips_ptr;
+
+	if (unlikely(num_clips == 0))
+		return 0;
+
+	if (clips_ptr == NULL) {
+		DRM_ERROR("Variable clips_ptr must be specified.\n");
+		ret = -EINVAL;
+		goto out_clips;
+	}
+
+	clips = kzalloc(num_clips * sizeof(*clips), GFP_KERNEL);
+	if (clips == NULL) {
+		DRM_ERROR("Failed to allocate clip rect list.\n");
+		ret = -ENOMEM;
+		goto out_clips;
+	}
+
+	ret = copy_from_user(clips, clips_ptr, num_clips * sizeof(*clips));
+	if (ret) {
+		DRM_ERROR("Failed to copy clip rects from userspace.\n");
+		ret = -EFAULT;
+		goto out_no_copy;
+	}
+
+	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
+	if (unlikely(ret != 0)) {
+		ret = -ERESTARTSYS;
+		goto out_no_mode_mutex;
+	}
+
+	obj = drm_mode_object_find(dev, arg->fb_id, DRM_MODE_OBJECT_FB);
+	if (!obj) {
+		DRM_ERROR("Invalid framebuffer id.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	vfb = vmw_framebuffer_to_vfb(obj_to_fb(obj));
+	if (!vfb->dmabuf) {
+		DRM_ERROR("Framebuffer not dmabuf backed.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		goto out_no_ttm_lock;
+
+	ret = vmw_user_surface_lookup_handle(dev_priv, tfile, arg->sid,
+					     &surface);
+	if (ret)
+		goto out_no_surface;
+
+	ret = vmw_kms_present(dev_priv, file_priv,
+			      vfb, surface, arg->sid,
+			      arg->dest_x, arg->dest_y,
+			      clips, num_clips);
+
+	/* vmw_user_surface_lookup takes one ref so does new_fb */
+	vmw_surface_unreference(&surface);
+
+out_no_surface:
+	ttm_read_unlock(&vmaster->lock);
+out_no_ttm_lock:
+out_no_fb:
+	mutex_unlock(&dev->mode_config.mutex);
+out_no_mode_mutex:
+out_no_copy:
+	kfree(clips);
+out_clips:
+	return ret;
+}
+
+int vmw_present_readback_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_present_readback_arg *arg =
+		(struct drm_vmw_present_readback_arg *)data;
+	struct drm_vmw_fence_rep __user *user_fence_rep =
+		(struct drm_vmw_fence_rep __user *)
+		(unsigned long)arg->fence_rep;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct drm_vmw_rect __user *clips_ptr;
+	struct drm_vmw_rect *clips = NULL;
+	struct drm_mode_object *obj;
+	struct vmw_framebuffer *vfb;
+	uint32_t num_clips;
+	int ret;
+
+	num_clips = arg->num_clips;
+	clips_ptr = (struct drm_vmw_rect *)(unsigned long)arg->clips_ptr;
+
+	if (unlikely(num_clips == 0))
+		return 0;
+
+	if (clips_ptr == NULL) {
+		DRM_ERROR("Argument clips_ptr must be specified.\n");
+		ret = -EINVAL;
+		goto out_clips;
+	}
+
+	clips = kzalloc(num_clips * sizeof(*clips), GFP_KERNEL);
+	if (clips == NULL) {
+		DRM_ERROR("Failed to allocate clip rect list.\n");
+		ret = -ENOMEM;
+		goto out_clips;
+	}
+
+	ret = copy_from_user(clips, clips_ptr, num_clips * sizeof(*clips));
+	if (ret) {
+		DRM_ERROR("Failed to copy clip rects from userspace.\n");
+		ret = -EFAULT;
+		goto out_no_copy;
+	}
+
+	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
+	if (unlikely(ret != 0)) {
+		ret = -ERESTARTSYS;
+		goto out_no_mode_mutex;
+	}
+
+	obj = drm_mode_object_find(dev, arg->fb_id, DRM_MODE_OBJECT_FB);
+	if (!obj) {
+		DRM_ERROR("Invalid framebuffer id.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	vfb = vmw_framebuffer_to_vfb(obj_to_fb(obj));
+	if (!vfb->dmabuf) {
+		DRM_ERROR("Framebuffer not dmabuf backed.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		goto out_no_ttm_lock;
+
+	ret = vmw_kms_readback(dev_priv, file_priv,
+			       vfb, user_fence_rep,
+			       clips, num_clips);
+
+	ttm_read_unlock(&vmaster->lock);
+out_no_ttm_lock:
+out_no_fb:
+	mutex_unlock(&dev->mode_config.mutex);
+out_no_mode_mutex:
+out_no_copy:
+	kfree(clips);
+out_clips:
+	return ret;
+}
+
+
+/**
+ * vmw_fops_poll - wrapper around the drm_poll function
+ *
+ * @filp: See the linux fops poll documentation.
+ * @wait: See the linux fops poll documentation.
+ *
+ * Wrapper around the drm_poll function that makes sure the device is
+ * processing the fifo if drm_poll decides to wait.
+ */
+unsigned int vmw_fops_poll(struct file *filp, struct poll_table_struct *wait)
+{
+	struct drm_file *file_priv = filp->private_data;
+	struct vmw_private *dev_priv =
+		vmw_priv(file_priv->minor->dev);
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	return drm_poll(filp, wait);
+}
+
+
+/**
+ * vmw_fops_read - wrapper around the drm_read function
+ *
+ * @filp: See the linux fops read documentation.
+ * @buffer: See the linux fops read documentation.
+ * @count: See the linux fops read documentation.
+ * offset: See the linux fops read documentation.
+ *
+ * Wrapper around the drm_read function that makes sure the device is
+ * processing the fifo if drm_read decides to wait.
+ */
+ssize_t vmw_fops_read(struct file *filp, char __user *buffer,
+		      size_t count, loff_t *offset)
+{
+	struct drm_file *file_priv = filp->private_data;
+	struct vmw_private *dev_priv =
+		vmw_priv(file_priv->minor->dev);
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	return drm_read(filp, buffer, count, offset);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index a005292a8908..cabc95f7517e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -34,28 +34,30 @@ irqreturn_t vmw_irq_handler(DRM_IRQ_ARGS)
 {
 	struct drm_device *dev = (struct drm_device *)arg;
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	uint32_t status;
+	uint32_t status, masked_status;
 
 	spin_lock(&dev_priv->irq_lock);
 	status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+	masked_status = status & dev_priv->irq_mask;
 	spin_unlock(&dev_priv->irq_lock);
 
-	if (status & SVGA_IRQFLAG_ANY_FENCE) {
-		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
-		uint32_t seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+	if (likely(status))
+		outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+
+	if (!masked_status)
+		return IRQ_NONE;
 
-		vmw_fences_update(dev_priv->fman, seqno);
+	if (masked_status & (SVGA_IRQFLAG_ANY_FENCE |
+			     SVGA_IRQFLAG_FENCE_GOAL)) {
+		vmw_fences_update(dev_priv->fman);
 		wake_up_all(&dev_priv->fence_queue);
 	}
-	if (status & SVGA_IRQFLAG_FIFO_PROGRESS)
+
+	if (masked_status & SVGA_IRQFLAG_FIFO_PROGRESS)
 		wake_up_all(&dev_priv->fifo_queue);
 
-	if (likely(status)) {
-		outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
-		return IRQ_HANDLED;
-	}
 
-	return IRQ_NONE;
+	return IRQ_HANDLED;
 }
 
 static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t seqno)
@@ -78,7 +80,7 @@ void vmw_update_seqno(struct vmw_private *dev_priv,
 	if (dev_priv->last_read_seqno != seqno) {
 		dev_priv->last_read_seqno = seqno;
 		vmw_marker_pull(&fifo_state->marker_queue, seqno);
-		vmw_fences_update(dev_priv->fman, seqno);
+		vmw_fences_update(dev_priv->fman);
 	}
 }
 
@@ -189,9 +191,8 @@ void vmw_seqno_waiter_add(struct vmw_private *dev_priv)
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
 		outl(SVGA_IRQFLAG_ANY_FENCE,
 		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) |
-			  SVGA_IRQFLAG_ANY_FENCE);
+		dev_priv->irq_mask |= SVGA_IRQFLAG_ANY_FENCE;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
@@ -204,9 +205,39 @@ void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
 		unsigned long irq_flags;
 
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) &
-			  ~SVGA_IRQFLAG_ANY_FENCE);
+		dev_priv->irq_mask &= ~SVGA_IRQFLAG_ANY_FENCE;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
+	}
+	mutex_unlock(&dev_priv->hw_mutex);
+}
+
+
+void vmw_goal_waiter_add(struct vmw_private *dev_priv)
+{
+	mutex_lock(&dev_priv->hw_mutex);
+	if (dev_priv->goal_queue_waiters++ == 0) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
+		outl(SVGA_IRQFLAG_FENCE_GOAL,
+		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+		dev_priv->irq_mask |= SVGA_IRQFLAG_FENCE_GOAL;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
+	}
+	mutex_unlock(&dev_priv->hw_mutex);
+}
+
+void vmw_goal_waiter_remove(struct vmw_private *dev_priv)
+{
+	mutex_lock(&dev_priv->hw_mutex);
+	if (--dev_priv->goal_queue_waiters == 0) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
+		dev_priv->irq_mask &= ~SVGA_IRQFLAG_FENCE_GOAL;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 1a4c84cecca7..8b14dfd513a1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -27,12 +27,10 @@
 
 #include "vmwgfx_kms.h"
 
+
 /* Might need a hrtimer here? */
 #define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1)
 
-static int vmw_surface_dmabuf_pin(struct vmw_framebuffer *vfb);
-static int vmw_surface_dmabuf_unpin(struct vmw_framebuffer *vfb);
-
 void vmw_display_unit_cleanup(struct vmw_display_unit *du)
 {
 	if (du->cursor_surface)
@@ -329,41 +327,10 @@ struct vmw_framebuffer_surface {
 	struct vmw_framebuffer base;
 	struct vmw_surface *surface;
 	struct vmw_dma_buffer *buffer;
-	struct delayed_work d_work;
-	struct mutex work_lock;
-	bool present_fs;
 	struct list_head head;
 	struct drm_master *master;
 };
 
-/**
- * vmw_kms_idle_workqueues - Flush workqueues on this master
- *
- * @vmaster - Pointer identifying the master, for the surfaces of which
- * we idle the dirty work queues.
- *
- * This function should be called with the ttm lock held in exclusive mode
- * to idle all dirty work queues before the fifo is taken down.
- *
- * The work task may actually requeue itself, but after the flush returns we're
- * sure that there's nothing to present, since the ttm lock is held in
- * exclusive mode, so the fifo will never get used.
- */
-
-void vmw_kms_idle_workqueues(struct vmw_master *vmaster)
-{
-	struct vmw_framebuffer_surface *entry;
-
-	mutex_lock(&vmaster->fb_surf_mutex);
-	list_for_each_entry(entry, &vmaster->fb_surf, head) {
-		if (cancel_delayed_work_sync(&entry->d_work))
-			(void) entry->d_work.work.func(&entry->d_work.work);
-
-		(void) cancel_delayed_work_sync(&entry->d_work);
-	}
-	mutex_unlock(&vmaster->fb_surf_mutex);
-}
-
 void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer)
 {
 	struct vmw_framebuffer_surface *vfbs =
@@ -375,64 +342,127 @@ void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer)
 	list_del(&vfbs->head);
 	mutex_unlock(&vmaster->fb_surf_mutex);
 
-	cancel_delayed_work_sync(&vfbs->d_work);
 	drm_master_put(&vfbs->master);
 	drm_framebuffer_cleanup(framebuffer);
 	vmw_surface_unreference(&vfbs->surface);
+	ttm_base_object_unref(&vfbs->base.user_obj);
 
 	kfree(vfbs);
 }
 
-static void vmw_framebuffer_present_fs_callback(struct work_struct *work)
+static int do_surface_dirty_sou(struct vmw_private *dev_priv,
+				struct drm_file *file_priv,
+				struct vmw_framebuffer *framebuffer,
+				unsigned flags, unsigned color,
+				struct drm_clip_rect *clips,
+				unsigned num_clips, int inc)
 {
-	struct delayed_work *d_work =
-		container_of(work, struct delayed_work, work);
-	struct vmw_framebuffer_surface *vfbs =
-		container_of(d_work, struct vmw_framebuffer_surface, d_work);
-	struct vmw_surface *surf = vfbs->surface;
-	struct drm_framebuffer *framebuffer = &vfbs->base.base;
-	struct vmw_private *dev_priv = vmw_priv(framebuffer->dev);
+	struct drm_clip_rect *clips_ptr;
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+	int i, num_units;
+	int ret = 0; /* silence warning */
+	int left, right, top, bottom;
 
 	struct {
 		SVGA3dCmdHeader header;
-		SVGA3dCmdPresent body;
-		SVGA3dCopyRect cr;
+		SVGA3dCmdBlitSurfaceToScreen body;
 	} *cmd;
+	SVGASignedRect *blits;
+
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list,
+			    head) {
+		if (crtc->fb != &framebuffer->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	BUG_ON(!clips || !num_clips);
+
+	fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips;
+	cmd = kzalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Temporary fifo memory alloc failed.\n");
+		return -ENOMEM;
+	}
+
+	left = clips->x1;
+	right = clips->x2;
+	top = clips->y1;
+	bottom = clips->y2;
+
+	clips_ptr = clips;
+	for (i = 1; i < num_clips; i++, clips_ptr += inc) {
+		left = min_t(int, left, (int)clips_ptr->x1);
+		right = max_t(int, right, (int)clips_ptr->x2);
+		top = min_t(int, top, (int)clips_ptr->y1);
+		bottom = max_t(int, bottom, (int)clips_ptr->y2);
+	}
+
+	/* only need to do this once */
+	memset(cmd, 0, fifo_size);
+	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN);
+	cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
+
+	cmd->body.srcRect.left = left;
+	cmd->body.srcRect.right = right;
+	cmd->body.srcRect.top = top;
+	cmd->body.srcRect.bottom = bottom;
+
+	clips_ptr = clips;
+	blits = (SVGASignedRect *)&cmd[1];
+	for (i = 0; i < num_clips; i++, clips_ptr += inc) {
+		blits[i].left   = clips_ptr->x1 - left;
+		blits[i].right  = clips_ptr->x2 - left;
+		blits[i].top    = clips_ptr->y1 - top;
+		blits[i].bottom = clips_ptr->y2 - top;
+	}
+
+	/* do per unit writing, reuse fifo for each */
+	for (i = 0; i < num_units; i++) {
+		struct vmw_display_unit *unit = units[i];
+		int clip_x1 = left - unit->crtc.x;
+		int clip_y1 = top - unit->crtc.y;
+		int clip_x2 = right - unit->crtc.x;
+		int clip_y2 = bottom - unit->crtc.y;
+
+		/* skip any crtcs that misses the clip region */
+		if (clip_x1 >= unit->crtc.mode.hdisplay ||
+		    clip_y1 >= unit->crtc.mode.vdisplay ||
+		    clip_x2 <= 0 || clip_y2 <= 0)
+			continue;
+
+		/* need to reset sid as it is changed by execbuf */
+		cmd->body.srcImage.sid = cpu_to_le32(framebuffer->user_handle);
+
+		cmd->body.destScreenId = unit->unit;
+
+		/*
+		 * The blit command is a lot more resilient then the
+		 * readback command when it comes to clip rects. So its
+		 * okay to go out of bounds.
+		 */
+
+		cmd->body.destRect.left = clip_x1;
+		cmd->body.destRect.right = clip_x2;
+		cmd->body.destRect.top = clip_y1;
+		cmd->body.destRect.bottom = clip_y2;
 
-	/**
-	 * Strictly we should take the ttm_lock in read mode before accessing
-	 * the fifo, to make sure the fifo is present and up. However,
-	 * instead we flush all workqueues under the ttm lock in exclusive mode
-	 * before taking down the fifo.
-	 */
-	mutex_lock(&vfbs->work_lock);
-	if (!vfbs->present_fs)
-		goto out_unlock;
-
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
-	if (unlikely(cmd == NULL))
-		goto out_resched;
-
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_PRESENT);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body) + sizeof(cmd->cr));
-	cmd->body.sid = cpu_to_le32(surf->res.id);
-	cmd->cr.x = cpu_to_le32(0);
-	cmd->cr.y = cpu_to_le32(0);
-	cmd->cr.srcx = cmd->cr.x;
-	cmd->cr.srcy = cmd->cr.y;
-	cmd->cr.w = cpu_to_le32(framebuffer->width);
-	cmd->cr.h = cpu_to_le32(framebuffer->height);
-	vfbs->present_fs = false;
-	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-out_resched:
-	/**
-	 * Will not re-add if already pending.
-	 */
-	schedule_delayed_work(&vfbs->d_work, VMWGFX_PRESENT_RATE);
-out_unlock:
-	mutex_unlock(&vfbs->work_lock);
-}
 
+		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
+					  fifo_size, 0, NULL);
+
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	kfree(cmd);
+
+	return ret;
+}
 
 int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer,
 				  struct drm_file *file_priv,
@@ -444,44 +474,20 @@ int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer,
 	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	struct vmw_framebuffer_surface *vfbs =
 		vmw_framebuffer_to_vfbs(framebuffer);
-	struct vmw_surface *surf = vfbs->surface;
 	struct drm_clip_rect norect;
-	SVGA3dCopyRect *cr;
-	int i, inc = 1;
-	int ret;
-
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdPresent body;
-		SVGA3dCopyRect cr;
-	} *cmd;
+	int ret, inc = 1;
 
 	if (unlikely(vfbs->master != file_priv->master))
 		return -EINVAL;
 
+	/* Require ScreenObject support for 3D */
+	if (!dev_priv->sou_priv)
+		return -EINVAL;
+
 	ret = ttm_read_lock(&vmaster->lock, true);
 	if (unlikely(ret != 0))
 		return ret;
 
-	if (!num_clips ||
-	    !(dev_priv->fifo.capabilities &
-	      SVGA_FIFO_CAP_SCREEN_OBJECT)) {
-		int ret;
-
-		mutex_lock(&vfbs->work_lock);
-		vfbs->present_fs = true;
-		ret = schedule_delayed_work(&vfbs->d_work, VMWGFX_PRESENT_RATE);
-		mutex_unlock(&vfbs->work_lock);
-		if (ret) {
-			/**
-			 * No work pending, Force immediate present.
-			 */
-			vmw_framebuffer_present_fs_callback(&vfbs->d_work.work);
-		}
-		ttm_read_unlock(&vmaster->lock);
-		return 0;
-	}
-
 	if (!num_clips) {
 		num_clips = 1;
 		clips = &norect;
@@ -493,29 +499,10 @@ int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer,
 		inc = 2; /* skip source rects */
 	}
 
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd) + (num_clips - 1) * sizeof(cmd->cr));
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Fifo reserve failed.\n");
-		ttm_read_unlock(&vmaster->lock);
-		return -ENOMEM;
-	}
-
-	memset(cmd, 0, sizeof(*cmd));
-
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_PRESENT);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body) + num_clips * sizeof(cmd->cr));
-	cmd->body.sid = cpu_to_le32(surf->res.id);
-
-	for (i = 0, cr = &cmd->cr; i < num_clips; i++, cr++, clips += inc) {
-		cr->x = cpu_to_le16(clips->x1);
-		cr->y = cpu_to_le16(clips->y1);
-		cr->srcx = cr->x;
-		cr->srcy = cr->y;
-		cr->w = cpu_to_le16(clips->x2 - clips->x1);
-		cr->h = cpu_to_le16(clips->y2 - clips->y1);
-	}
+	ret = do_surface_dirty_sou(dev_priv, file_priv, &vfbs->base,
+				   flags, color,
+				   clips, num_clips, inc);
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmd) + (num_clips - 1) * sizeof(cmd->cr));
 	ttm_read_unlock(&vmaster->lock);
 	return 0;
 }
@@ -540,6 +527,10 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	int ret;
 
+	/* 3D is only supported on HWv8 hosts which supports screen objects */
+	if (!dev_priv->sou_priv)
+		return -ENOSYS;
+
 	/*
 	 * Sanity checks.
 	 */
@@ -602,14 +593,11 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 	vfbs->base.base.depth = mode_cmd->depth;
 	vfbs->base.base.width = mode_cmd->width;
 	vfbs->base.base.height = mode_cmd->height;
-	vfbs->base.pin = &vmw_surface_dmabuf_pin;
-	vfbs->base.unpin = &vmw_surface_dmabuf_unpin;
 	vfbs->surface = surface;
+	vfbs->base.user_handle = mode_cmd->handle;
 	vfbs->master = drm_master_get(file_priv->master);
-	mutex_init(&vfbs->work_lock);
 
 	mutex_lock(&vmaster->fb_surf_mutex);
-	INIT_DELAYED_WORK(&vfbs->d_work, &vmw_framebuffer_present_fs_callback);
 	list_add_tail(&vfbs->head, &vmaster->fb_surf);
 	mutex_unlock(&vmaster->fb_surf_mutex);
 
@@ -644,48 +632,33 @@ void vmw_framebuffer_dmabuf_destroy(struct drm_framebuffer *framebuffer)
 
 	drm_framebuffer_cleanup(framebuffer);
 	vmw_dmabuf_unreference(&vfbd->buffer);
+	ttm_base_object_unref(&vfbd->base.user_obj);
 
 	kfree(vfbd);
 }
 
-int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
-				 struct drm_file *file_priv,
-				 unsigned flags, unsigned color,
-				 struct drm_clip_rect *clips,
-				 unsigned num_clips)
+static int do_dmabuf_dirty_ldu(struct vmw_private *dev_priv,
+			       struct vmw_framebuffer *framebuffer,
+			       unsigned flags, unsigned color,
+			       struct drm_clip_rect *clips,
+			       unsigned num_clips, int increment)
 {
-	struct vmw_private *dev_priv = vmw_priv(framebuffer->dev);
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-	struct drm_clip_rect norect;
-	int ret;
+	size_t fifo_size;
+	int i;
+
 	struct {
 		uint32_t header;
 		SVGAFifoCmdUpdate body;
 	} *cmd;
-	int i, increment = 1;
-
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		return ret;
 
-	if (!num_clips) {
-		num_clips = 1;
-		clips = &norect;
-		norect.x1 = norect.y1 = 0;
-		norect.x2 = framebuffer->width;
-		norect.y2 = framebuffer->height;
-	} else if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) {
-		num_clips /= 2;
-		increment = 2;
-	}
-
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd) * num_clips);
+	fifo_size = sizeof(*cmd) * num_clips;
+	cmd = vmw_fifo_reserve(dev_priv, fifo_size);
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Fifo reserve failed.\n");
-		ttm_read_unlock(&vmaster->lock);
 		return -ENOMEM;
 	}
 
+	memset(cmd, 0, fifo_size);
 	for (i = 0; i < num_clips; i++, clips += increment) {
 		cmd[i].header = cpu_to_le32(SVGA_CMD_UPDATE);
 		cmd[i].body.x = cpu_to_le32(clips->x1);
@@ -694,57 +667,186 @@ int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
 		cmd[i].body.height = cpu_to_le32(clips->y2 - clips->y1);
 	}
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmd) * num_clips);
-	ttm_read_unlock(&vmaster->lock);
-
+	vmw_fifo_commit(dev_priv, fifo_size);
 	return 0;
 }
 
-static struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = {
-	.destroy = vmw_framebuffer_dmabuf_destroy,
-	.dirty = vmw_framebuffer_dmabuf_dirty,
-	.create_handle = vmw_framebuffer_create_handle,
-};
-
-static int vmw_surface_dmabuf_pin(struct vmw_framebuffer *vfb)
+static int do_dmabuf_define_gmrfb(struct drm_file *file_priv,
+				  struct vmw_private *dev_priv,
+				  struct vmw_framebuffer *framebuffer)
 {
-	struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
-	struct vmw_framebuffer_surface *vfbs =
-		vmw_framebuffer_to_vfbs(&vfb->base);
-	unsigned long size = vfbs->base.base.pitch * vfbs->base.base.height;
+	int depth = framebuffer->base.depth;
+	size_t fifo_size;
 	int ret;
 
-	vfbs->buffer = kzalloc(sizeof(*vfbs->buffer), GFP_KERNEL);
-	if (unlikely(vfbs->buffer == NULL))
+	struct {
+		uint32_t header;
+		SVGAFifoCmdDefineGMRFB body;
+	} *cmd;
+
+	/* Emulate RGBA support, contrary to svga_reg.h this is not
+	 * supported by hosts. This is only a problem if we are reading
+	 * this value later and expecting what we uploaded back.
+	 */
+	if (depth == 32)
+		depth = 24;
+
+	fifo_size = sizeof(*cmd);
+	cmd = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed to allocate temporary cmd buffer.\n");
 		return -ENOMEM;
+	}
 
-	vmw_overlay_pause_all(dev_priv);
-	ret = vmw_dmabuf_init(dev_priv, vfbs->buffer, size,
-			       &vmw_vram_ne_placement,
-			       false, &vmw_dmabuf_bo_free);
-	vmw_overlay_resume_all(dev_priv);
+	memset(cmd, 0, fifo_size);
+	cmd->header = SVGA_CMD_DEFINE_GMRFB;
+	cmd->body.format.bitsPerPixel = framebuffer->base.bits_per_pixel;
+	cmd->body.format.colorDepth = depth;
+	cmd->body.format.reserved = 0;
+	cmd->body.bytesPerLine = framebuffer->base.pitch;
+	cmd->body.ptr.gmrId = framebuffer->user_handle;
+	cmd->body.ptr.offset = 0;
+
+	ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
+				  fifo_size, 0, NULL);
+
+	kfree(cmd);
+
+	return ret;
+}
+
+static int do_dmabuf_dirty_sou(struct drm_file *file_priv,
+			       struct vmw_private *dev_priv,
+			       struct vmw_framebuffer *framebuffer,
+			       unsigned flags, unsigned color,
+			       struct drm_clip_rect *clips,
+			       unsigned num_clips, int increment)
+{
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_clip_rect *clips_ptr;
+	int i, k, num_units, ret;
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+
+	struct {
+		uint32_t header;
+		SVGAFifoCmdBlitGMRFBToScreen body;
+	} *blits;
+
+	ret = do_dmabuf_define_gmrfb(file_priv, dev_priv, framebuffer);
 	if (unlikely(ret != 0))
-		vfbs->buffer = NULL;
+		return ret; /* define_gmrfb prints warnings */
+
+	fifo_size = sizeof(*blits) * num_clips;
+	blits = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(blits == NULL)) {
+		DRM_ERROR("Failed to allocate temporary cmd buffer.\n");
+		return -ENOMEM;
+	}
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) {
+		if (crtc->fb != &framebuffer->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	for (k = 0; k < num_units; k++) {
+		struct vmw_display_unit *unit = units[k];
+		int hit_num = 0;
+
+		clips_ptr = clips;
+		for (i = 0; i < num_clips; i++, clips_ptr += increment) {
+			int clip_x1 = clips_ptr->x1 - unit->crtc.x;
+			int clip_y1 = clips_ptr->y1 - unit->crtc.y;
+			int clip_x2 = clips_ptr->x2 - unit->crtc.x;
+			int clip_y2 = clips_ptr->y2 - unit->crtc.y;
+
+			/* skip any crtcs that misses the clip region */
+			if (clip_x1 >= unit->crtc.mode.hdisplay ||
+			    clip_y1 >= unit->crtc.mode.vdisplay ||
+			    clip_x2 <= 0 || clip_y2 <= 0)
+				continue;
+
+			blits[hit_num].header = SVGA_CMD_BLIT_GMRFB_TO_SCREEN;
+			blits[hit_num].body.destScreenId = unit->unit;
+			blits[hit_num].body.srcOrigin.x = clips_ptr->x1;
+			blits[hit_num].body.srcOrigin.y = clips_ptr->y1;
+			blits[hit_num].body.destRect.left = clip_x1;
+			blits[hit_num].body.destRect.top = clip_y1;
+			blits[hit_num].body.destRect.right = clip_x2;
+			blits[hit_num].body.destRect.bottom = clip_y2;
+			hit_num++;
+		}
+
+		/* no clips hit the crtc */
+		if (hit_num == 0)
+			continue;
+
+		fifo_size = sizeof(*blits) * hit_num;
+		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, blits,
+					  fifo_size, 0, NULL);
+
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	kfree(blits);
 
 	return ret;
 }
 
-static int vmw_surface_dmabuf_unpin(struct vmw_framebuffer *vfb)
+int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
+				 struct drm_file *file_priv,
+				 unsigned flags, unsigned color,
+				 struct drm_clip_rect *clips,
+				 unsigned num_clips)
 {
-	struct ttm_buffer_object *bo;
-	struct vmw_framebuffer_surface *vfbs =
-		vmw_framebuffer_to_vfbs(&vfb->base);
+	struct vmw_private *dev_priv = vmw_priv(framebuffer->dev);
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct vmw_framebuffer_dmabuf *vfbd =
+		vmw_framebuffer_to_vfbd(framebuffer);
+	struct drm_clip_rect norect;
+	int ret, increment = 1;
 
-	if (unlikely(vfbs->buffer == NULL))
-		return 0;
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
 
-	bo = &vfbs->buffer->base;
-	ttm_bo_unref(&bo);
-	vfbs->buffer = NULL;
+	if (!num_clips) {
+		num_clips = 1;
+		clips = &norect;
+		norect.x1 = norect.y1 = 0;
+		norect.x2 = framebuffer->width;
+		norect.y2 = framebuffer->height;
+	} else if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) {
+		num_clips /= 2;
+		increment = 2;
+	}
 
-	return 0;
+	if (dev_priv->ldu_priv) {
+		ret = do_dmabuf_dirty_ldu(dev_priv, &vfbd->base,
+					  flags, color,
+					  clips, num_clips, increment);
+	} else {
+		ret = do_dmabuf_dirty_sou(file_priv, dev_priv, &vfbd->base,
+					  flags, color,
+					  clips, num_clips, increment);
+	}
+
+	ttm_read_unlock(&vmaster->lock);
+	return ret;
 }
 
+static struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = {
+	.destroy = vmw_framebuffer_dmabuf_destroy,
+	.dirty = vmw_framebuffer_dmabuf_dirty,
+	.create_handle = vmw_framebuffer_create_handle,
+};
+
+/**
+ * Pin the dmabuffer to the start of vram.
+ */
 static int vmw_framebuffer_dmabuf_pin(struct vmw_framebuffer *vfb)
 {
 	struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
@@ -752,10 +854,12 @@ static int vmw_framebuffer_dmabuf_pin(struct vmw_framebuffer *vfb)
 		vmw_framebuffer_to_vfbd(&vfb->base);
 	int ret;
 
+	/* This code should not be used with screen objects */
+	BUG_ON(dev_priv->sou_priv);
 
 	vmw_overlay_pause_all(dev_priv);
 
-	ret = vmw_dmabuf_to_start_of_vram(dev_priv, vfbd->buffer);
+	ret = vmw_dmabuf_to_start_of_vram(dev_priv, vfbd->buffer, true, false);
 
 	vmw_overlay_resume_all(dev_priv);
 
@@ -775,7 +879,7 @@ static int vmw_framebuffer_dmabuf_unpin(struct vmw_framebuffer *vfb)
 		return 0;
 	}
 
-	return vmw_dmabuf_from_vram(dev_priv, vfbd->buffer);
+	return vmw_dmabuf_unpin(dev_priv, vfbd->buffer, false);
 }
 
 static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
@@ -797,6 +901,33 @@ static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
 		return -EINVAL;
 	}
 
+	/* Limited framebuffer color depth support for screen objects */
+	if (dev_priv->sou_priv) {
+		switch (mode_cmd->depth) {
+		case 32:
+		case 24:
+			/* Only support 32 bpp for 32 and 24 depth fbs */
+			if (mode_cmd->bpp == 32)
+				break;
+
+			DRM_ERROR("Invalid color depth/bbp: %d %d\n",
+				  mode_cmd->depth, mode_cmd->bpp);
+			return -EINVAL;
+		case 16:
+		case 15:
+			/* Only support 16 bpp for 16 and 15 depth fbs */
+			if (mode_cmd->bpp == 16)
+				break;
+
+			DRM_ERROR("Invalid color depth/bbp: %d %d\n",
+				  mode_cmd->depth, mode_cmd->bpp);
+			return -EINVAL;
+		default:
+			DRM_ERROR("Invalid color depth: %d\n", mode_cmd->depth);
+			return -EINVAL;
+		}
+	}
+
 	vfbd = kzalloc(sizeof(*vfbd), GFP_KERNEL);
 	if (!vfbd) {
 		ret = -ENOMEM;
@@ -818,9 +949,13 @@ static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
 	vfbd->base.base.depth = mode_cmd->depth;
 	vfbd->base.base.width = mode_cmd->width;
 	vfbd->base.base.height = mode_cmd->height;
-	vfbd->base.pin = vmw_framebuffer_dmabuf_pin;
-	vfbd->base.unpin = vmw_framebuffer_dmabuf_unpin;
+	if (!dev_priv->sou_priv) {
+		vfbd->base.pin = vmw_framebuffer_dmabuf_pin;
+		vfbd->base.unpin = vmw_framebuffer_dmabuf_unpin;
+	}
+	vfbd->base.dmabuf = true;
 	vfbd->buffer = dmabuf;
+	vfbd->base.user_handle = mode_cmd->handle;
 	*out = &vfbd->base;
 
 	return 0;
@@ -846,6 +981,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 	struct vmw_framebuffer *vfb = NULL;
 	struct vmw_surface *surface = NULL;
 	struct vmw_dma_buffer *bo = NULL;
+	struct ttm_base_object *user_obj;
 	u64 required_size;
 	int ret;
 
@@ -861,6 +997,21 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 		return NULL;
 	}
 
+	/*
+	 * Take a reference on the user object of the resource
+	 * backing the kms fb. This ensures that user-space handle
+	 * lookups on that resource will always work as long as
+	 * it's registered with a kms framebuffer. This is important,
+	 * since vmw_execbuf_process identifies resources in the
+	 * command stream using user-space handles.
+	 */
+
+	user_obj = ttm_base_object_lookup(tfile, mode_cmd->handle);
+	if (unlikely(user_obj == NULL)) {
+		DRM_ERROR("Could not locate requested kms frame buffer.\n");
+		return ERR_PTR(-ENOENT);
+	}
+
 	/**
 	 * End conditioned code.
 	 */
@@ -881,8 +1032,10 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 
 	if (ret) {
 		DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
+		ttm_base_object_unref(&user_obj);
 		return ERR_PTR(ret);
-	}
+	} else
+		vfb->user_obj = user_obj;
 	return &vfb->base;
 
 try_dmabuf:
@@ -902,8 +1055,10 @@ try_dmabuf:
 
 	if (ret) {
 		DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
+		ttm_base_object_unref(&user_obj);
 		return ERR_PTR(ret);
-	}
+	} else
+		vfb->user_obj = user_obj;
 
 	return &vfb->base;
 
@@ -911,6 +1066,7 @@ err_not_scanout:
 	DRM_ERROR("surface not marked as scanout\n");
 	/* vmw_user_surface_lookup takes one ref */
 	vmw_surface_unreference(&surface);
+	ttm_base_object_unref(&user_obj);
 
 	return ERR_PTR(-EINVAL);
 }
@@ -919,6 +1075,210 @@ static struct drm_mode_config_funcs vmw_kms_funcs = {
 	.fb_create = vmw_kms_fb_create,
 };
 
+int vmw_kms_present(struct vmw_private *dev_priv,
+		    struct drm_file *file_priv,
+		    struct vmw_framebuffer *vfb,
+		    struct vmw_surface *surface,
+		    uint32_t sid,
+		    int32_t destX, int32_t destY,
+		    struct drm_vmw_rect *clips,
+		    uint32_t num_clips)
+{
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+	int i, k, num_units;
+	int ret = 0; /* silence warning */
+
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdBlitSurfaceToScreen body;
+	} *cmd;
+	SVGASignedRect *blits;
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) {
+		if (crtc->fb != &vfb->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	BUG_ON(surface == NULL);
+	BUG_ON(!clips || !num_clips);
+
+	fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips;
+	cmd = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed to allocate temporary fifo memory.\n");
+		return -ENOMEM;
+	}
+
+	/* only need to do this once */
+	memset(cmd, 0, fifo_size);
+	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN);
+	cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
+
+	cmd->body.srcRect.left = 0;
+	cmd->body.srcRect.right = surface->sizes[0].width;
+	cmd->body.srcRect.top = 0;
+	cmd->body.srcRect.bottom = surface->sizes[0].height;
+
+	blits = (SVGASignedRect *)&cmd[1];
+	for (i = 0; i < num_clips; i++) {
+		blits[i].left   = clips[i].x;
+		blits[i].right  = clips[i].x + clips[i].w;
+		blits[i].top    = clips[i].y;
+		blits[i].bottom = clips[i].y + clips[i].h;
+	}
+
+	for (k = 0; k < num_units; k++) {
+		struct vmw_display_unit *unit = units[k];
+		int clip_x1 = destX - unit->crtc.x;
+		int clip_y1 = destY - unit->crtc.y;
+		int clip_x2 = clip_x1 + surface->sizes[0].width;
+		int clip_y2 = clip_y1 + surface->sizes[0].height;
+
+		/* skip any crtcs that misses the clip region */
+		if (clip_x1 >= unit->crtc.mode.hdisplay ||
+		    clip_y1 >= unit->crtc.mode.vdisplay ||
+		    clip_x2 <= 0 || clip_y2 <= 0)
+			continue;
+
+		/* need to reset sid as it is changed by execbuf */
+		cmd->body.srcImage.sid = sid;
+
+		cmd->body.destScreenId = unit->unit;
+
+		/*
+		 * The blit command is a lot more resilient then the
+		 * readback command when it comes to clip rects. So its
+		 * okay to go out of bounds.
+		 */
+
+		cmd->body.destRect.left = clip_x1;
+		cmd->body.destRect.right = clip_x2;
+		cmd->body.destRect.top = clip_y1;
+		cmd->body.destRect.bottom = clip_y2;
+
+		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
+					  fifo_size, 0, NULL);
+
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	kfree(cmd);
+
+	return ret;
+}
+
+int vmw_kms_readback(struct vmw_private *dev_priv,
+		     struct drm_file *file_priv,
+		     struct vmw_framebuffer *vfb,
+		     struct drm_vmw_fence_rep __user *user_fence_rep,
+		     struct drm_vmw_rect *clips,
+		     uint32_t num_clips)
+{
+	struct vmw_framebuffer_dmabuf *vfbd =
+		vmw_framebuffer_to_vfbd(&vfb->base);
+	struct vmw_dma_buffer *dmabuf = vfbd->buffer;
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+	int i, k, ret, num_units, blits_pos;
+
+	struct {
+		uint32_t header;
+		SVGAFifoCmdDefineGMRFB body;
+	} *cmd;
+	struct {
+		uint32_t header;
+		SVGAFifoCmdBlitScreenToGMRFB body;
+	} *blits;
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) {
+		if (crtc->fb != &vfb->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	BUG_ON(dmabuf == NULL);
+	BUG_ON(!clips || !num_clips);
+
+	/* take a safe guess at fifo size */
+	fifo_size = sizeof(*cmd) + sizeof(*blits) * num_clips * num_units;
+	cmd = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed to allocate temporary fifo memory.\n");
+		return -ENOMEM;
+	}
+
+	memset(cmd, 0, fifo_size);
+	cmd->header = SVGA_CMD_DEFINE_GMRFB;
+	cmd->body.format.bitsPerPixel = vfb->base.bits_per_pixel;
+	cmd->body.format.colorDepth = vfb->base.depth;
+	cmd->body.format.reserved = 0;
+	cmd->body.bytesPerLine = vfb->base.pitch;
+	cmd->body.ptr.gmrId = vfb->user_handle;
+	cmd->body.ptr.offset = 0;
+
+	blits = (void *)&cmd[1];
+	blits_pos = 0;
+	for (i = 0; i < num_units; i++) {
+		struct drm_vmw_rect *c = clips;
+		for (k = 0; k < num_clips; k++, c++) {
+			/* transform clip coords to crtc origin based coords */
+			int clip_x1 = c->x - units[i]->crtc.x;
+			int clip_x2 = c->x - units[i]->crtc.x + c->w;
+			int clip_y1 = c->y - units[i]->crtc.y;
+			int clip_y2 = c->y - units[i]->crtc.y + c->h;
+			int dest_x = c->x;
+			int dest_y = c->y;
+
+			/* compensate for clipping, we negate
+			 * a negative number and add that.
+			 */
+			if (clip_x1 < 0)
+				dest_x += -clip_x1;
+			if (clip_y1 < 0)
+				dest_y += -clip_y1;
+
+			/* clip */
+			clip_x1 = max(clip_x1, 0);
+			clip_y1 = max(clip_y1, 0);
+			clip_x2 = min(clip_x2, units[i]->crtc.mode.hdisplay);
+			clip_y2 = min(clip_y2, units[i]->crtc.mode.vdisplay);
+
+			/* and cull any rects that misses the crtc */
+			if (clip_x1 >= units[i]->crtc.mode.hdisplay ||
+			    clip_y1 >= units[i]->crtc.mode.vdisplay ||
+			    clip_x2 <= 0 || clip_y2 <= 0)
+				continue;
+
+			blits[blits_pos].header = SVGA_CMD_BLIT_SCREEN_TO_GMRFB;
+			blits[blits_pos].body.srcScreenId = units[i]->unit;
+			blits[blits_pos].body.destOrigin.x = dest_x;
+			blits[blits_pos].body.destOrigin.y = dest_y;
+
+			blits[blits_pos].body.srcRect.left = clip_x1;
+			blits[blits_pos].body.srcRect.top = clip_y1;
+			blits[blits_pos].body.srcRect.right = clip_x2;
+			blits[blits_pos].body.srcRect.bottom = clip_y2;
+			blits_pos++;
+		}
+	}
+	/* reset size here and use calculated exact size from loops */
+	fifo_size = sizeof(*cmd) + sizeof(*blits) * blits_pos;
+
+	ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd, fifo_size,
+				  0, user_fence_rep);
+
+	kfree(cmd);
+
+	return ret;
+}
+
 int vmw_kms_init(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -932,7 +1292,9 @@ int vmw_kms_init(struct vmw_private *dev_priv)
 	dev->mode_config.max_width = 8192;
 	dev->mode_config.max_height = 8192;
 
-	ret = vmw_kms_init_legacy_display_system(dev_priv);
+	ret = vmw_kms_init_screen_object_display(dev_priv);
+	if (ret) /* Fallback */
+		(void)vmw_kms_init_legacy_display_system(dev_priv);
 
 	return 0;
 }
@@ -1099,7 +1461,265 @@ bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
 	return ((u64) pitch * (u64) height) < (u64) dev_priv->vram_size;
 }
 
+
+/**
+ * Function called by DRM code called with vbl_lock held.
+ */
 u32 vmw_get_vblank_counter(struct drm_device *dev, int crtc)
 {
 	return 0;
 }
+
+/**
+ * Function called by DRM code called with vbl_lock held.
+ */
+int vmw_enable_vblank(struct drm_device *dev, int crtc)
+{
+	return -ENOSYS;
+}
+
+/**
+ * Function called by DRM code called with vbl_lock held.
+ */
+void vmw_disable_vblank(struct drm_device *dev, int crtc)
+{
+}
+
+
+/*
+ * Small shared kms functions.
+ */
+
+int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
+			 struct drm_vmw_rect *rects)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct vmw_display_unit *du;
+	struct drm_connector *con;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+#if 0
+	{
+		unsigned int i;
+
+		DRM_INFO("%s: new layout ", __func__);
+		for (i = 0; i < num; i++)
+			DRM_INFO("(%i, %i %ux%u) ", rects[i].x, rects[i].y,
+				 rects[i].w, rects[i].h);
+		DRM_INFO("\n");
+	}
+#endif
+
+	list_for_each_entry(con, &dev->mode_config.connector_list, head) {
+		du = vmw_connector_to_du(con);
+		if (num > du->unit) {
+			du->pref_width = rects[du->unit].w;
+			du->pref_height = rects[du->unit].h;
+			du->pref_active = true;
+		} else {
+			du->pref_width = 800;
+			du->pref_height = 600;
+			du->pref_active = false;
+		}
+		con->status = vmw_du_connector_detect(con, true);
+	}
+
+	mutex_unlock(&dev->mode_config.mutex);
+
+	return 0;
+}
+
+void vmw_du_crtc_save(struct drm_crtc *crtc)
+{
+}
+
+void vmw_du_crtc_restore(struct drm_crtc *crtc)
+{
+}
+
+void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
+			   u16 *r, u16 *g, u16 *b,
+			   uint32_t start, uint32_t size)
+{
+	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
+	int i;
+
+	for (i = 0; i < size; i++) {
+		DRM_DEBUG("%d r/g/b = 0x%04x / 0x%04x / 0x%04x\n", i,
+			  r[i], g[i], b[i]);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 0, r[i] >> 8);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 1, g[i] >> 8);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 2, b[i] >> 8);
+	}
+}
+
+void vmw_du_connector_dpms(struct drm_connector *connector, int mode)
+{
+}
+
+void vmw_du_connector_save(struct drm_connector *connector)
+{
+}
+
+void vmw_du_connector_restore(struct drm_connector *connector)
+{
+}
+
+enum drm_connector_status
+vmw_du_connector_detect(struct drm_connector *connector, bool force)
+{
+	uint32_t num_displays;
+	struct drm_device *dev = connector->dev;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+
+	mutex_lock(&dev_priv->hw_mutex);
+	num_displays = vmw_read(dev_priv, SVGA_REG_NUM_DISPLAYS);
+	mutex_unlock(&dev_priv->hw_mutex);
+
+	return ((vmw_connector_to_du(connector)->unit < num_displays) ?
+		connector_status_connected : connector_status_disconnected);
+}
+
+static struct drm_display_mode vmw_kms_connector_builtin[] = {
+	/* 640x480@60Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656,
+		   752, 800, 0, 480, 489, 492, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
+	/* 800x600@60Hz */
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840,
+		   968, 1056, 0, 600, 601, 605, 628, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1024x768@60Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048,
+		   1184, 1344, 0, 768, 771, 777, 806, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
+	/* 1152x864@75Hz */
+	{ DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216,
+		   1344, 1600, 0, 864, 865, 868, 900, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1280x768@60Hz */
+	{ DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344,
+		   1472, 1664, 0, 768, 771, 778, 798, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1280x800@60Hz */
+	{ DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352,
+		   1480, 1680, 0, 800, 803, 809, 831, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+	/* 1280x960@60Hz */
+	{ DRM_MODE("1280x960", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1376,
+		   1488, 1800, 0, 960, 961, 964, 1000, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1280x1024@60Hz */
+	{ DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1328,
+		   1440, 1688, 0, 1024, 1025, 1028, 1066, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1360x768@60Hz */
+	{ DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 85500, 1360, 1424,
+		   1536, 1792, 0, 768, 771, 777, 795, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1440x1050@60Hz */
+	{ DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488,
+		   1632, 1864, 0, 1050, 1053, 1057, 1089, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1440x900@60Hz */
+	{ DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1520,
+		   1672, 1904, 0, 900, 903, 909, 934, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1600x1200@60Hz */
+	{ DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 162000, 1600, 1664,
+		   1856, 2160, 0, 1200, 1201, 1204, 1250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1680x1050@60Hz */
+	{ DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784,
+		   1960, 2240, 0, 1050, 1053, 1059, 1089, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1792x1344@60Hz */
+	{ DRM_MODE("1792x1344", DRM_MODE_TYPE_DRIVER, 204750, 1792, 1920,
+		   2120, 2448, 0, 1344, 1345, 1348, 1394, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1853x1392@60Hz */
+	{ DRM_MODE("1856x1392", DRM_MODE_TYPE_DRIVER, 218250, 1856, 1952,
+		   2176, 2528, 0, 1392, 1393, 1396, 1439, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1920x1200@60Hz */
+	{ DRM_MODE("1920x1200", DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056,
+		   2256, 2592, 0, 1200, 1203, 1209, 1245, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1920x1440@60Hz */
+	{ DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 234000, 1920, 2048,
+		   2256, 2600, 0, 1440, 1441, 1444, 1500, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 2560x1600@60Hz */
+	{ DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2752,
+		   3032, 3504, 0, 1600, 1603, 1609, 1658, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* Terminate */
+	{ DRM_MODE("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) },
+};
+
+int vmw_du_connector_fill_modes(struct drm_connector *connector,
+				uint32_t max_width, uint32_t max_height)
+{
+	struct vmw_display_unit *du = vmw_connector_to_du(connector);
+	struct drm_device *dev = connector->dev;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_display_mode *mode = NULL;
+	struct drm_display_mode *bmode;
+	struct drm_display_mode prefmode = { DRM_MODE("preferred",
+		DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC)
+	};
+	int i;
+
+	/* Add preferred mode */
+	{
+		mode = drm_mode_duplicate(dev, &prefmode);
+		if (!mode)
+			return 0;
+		mode->hdisplay = du->pref_width;
+		mode->vdisplay = du->pref_height;
+		mode->vrefresh = drm_mode_vrefresh(mode);
+		if (vmw_kms_validate_mode_vram(dev_priv, mode->hdisplay * 2,
+					       mode->vdisplay)) {
+			drm_mode_probed_add(connector, mode);
+
+			if (du->pref_mode) {
+				list_del_init(&du->pref_mode->head);
+				drm_mode_destroy(dev, du->pref_mode);
+			}
+
+			du->pref_mode = mode;
+		}
+	}
+
+	for (i = 0; vmw_kms_connector_builtin[i].type != 0; i++) {
+		bmode = &vmw_kms_connector_builtin[i];
+		if (bmode->hdisplay > max_width ||
+		    bmode->vdisplay > max_height)
+			continue;
+
+		if (!vmw_kms_validate_mode_vram(dev_priv, bmode->hdisplay * 2,
+						bmode->vdisplay))
+			continue;
+
+		mode = drm_mode_duplicate(dev, bmode);
+		if (!mode)
+			return 0;
+		mode->vrefresh = drm_mode_vrefresh(mode);
+
+		drm_mode_probed_add(connector, mode);
+	}
+
+	drm_mode_connector_list_update(connector);
+
+	return 1;
+}
+
+int vmw_du_connector_set_property(struct drm_connector *connector,
+				  struct drm_property *property,
+				  uint64_t val)
+{
+	return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 8a398a0339b6..db0b901f8c3f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -31,6 +31,8 @@
 #include "drmP.h"
 #include "vmwgfx_drv.h"
 
+#define VMWGFX_NUM_DISPLAY_UNITS 8
+
 
 #define vmw_framebuffer_to_vfb(x) \
 	container_of(x, struct vmw_framebuffer, base)
@@ -45,6 +47,9 @@ struct vmw_framebuffer {
 	struct drm_framebuffer base;
 	int (*pin)(struct vmw_framebuffer *fb);
 	int (*unpin)(struct vmw_framebuffer *fb);
+	bool dmabuf;
+	struct ttm_base_object *user_obj;
+	uint32_t user_handle;
 };
 
 
@@ -83,22 +88,59 @@ struct vmw_display_unit {
 	int hotspot_y;
 
 	unsigned unit;
+
+	/*
+	 * Prefered mode tracking.
+	 */
+	unsigned pref_width;
+	unsigned pref_height;
+	bool pref_active;
+	struct drm_display_mode *pref_mode;
 };
 
+#define vmw_crtc_to_du(x) \
+	container_of(x, struct vmw_display_unit, crtc)
+#define vmw_connector_to_du(x) \
+	container_of(x, struct vmw_display_unit, connector)
+
+
 /*
  * Shared display unit functions - vmwgfx_kms.c
  */
 void vmw_display_unit_cleanup(struct vmw_display_unit *du);
+void vmw_du_crtc_save(struct drm_crtc *crtc);
+void vmw_du_crtc_restore(struct drm_crtc *crtc);
+void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
+			   u16 *r, u16 *g, u16 *b,
+			   uint32_t start, uint32_t size);
 int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 			   uint32_t handle, uint32_t width, uint32_t height);
 int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
+void vmw_du_connector_dpms(struct drm_connector *connector, int mode);
+void vmw_du_connector_save(struct drm_connector *connector);
+void vmw_du_connector_restore(struct drm_connector *connector);
+enum drm_connector_status
+vmw_du_connector_detect(struct drm_connector *connector, bool force);
+int vmw_du_connector_fill_modes(struct drm_connector *connector,
+				uint32_t max_width, uint32_t max_height);
+int vmw_du_connector_set_property(struct drm_connector *connector,
+				  struct drm_property *property,
+				  uint64_t val);
+int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
+			 struct drm_vmw_rect *rects);
 
 /*
  * Legacy display unit functions - vmwgfx_ldu.c
  */
 int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv);
 int vmw_kms_close_legacy_display_system(struct vmw_private *dev_priv);
-int vmw_kms_ldu_update_layout(struct vmw_private *dev_priv, unsigned num,
+
+/*
+ * Screen Objects display functions - vmwgfx_scrn.c
+ */
+int vmw_kms_init_screen_object_display(struct vmw_private *dev_priv);
+int vmw_kms_close_screen_object_display(struct vmw_private *dev_priv);
+int vmw_kms_sou_update_layout(struct vmw_private *dev_priv, unsigned num,
 			      struct drm_vmw_rect *rects);
 
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index 7e1901c4f065..92f56bc594eb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -27,7 +27,6 @@
 
 #include "vmwgfx_kms.h"
 
-#define VMWGFX_LDU_NUM_DU 8
 
 #define vmw_crtc_to_ldu(x) \
 	container_of(x, struct vmw_legacy_display_unit, base.crtc)
@@ -51,11 +50,6 @@ struct vmw_legacy_display {
 struct vmw_legacy_display_unit {
 	struct vmw_display_unit base;
 
-	unsigned pref_width;
-	unsigned pref_height;
-	bool pref_active;
-	struct drm_display_mode *pref_mode;
-
 	struct list_head active;
 };
 
@@ -71,29 +65,6 @@ static void vmw_ldu_destroy(struct vmw_legacy_display_unit *ldu)
  * Legacy Display Unit CRTC functions
  */
 
-static void vmw_ldu_crtc_save(struct drm_crtc *crtc)
-{
-}
-
-static void vmw_ldu_crtc_restore(struct drm_crtc *crtc)
-{
-}
-
-static void vmw_ldu_crtc_gamma_set(struct drm_crtc *crtc,
-				   u16 *r, u16 *g, u16 *b,
-				   uint32_t start, uint32_t size)
-{
-	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
-	int i;
-
-	for (i = 0; i < size; i++) {
-		DRM_DEBUG("%d r/g/b = 0x%04x / 0x%04x / 0x%04x\n", i, r[i], g[i], b[i]);
-		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 0, r[i] >> 8);
-		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 1, g[i] >> 8);
-		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 2, b[i] >> 8);
-	}
-}
-
 static void vmw_ldu_crtc_destroy(struct drm_crtc *crtc)
 {
 	vmw_ldu_destroy(vmw_crtc_to_ldu(crtc));
@@ -301,15 +272,16 @@ static int vmw_ldu_crtc_set_config(struct drm_mode_set *set)
 }
 
 static struct drm_crtc_funcs vmw_legacy_crtc_funcs = {
-	.save = vmw_ldu_crtc_save,
-	.restore = vmw_ldu_crtc_restore,
+	.save = vmw_du_crtc_save,
+	.restore = vmw_du_crtc_restore,
 	.cursor_set = vmw_du_crtc_cursor_set,
 	.cursor_move = vmw_du_crtc_cursor_move,
-	.gamma_set = vmw_ldu_crtc_gamma_set,
+	.gamma_set = vmw_du_crtc_gamma_set,
 	.destroy = vmw_ldu_crtc_destroy,
 	.set_config = vmw_ldu_crtc_set_config,
 };
 
+
 /*
  * Legacy Display Unit encoder functions
  */
@@ -327,190 +299,18 @@ static struct drm_encoder_funcs vmw_legacy_encoder_funcs = {
  * Legacy Display Unit connector functions
  */
 
-static void vmw_ldu_connector_dpms(struct drm_connector *connector, int mode)
-{
-}
-
-static void vmw_ldu_connector_save(struct drm_connector *connector)
-{
-}
-
-static void vmw_ldu_connector_restore(struct drm_connector *connector)
-{
-}
-
-static enum drm_connector_status
-	vmw_ldu_connector_detect(struct drm_connector *connector,
-				 bool force)
-{
-	uint32_t num_displays;
-	struct drm_device *dev = connector->dev;
-	struct vmw_private *dev_priv = vmw_priv(dev);
-
-	mutex_lock(&dev_priv->hw_mutex);
-	num_displays = vmw_read(dev_priv, SVGA_REG_NUM_DISPLAYS);
-	mutex_unlock(&dev_priv->hw_mutex);
-
-	return ((vmw_connector_to_ldu(connector)->base.unit < num_displays) ?
-		connector_status_connected : connector_status_disconnected);
-}
-
-static const struct drm_display_mode vmw_ldu_connector_builtin[] = {
-	/* 640x480@60Hz */
-	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656,
-		   752, 800, 0, 480, 489, 492, 525, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
-	/* 800x600@60Hz */
-	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840,
-		   968, 1056, 0, 600, 601, 605, 628, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1024x768@60Hz */
-	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048,
-		   1184, 1344, 0, 768, 771, 777, 806, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
-	/* 1152x864@75Hz */
-	{ DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216,
-		   1344, 1600, 0, 864, 865, 868, 900, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1280x768@60Hz */
-	{ DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344,
-		   1472, 1664, 0, 768, 771, 778, 798, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1280x800@60Hz */
-	{ DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352,
-		   1480, 1680, 0, 800, 803, 809, 831, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
-	/* 1280x960@60Hz */
-	{ DRM_MODE("1280x960", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1376,
-		   1488, 1800, 0, 960, 961, 964, 1000, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1280x1024@60Hz */
-	{ DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1328,
-		   1440, 1688, 0, 1024, 1025, 1028, 1066, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1360x768@60Hz */
-	{ DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 85500, 1360, 1424,
-		   1536, 1792, 0, 768, 771, 777, 795, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1440x1050@60Hz */
-	{ DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488,
-		   1632, 1864, 0, 1050, 1053, 1057, 1089, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1440x900@60Hz */
-	{ DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1520,
-		   1672, 1904, 0, 900, 903, 909, 934, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1600x1200@60Hz */
-	{ DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 162000, 1600, 1664,
-		   1856, 2160, 0, 1200, 1201, 1204, 1250, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1680x1050@60Hz */
-	{ DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784,
-		   1960, 2240, 0, 1050, 1053, 1059, 1089, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1792x1344@60Hz */
-	{ DRM_MODE("1792x1344", DRM_MODE_TYPE_DRIVER, 204750, 1792, 1920,
-		   2120, 2448, 0, 1344, 1345, 1348, 1394, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1853x1392@60Hz */
-	{ DRM_MODE("1856x1392", DRM_MODE_TYPE_DRIVER, 218250, 1856, 1952,
-		   2176, 2528, 0, 1392, 1393, 1396, 1439, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1920x1200@60Hz */
-	{ DRM_MODE("1920x1200", DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056,
-		   2256, 2592, 0, 1200, 1203, 1209, 1245, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1920x1440@60Hz */
-	{ DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 234000, 1920, 2048,
-		   2256, 2600, 0, 1440, 1441, 1444, 1500, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 2560x1600@60Hz */
-	{ DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2752,
-		   3032, 3504, 0, 1600, 1603, 1609, 1658, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* Terminate */
-	{ DRM_MODE("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) },
-};
-
-static int vmw_ldu_connector_fill_modes(struct drm_connector *connector,
-					uint32_t max_width, uint32_t max_height)
-{
-	struct vmw_legacy_display_unit *ldu = vmw_connector_to_ldu(connector);
-	struct drm_device *dev = connector->dev;
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct drm_display_mode *mode = NULL;
-	struct drm_display_mode prefmode = { DRM_MODE("preferred",
-		DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC)
-	};
-	int i;
-
-	/* Add preferred mode */
-	{
-		mode = drm_mode_duplicate(dev, &prefmode);
-		if (!mode)
-			return 0;
-		mode->hdisplay = ldu->pref_width;
-		mode->vdisplay = ldu->pref_height;
-		mode->vrefresh = drm_mode_vrefresh(mode);
-		if (vmw_kms_validate_mode_vram(dev_priv, mode->hdisplay * 2,
-					       mode->vdisplay)) {
-			drm_mode_probed_add(connector, mode);
-
-			if (ldu->pref_mode) {
-				list_del_init(&ldu->pref_mode->head);
-				drm_mode_destroy(dev, ldu->pref_mode);
-			}
-
-			ldu->pref_mode = mode;
-		}
-	}
-
-	for (i = 0; vmw_ldu_connector_builtin[i].type != 0; i++) {
-		const struct drm_display_mode *bmode;
-
-		bmode = &vmw_ldu_connector_builtin[i];
-		if (bmode->hdisplay > max_width ||
-		    bmode->vdisplay > max_height)
-			continue;
-
-		if (!vmw_kms_validate_mode_vram(dev_priv, bmode->hdisplay * 2,
-						bmode->vdisplay))
-			continue;
-
-		mode = drm_mode_duplicate(dev, bmode);
-		if (!mode)
-			return 0;
-		mode->vrefresh = drm_mode_vrefresh(mode);
-
-		drm_mode_probed_add(connector, mode);
-	}
-
-	drm_mode_connector_list_update(connector);
-
-	return 1;
-}
-
-static int vmw_ldu_connector_set_property(struct drm_connector *connector,
-					  struct drm_property *property,
-					  uint64_t val)
-{
-	return 0;
-}
-
 static void vmw_ldu_connector_destroy(struct drm_connector *connector)
 {
 	vmw_ldu_destroy(vmw_connector_to_ldu(connector));
 }
 
 static struct drm_connector_funcs vmw_legacy_connector_funcs = {
-	.dpms = vmw_ldu_connector_dpms,
-	.save = vmw_ldu_connector_save,
-	.restore = vmw_ldu_connector_restore,
-	.detect = vmw_ldu_connector_detect,
-	.fill_modes = vmw_ldu_connector_fill_modes,
-	.set_property = vmw_ldu_connector_set_property,
+	.dpms = vmw_du_connector_dpms,
+	.save = vmw_du_connector_save,
+	.restore = vmw_du_connector_restore,
+	.detect = vmw_du_connector_detect,
+	.fill_modes = vmw_du_connector_fill_modes,
+	.set_property = vmw_du_connector_set_property,
 	.destroy = vmw_ldu_connector_destroy,
 };
 
@@ -533,14 +333,14 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit)
 
 	INIT_LIST_HEAD(&ldu->active);
 
-	ldu->pref_active = (unit == 0);
-	ldu->pref_width = 800;
-	ldu->pref_height = 600;
-	ldu->pref_mode = NULL;
+	ldu->base.pref_active = (unit == 0);
+	ldu->base.pref_width = 800;
+	ldu->base.pref_height = 600;
+	ldu->base.pref_mode = NULL;
 
 	drm_connector_init(dev, connector, &vmw_legacy_connector_funcs,
 			   DRM_MODE_CONNECTOR_LVDS);
-	connector->status = vmw_ldu_connector_detect(connector, true);
+	connector->status = vmw_du_connector_detect(connector, true);
 
 	drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs,
 			 DRM_MODE_ENCODER_LVDS);
@@ -562,8 +362,7 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit)
 int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
-	int i;
-	int ret;
+	int i, ret;
 
 	if (dev_priv->ldu_priv) {
 		DRM_INFO("ldu system already on\n");
@@ -571,7 +370,6 @@ int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv)
 	}
 
 	dev_priv->ldu_priv = kmalloc(sizeof(*dev_priv->ldu_priv), GFP_KERNEL);
-
 	if (!dev_priv->ldu_priv)
 		return -ENOMEM;
 
@@ -580,18 +378,31 @@ int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv)
 	dev_priv->ldu_priv->last_num_active = 0;
 	dev_priv->ldu_priv->fb = NULL;
 
-	drm_mode_create_dirty_info_property(dev_priv->dev);
+	/* for old hardware without multimon only enable one display */
+	if (dev_priv->capabilities & SVGA_CAP_MULTIMON)
+		ret = drm_vblank_init(dev, VMWGFX_NUM_DISPLAY_UNITS);
+	else
+		ret = drm_vblank_init(dev, 1);
+	if (ret != 0)
+		goto err_free;
+
+	ret = drm_mode_create_dirty_info_property(dev);
+	if (ret != 0)
+		goto err_vblank_cleanup;
 
-	if (dev_priv->capabilities & SVGA_CAP_MULTIMON) {
-		for (i = 0; i < VMWGFX_LDU_NUM_DU; ++i)
+	if (dev_priv->capabilities & SVGA_CAP_MULTIMON)
+		for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i)
 			vmw_ldu_init(dev_priv, i);
-		ret = drm_vblank_init(dev, VMWGFX_LDU_NUM_DU);
-	} else {
-		/* for old hardware without multimon only enable one display */
+	else
 		vmw_ldu_init(dev_priv, 0);
-		ret = drm_vblank_init(dev, 1);
-	}
 
+	return 0;
+
+err_vblank_cleanup:
+	drm_vblank_cleanup(dev);
+err_free:
+	kfree(dev_priv->ldu_priv);
+	dev_priv->ldu_priv = NULL;
 	return ret;
 }
 
@@ -599,52 +410,14 @@ int vmw_kms_close_legacy_display_system(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
 
-	drm_vblank_cleanup(dev);
 	if (!dev_priv->ldu_priv)
 		return -ENOSYS;
 
+	drm_vblank_cleanup(dev);
+
 	BUG_ON(!list_empty(&dev_priv->ldu_priv->active));
 
 	kfree(dev_priv->ldu_priv);
 
 	return 0;
 }
-
-int vmw_kms_ldu_update_layout(struct vmw_private *dev_priv, unsigned num,
-			      struct drm_vmw_rect *rects)
-{
-	struct drm_device *dev = dev_priv->dev;
-	struct vmw_legacy_display_unit *ldu;
-	struct drm_connector *con;
-	int i;
-
-	mutex_lock(&dev->mode_config.mutex);
-
-#if 0
-	DRM_INFO("%s: new layout ", __func__);
-	for (i = 0; i < (int)num; i++)
-		DRM_INFO("(%i, %i %ux%u) ", rects[i].x, rects[i].y,
-			 rects[i].w, rects[i].h);
-	DRM_INFO("\n");
-#else
-	(void)i;
-#endif
-
-	list_for_each_entry(con, &dev->mode_config.connector_list, head) {
-		ldu = vmw_connector_to_ldu(con);
-		if (num > ldu->base.unit) {
-			ldu->pref_width = rects[ldu->base.unit].w;
-			ldu->pref_height = rects[ldu->base.unit].h;
-			ldu->pref_active = true;
-		} else {
-			ldu->pref_width = 800;
-			ldu->pref_height = 600;
-			ldu->pref_active = false;
-		}
-		con->status = vmw_ldu_connector_detect(con, true);
-	}
-
-	mutex_unlock(&dev->mode_config.mutex);
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 07ce02da78a4..14399eec9c3c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -87,48 +87,6 @@ static inline void fill_flush(struct vmw_escape_video_flush *cmd,
 }
 
 /**
- * Pin or unpin a buffer in vram.
- *
- * @dev_priv:  Driver private.
- * @buf:  DMA buffer to pin or unpin.
- * @pin:  Pin buffer in vram if true.
- * @interruptible:  Use interruptible wait.
- *
- * Takes the current masters ttm lock in read.
- *
- * Returns
- * -ERESTARTSYS if interrupted by a signal.
- */
-static int vmw_dmabuf_pin_in_vram(struct vmw_private *dev_priv,
-				  struct vmw_dma_buffer *buf,
-				  bool pin, bool interruptible)
-{
-	struct ttm_buffer_object *bo = &buf->base;
-	struct ttm_placement *overlay_placement = &vmw_vram_placement;
-	int ret;
-
-	ret = ttm_read_lock(&dev_priv->active_master->lock, interruptible);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
-	if (unlikely(ret != 0))
-		goto err;
-
-	if (pin)
-		overlay_placement = &vmw_vram_ne_placement;
-
-	ret = ttm_bo_validate(bo, overlay_placement, interruptible, false, false);
-
-	ttm_bo_unreserve(bo);
-
-err:
-	ttm_read_unlock(&dev_priv->active_master->lock);
-
-	return ret;
-}
-
-/**
  * Send put command to hw.
  *
  * Returns
@@ -139,68 +97,80 @@ static int vmw_overlay_send_put(struct vmw_private *dev_priv,
 				struct drm_vmw_control_stream_arg *arg,
 				bool interruptible)
 {
+	struct vmw_escape_video_flush *flush;
+	size_t fifo_size;
+	bool have_so = dev_priv->sou_priv ? true : false;
+	int i, num_items;
+	SVGAGuestPtr ptr;
+
 	struct {
 		struct vmw_escape_header escape;
 		struct {
-			struct {
-				uint32_t cmdType;
-				uint32_t streamId;
-			} header;
-			struct {
-				uint32_t registerId;
-				uint32_t value;
-			} items[SVGA_VIDEO_PITCH_3 + 1];
-		} body;
-		struct vmw_escape_video_flush flush;
+			uint32_t cmdType;
+			uint32_t streamId;
+		} header;
 	} *cmds;
-	uint32_t offset;
-	int i, ret;
+	struct {
+		uint32_t registerId;
+		uint32_t value;
+	} *items;
 
-	for (;;) {
-		cmds = vmw_fifo_reserve(dev_priv, sizeof(*cmds));
-		if (cmds)
-			break;
+	/* defines are a index needs + 1 */
+	if (have_so)
+		num_items = SVGA_VIDEO_DST_SCREEN_ID + 1;
+	else
+		num_items = SVGA_VIDEO_PITCH_3 + 1;
 
-		ret = vmw_fallback_wait(dev_priv, false, true, 0,
-					interruptible, 3*HZ);
-		if (interruptible && ret == -ERESTARTSYS)
-			return ret;
-		else
-			BUG_ON(ret != 0);
+	fifo_size = sizeof(*cmds) + sizeof(*flush) + sizeof(*items) * num_items;
+
+	cmds = vmw_fifo_reserve(dev_priv, fifo_size);
+	/* hardware has hung, can't do anything here */
+	if (!cmds)
+		return -ENOMEM;
+
+	items = (typeof(items))&cmds[1];
+	flush = (struct vmw_escape_video_flush *)&items[num_items];
+
+	/* the size is header + number of items */
+	fill_escape(&cmds->escape, sizeof(*items) * (num_items + 1));
+
+	cmds->header.cmdType = SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS;
+	cmds->header.streamId = arg->stream_id;
+
+	/* the IDs are neatly numbered */
+	for (i = 0; i < num_items; i++)
+		items[i].registerId = i;
+
+	vmw_bo_get_guest_ptr(&buf->base, &ptr);
+	ptr.offset += arg->offset;
+
+	items[SVGA_VIDEO_ENABLED].value     = true;
+	items[SVGA_VIDEO_FLAGS].value       = arg->flags;
+	items[SVGA_VIDEO_DATA_OFFSET].value = ptr.offset;
+	items[SVGA_VIDEO_FORMAT].value      = arg->format;
+	items[SVGA_VIDEO_COLORKEY].value    = arg->color_key;
+	items[SVGA_VIDEO_SIZE].value        = arg->size;
+	items[SVGA_VIDEO_WIDTH].value       = arg->width;
+	items[SVGA_VIDEO_HEIGHT].value      = arg->height;
+	items[SVGA_VIDEO_SRC_X].value       = arg->src.x;
+	items[SVGA_VIDEO_SRC_Y].value       = arg->src.y;
+	items[SVGA_VIDEO_SRC_WIDTH].value   = arg->src.w;
+	items[SVGA_VIDEO_SRC_HEIGHT].value  = arg->src.h;
+	items[SVGA_VIDEO_DST_X].value       = arg->dst.x;
+	items[SVGA_VIDEO_DST_Y].value       = arg->dst.y;
+	items[SVGA_VIDEO_DST_WIDTH].value   = arg->dst.w;
+	items[SVGA_VIDEO_DST_HEIGHT].value  = arg->dst.h;
+	items[SVGA_VIDEO_PITCH_1].value     = arg->pitch[0];
+	items[SVGA_VIDEO_PITCH_2].value     = arg->pitch[1];
+	items[SVGA_VIDEO_PITCH_3].value     = arg->pitch[2];
+	if (have_so) {
+		items[SVGA_VIDEO_DATA_GMRID].value    = ptr.gmrId;
+		items[SVGA_VIDEO_DST_SCREEN_ID].value = SVGA_ID_INVALID;
 	}
 
-	fill_escape(&cmds->escape, sizeof(cmds->body));
-	cmds->body.header.cmdType = SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS;
-	cmds->body.header.streamId = arg->stream_id;
-
-	for (i = 0; i <= SVGA_VIDEO_PITCH_3; i++)
-		cmds->body.items[i].registerId = i;
-
-	offset = buf->base.offset + arg->offset;
-
-	cmds->body.items[SVGA_VIDEO_ENABLED].value     = true;
-	cmds->body.items[SVGA_VIDEO_FLAGS].value       = arg->flags;
-	cmds->body.items[SVGA_VIDEO_DATA_OFFSET].value = offset;
-	cmds->body.items[SVGA_VIDEO_FORMAT].value      = arg->format;
-	cmds->body.items[SVGA_VIDEO_COLORKEY].value    = arg->color_key;
-	cmds->body.items[SVGA_VIDEO_SIZE].value        = arg->size;
-	cmds->body.items[SVGA_VIDEO_WIDTH].value       = arg->width;
-	cmds->body.items[SVGA_VIDEO_HEIGHT].value      = arg->height;
-	cmds->body.items[SVGA_VIDEO_SRC_X].value       = arg->src.x;
-	cmds->body.items[SVGA_VIDEO_SRC_Y].value       = arg->src.y;
-	cmds->body.items[SVGA_VIDEO_SRC_WIDTH].value   = arg->src.w;
-	cmds->body.items[SVGA_VIDEO_SRC_HEIGHT].value  = arg->src.h;
-	cmds->body.items[SVGA_VIDEO_DST_X].value       = arg->dst.x;
-	cmds->body.items[SVGA_VIDEO_DST_Y].value       = arg->dst.y;
-	cmds->body.items[SVGA_VIDEO_DST_WIDTH].value   = arg->dst.w;
-	cmds->body.items[SVGA_VIDEO_DST_HEIGHT].value  = arg->dst.h;
-	cmds->body.items[SVGA_VIDEO_PITCH_1].value     = arg->pitch[0];
-	cmds->body.items[SVGA_VIDEO_PITCH_2].value     = arg->pitch[1];
-	cmds->body.items[SVGA_VIDEO_PITCH_3].value     = arg->pitch[2];
-
-	fill_flush(&cmds->flush, arg->stream_id);
+	fill_flush(flush, arg->stream_id);
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmds));
+	vmw_fifo_commit(dev_priv, fifo_size);
 
 	return 0;
 }
@@ -248,6 +218,25 @@ static int vmw_overlay_send_stop(struct vmw_private *dev_priv,
 }
 
 /**
+ * Move a buffer to vram or gmr if @pin is set, else unpin the buffer.
+ *
+ * With the introduction of screen objects buffers could now be
+ * used with GMRs instead of being locked to vram.
+ */
+static int vmw_overlay_move_buffer(struct vmw_private *dev_priv,
+				   struct vmw_dma_buffer *buf,
+				   bool pin, bool inter)
+{
+	if (!pin)
+		return vmw_dmabuf_unpin(dev_priv, buf, inter);
+
+	if (!dev_priv->sou_priv)
+		return vmw_dmabuf_to_vram(dev_priv, buf, true, inter);
+
+	return vmw_dmabuf_to_vram_or_gmr(dev_priv, buf, true, inter);
+}
+
+/**
  * Stop or pause a stream.
  *
  * If the stream is paused the no evict flag is removed from the buffer
@@ -279,8 +268,8 @@ static int vmw_overlay_stop(struct vmw_private *dev_priv,
 			return ret;
 
 		/* We just remove the NO_EVICT flag so no -ENOMEM */
-		ret = vmw_dmabuf_pin_in_vram(dev_priv, stream->buf, false,
-					     interruptible);
+		ret = vmw_overlay_move_buffer(dev_priv, stream->buf, false,
+					      interruptible);
 		if (interruptible && ret == -ERESTARTSYS)
 			return ret;
 		else
@@ -342,7 +331,7 @@ static int vmw_overlay_update_stream(struct vmw_private *dev_priv,
 	/* We don't start the old stream if we are interrupted.
 	 * Might return -ENOMEM if it can't fit the buffer in vram.
 	 */
-	ret = vmw_dmabuf_pin_in_vram(dev_priv, buf, true, interruptible);
+	ret = vmw_overlay_move_buffer(dev_priv, buf, true, interruptible);
 	if (ret)
 		return ret;
 
@@ -351,7 +340,8 @@ static int vmw_overlay_update_stream(struct vmw_private *dev_priv,
 		/* This one needs to happen no matter what. We only remove
 		 * the NO_EVICT flag so this is safe from -ENOMEM.
 		 */
-		BUG_ON(vmw_dmabuf_pin_in_vram(dev_priv, buf, false, false) != 0);
+		BUG_ON(vmw_overlay_move_buffer(dev_priv, buf, false, false)
+		       != 0);
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index c1b6ffd4ce7b..86c5e4cceb31 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -39,6 +39,7 @@ struct vmw_user_context {
 struct vmw_user_surface {
 	struct ttm_base_object base;
 	struct vmw_surface srf;
+	uint32_t size;
 };
 
 struct vmw_user_dma_buffer {
@@ -61,6 +62,17 @@ struct vmw_user_stream {
 	struct vmw_stream stream;
 };
 
+struct vmw_surface_offset {
+	uint32_t face;
+	uint32_t mip;
+	uint32_t bo_offset;
+};
+
+
+static uint64_t vmw_user_context_size;
+static uint64_t vmw_user_surface_size;
+static uint64_t vmw_user_stream_size;
+
 static inline struct vmw_dma_buffer *
 vmw_dma_buffer(struct ttm_buffer_object *bo)
 {
@@ -80,13 +92,36 @@ struct vmw_resource *vmw_resource_reference(struct vmw_resource *res)
 	return res;
 }
 
+
+/**
+ * vmw_resource_release_id - release a resource id to the id manager.
+ *
+ * @res: Pointer to the resource.
+ *
+ * Release the resource id to the resource id manager and set it to -1
+ */
+static void vmw_resource_release_id(struct vmw_resource *res)
+{
+	struct vmw_private *dev_priv = res->dev_priv;
+
+	write_lock(&dev_priv->resource_lock);
+	if (res->id != -1)
+		idr_remove(res->idr, res->id);
+	res->id = -1;
+	write_unlock(&dev_priv->resource_lock);
+}
+
 static void vmw_resource_release(struct kref *kref)
 {
 	struct vmw_resource *res =
 	    container_of(kref, struct vmw_resource, kref);
 	struct vmw_private *dev_priv = res->dev_priv;
+	int id = res->id;
+	struct idr *idr = res->idr;
 
-	idr_remove(res->idr, res->id);
+	res->avail = false;
+	if (res->remove_from_lists != NULL)
+		res->remove_from_lists(res);
 	write_unlock(&dev_priv->resource_lock);
 
 	if (likely(res->hw_destroy != NULL))
@@ -98,6 +133,9 @@ static void vmw_resource_release(struct kref *kref)
 		kfree(res);
 
 	write_lock(&dev_priv->resource_lock);
+
+	if (id != -1)
+		idr_remove(idr, id);
 }
 
 void vmw_resource_unreference(struct vmw_resource **p_res)
@@ -111,28 +149,29 @@ void vmw_resource_unreference(struct vmw_resource **p_res)
 	write_unlock(&dev_priv->resource_lock);
 }
 
-static int vmw_resource_init(struct vmw_private *dev_priv,
-			     struct vmw_resource *res,
-			     struct idr *idr,
-			     enum ttm_object_type obj_type,
-			     void (*res_free) (struct vmw_resource *res))
+
+/**
+ * vmw_resource_alloc_id - release a resource id to the id manager.
+ *
+ * @dev_priv: Pointer to the device private structure.
+ * @res: Pointer to the resource.
+ *
+ * Allocate the lowest free resource from the resource manager, and set
+ * @res->id to that id. Returns 0 on success and -ENOMEM on failure.
+ */
+static int vmw_resource_alloc_id(struct vmw_private *dev_priv,
+				 struct vmw_resource *res)
 {
 	int ret;
 
-	kref_init(&res->kref);
-	res->hw_destroy = NULL;
-	res->res_free = res_free;
-	res->res_type = obj_type;
-	res->idr = idr;
-	res->avail = false;
-	res->dev_priv = dev_priv;
+	BUG_ON(res->id != -1);
 
 	do {
-		if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
+		if (unlikely(idr_pre_get(res->idr, GFP_KERNEL) == 0))
 			return -ENOMEM;
 
 		write_lock(&dev_priv->resource_lock);
-		ret = idr_get_new_above(idr, res, 1, &res->id);
+		ret = idr_get_new_above(res->idr, res, 1, &res->id);
 		write_unlock(&dev_priv->resource_lock);
 
 	} while (ret == -EAGAIN);
@@ -140,6 +179,33 @@ static int vmw_resource_init(struct vmw_private *dev_priv,
 	return ret;
 }
 
+
+static int vmw_resource_init(struct vmw_private *dev_priv,
+			     struct vmw_resource *res,
+			     struct idr *idr,
+			     enum ttm_object_type obj_type,
+			     bool delay_id,
+			     void (*res_free) (struct vmw_resource *res),
+			     void (*remove_from_lists)
+			     (struct vmw_resource *res))
+{
+	kref_init(&res->kref);
+	res->hw_destroy = NULL;
+	res->res_free = res_free;
+	res->remove_from_lists = remove_from_lists;
+	res->res_type = obj_type;
+	res->idr = idr;
+	res->avail = false;
+	res->dev_priv = dev_priv;
+	INIT_LIST_HEAD(&res->query_head);
+	INIT_LIST_HEAD(&res->validate_head);
+	res->id = -1;
+	if (delay_id)
+		return 0;
+	else
+		return vmw_resource_alloc_id(dev_priv, res);
+}
+
 /**
  * vmw_resource_activate
  *
@@ -194,8 +260,12 @@ static void vmw_hw_context_destroy(struct vmw_resource *res)
 	struct {
 		SVGA3dCmdHeader header;
 		SVGA3dCmdDestroyContext body;
-	} *cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+	} *cmd;
+
 
+	vmw_execbuf_release_pinned_bo(dev_priv, true, res->id);
+
+	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Failed reserving FIFO space for surface "
 			  "destruction.\n");
@@ -222,14 +292,17 @@ static int vmw_context_init(struct vmw_private *dev_priv,
 	} *cmd;
 
 	ret = vmw_resource_init(dev_priv, res, &dev_priv->context_idr,
-				VMW_RES_CONTEXT, res_free);
+				VMW_RES_CONTEXT, false, res_free, NULL);
 
 	if (unlikely(ret != 0)) {
-		if (res_free == NULL)
-			kfree(res);
-		else
-			res_free(res);
-		return ret;
+		DRM_ERROR("Failed to allocate a resource id.\n");
+		goto out_early;
+	}
+
+	if (unlikely(res->id >= SVGA3D_MAX_CONTEXT_IDS)) {
+		DRM_ERROR("Out of hw context ids.\n");
+		vmw_resource_unreference(&res);
+		return -ENOMEM;
 	}
 
 	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
@@ -247,6 +320,13 @@ static int vmw_context_init(struct vmw_private *dev_priv,
 	(void) vmw_3d_resource_inc(dev_priv, false);
 	vmw_resource_activate(res, vmw_hw_context_destroy);
 	return 0;
+
+out_early:
+	if (res_free == NULL)
+		kfree(res);
+	else
+		res_free(res);
+	return ret;
 }
 
 struct vmw_resource *vmw_context_alloc(struct vmw_private *dev_priv)
@@ -269,8 +349,11 @@ static void vmw_user_context_free(struct vmw_resource *res)
 {
 	struct vmw_user_context *ctx =
 	    container_of(res, struct vmw_user_context, res);
+	struct vmw_private *dev_priv = res->dev_priv;
 
 	kfree(ctx);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv),
+			    vmw_user_context_size);
 }
 
 /**
@@ -324,23 +407,56 @@ int vmw_context_define_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_context *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	struct vmw_user_context *ctx;
 	struct vmw_resource *res;
 	struct vmw_resource *tmp;
 	struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	int ret;
 
-	if (unlikely(ctx == NULL))
-		return -ENOMEM;
+
+	/*
+	 * Approximate idr memory usage with 128 bytes. It will be limited
+	 * by maximum number_of contexts anyway.
+	 */
+
+	if (unlikely(vmw_user_context_size == 0))
+		vmw_user_context_size = ttm_round_pot(sizeof(*ctx)) + 128;
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   vmw_user_context_size,
+				   false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for context"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (unlikely(ctx == NULL)) {
+		ttm_mem_global_free(vmw_mem_glob(dev_priv),
+				    vmw_user_context_size);
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
 
 	res = &ctx->res;
 	ctx->base.shareable = false;
 	ctx->base.tfile = NULL;
 
+	/*
+	 * From here on, the destructor takes over resource freeing.
+	 */
+
 	ret = vmw_context_init(dev_priv, res, vmw_user_context_free);
 	if (unlikely(ret != 0))
-		return ret;
+		goto out_unlock;
 
 	tmp = vmw_resource_reference(&ctx->res);
 	ret = ttm_base_object_init(tfile, &ctx->base, false, VMW_RES_CONTEXT,
@@ -354,6 +470,8 @@ int vmw_context_define_ioctl(struct drm_device *dev, void *data,
 	arg->cid = res->id;
 out_err:
 	vmw_resource_unreference(&res);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
 	return ret;
 
 }
@@ -382,31 +500,285 @@ int vmw_context_check(struct vmw_private *dev_priv,
 	return ret;
 }
 
+struct vmw_bpp {
+	uint8_t bpp;
+	uint8_t s_bpp;
+};
+
+/*
+ * Size table for the supported SVGA3D surface formats. It consists of
+ * two values. The bpp value and the s_bpp value which is short for
+ * "stride bits per pixel" The values are given in such a way that the
+ * minimum stride for the image is calculated using
+ *
+ * min_stride = w*s_bpp
+ *
+ * and the total memory requirement for the image is
+ *
+ * h*min_stride*bpp/s_bpp
+ *
+ */
+static const struct vmw_bpp vmw_sf_bpp[] = {
+	[SVGA3D_FORMAT_INVALID] = {0, 0},
+	[SVGA3D_X8R8G8B8] = {32, 32},
+	[SVGA3D_A8R8G8B8] = {32, 32},
+	[SVGA3D_R5G6B5] = {16, 16},
+	[SVGA3D_X1R5G5B5] = {16, 16},
+	[SVGA3D_A1R5G5B5] = {16, 16},
+	[SVGA3D_A4R4G4B4] = {16, 16},
+	[SVGA3D_Z_D32] = {32, 32},
+	[SVGA3D_Z_D16] = {16, 16},
+	[SVGA3D_Z_D24S8] = {32, 32},
+	[SVGA3D_Z_D15S1] = {16, 16},
+	[SVGA3D_LUMINANCE8] = {8, 8},
+	[SVGA3D_LUMINANCE4_ALPHA4] = {8, 8},
+	[SVGA3D_LUMINANCE16] = {16, 16},
+	[SVGA3D_LUMINANCE8_ALPHA8] = {16, 16},
+	[SVGA3D_DXT1] = {4, 16},
+	[SVGA3D_DXT2] = {8, 32},
+	[SVGA3D_DXT3] = {8, 32},
+	[SVGA3D_DXT4] = {8, 32},
+	[SVGA3D_DXT5] = {8, 32},
+	[SVGA3D_BUMPU8V8] = {16, 16},
+	[SVGA3D_BUMPL6V5U5] = {16, 16},
+	[SVGA3D_BUMPX8L8V8U8] = {32, 32},
+	[SVGA3D_ARGB_S10E5] = {16, 16},
+	[SVGA3D_ARGB_S23E8] = {32, 32},
+	[SVGA3D_A2R10G10B10] = {32, 32},
+	[SVGA3D_V8U8] = {16, 16},
+	[SVGA3D_Q8W8V8U8] = {32, 32},
+	[SVGA3D_CxV8U8] = {16, 16},
+	[SVGA3D_X8L8V8U8] = {32, 32},
+	[SVGA3D_A2W10V10U10] = {32, 32},
+	[SVGA3D_ALPHA8] = {8, 8},
+	[SVGA3D_R_S10E5] = {16, 16},
+	[SVGA3D_R_S23E8] = {32, 32},
+	[SVGA3D_RG_S10E5] = {16, 16},
+	[SVGA3D_RG_S23E8] = {32, 32},
+	[SVGA3D_BUFFER] = {8, 8},
+	[SVGA3D_Z_D24X8] = {32, 32},
+	[SVGA3D_V16U16] = {32, 32},
+	[SVGA3D_G16R16] = {32, 32},
+	[SVGA3D_A16B16G16R16] = {64,  64},
+	[SVGA3D_UYVY] = {12, 12},
+	[SVGA3D_YUY2] = {12, 12},
+	[SVGA3D_NV12] = {12, 8},
+	[SVGA3D_AYUV] = {32, 32},
+	[SVGA3D_BC4_UNORM] = {4,  16},
+	[SVGA3D_BC5_UNORM] = {8,  32},
+	[SVGA3D_Z_DF16] = {16,  16},
+	[SVGA3D_Z_DF24] = {24,  24},
+	[SVGA3D_Z_D24S8_INT] = {32,  32}
+};
+
 
 /**
  * Surface management.
  */
 
+struct vmw_surface_dma {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdSurfaceDMA body;
+	SVGA3dCopyBox cb;
+	SVGA3dCmdSurfaceDMASuffix suffix;
+};
+
+struct vmw_surface_define {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdDefineSurface body;
+};
+
+struct vmw_surface_destroy {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdDestroySurface body;
+};
+
+
+/**
+ * vmw_surface_dma_size - Compute fifo size for a dma command.
+ *
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * Computes the required size for a surface dma command for backup or
+ * restoration of the surface represented by @srf.
+ */
+static inline uint32_t vmw_surface_dma_size(const struct vmw_surface *srf)
+{
+	return srf->num_sizes * sizeof(struct vmw_surface_dma);
+}
+
+
+/**
+ * vmw_surface_define_size - Compute fifo size for a surface define command.
+ *
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * Computes the required size for a surface define command for the definition
+ * of the surface represented by @srf.
+ */
+static inline uint32_t vmw_surface_define_size(const struct vmw_surface *srf)
+{
+	return sizeof(struct vmw_surface_define) + srf->num_sizes *
+		sizeof(SVGA3dSize);
+}
+
+
+/**
+ * vmw_surface_destroy_size - Compute fifo size for a surface destroy command.
+ *
+ * Computes the required size for a surface destroy command for the destruction
+ * of a hw surface.
+ */
+static inline uint32_t vmw_surface_destroy_size(void)
+{
+	return sizeof(struct vmw_surface_destroy);
+}
+
+/**
+ * vmw_surface_destroy_encode - Encode a surface_destroy command.
+ *
+ * @id: The surface id
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ */
+static void vmw_surface_destroy_encode(uint32_t id,
+				       void *cmd_space)
+{
+	struct vmw_surface_destroy *cmd = (struct vmw_surface_destroy *)
+		cmd_space;
+
+	cmd->header.id = SVGA_3D_CMD_SURFACE_DESTROY;
+	cmd->header.size = sizeof(cmd->body);
+	cmd->body.sid = id;
+}
+
+/**
+ * vmw_surface_define_encode - Encode a surface_define command.
+ *
+ * @srf: Pointer to a struct vmw_surface object.
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ */
+static void vmw_surface_define_encode(const struct vmw_surface *srf,
+				      void *cmd_space)
+{
+	struct vmw_surface_define *cmd = (struct vmw_surface_define *)
+		cmd_space;
+	struct drm_vmw_size *src_size;
+	SVGA3dSize *cmd_size;
+	uint32_t cmd_len;
+	int i;
+
+	cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize);
+
+	cmd->header.id = SVGA_3D_CMD_SURFACE_DEFINE;
+	cmd->header.size = cmd_len;
+	cmd->body.sid = srf->res.id;
+	cmd->body.surfaceFlags = srf->flags;
+	cmd->body.format = cpu_to_le32(srf->format);
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+		cmd->body.face[i].numMipLevels = srf->mip_levels[i];
+
+	cmd += 1;
+	cmd_size = (SVGA3dSize *) cmd;
+	src_size = srf->sizes;
+
+	for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) {
+		cmd_size->width = src_size->width;
+		cmd_size->height = src_size->height;
+		cmd_size->depth = src_size->depth;
+	}
+}
+
+
+/**
+ * vmw_surface_dma_encode - Encode a surface_dma command.
+ *
+ * @srf: Pointer to a struct vmw_surface object.
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ * @ptr: Pointer to an SVGAGuestPtr indicating where the surface contents
+ * should be placed or read from.
+ * @to_surface: Boolean whether to DMA to the surface or from the surface.
+ */
+static void vmw_surface_dma_encode(struct vmw_surface *srf,
+				   void *cmd_space,
+				   const SVGAGuestPtr *ptr,
+				   bool to_surface)
+{
+	uint32_t i;
+	uint32_t bpp = vmw_sf_bpp[srf->format].bpp;
+	uint32_t stride_bpp = vmw_sf_bpp[srf->format].s_bpp;
+	struct vmw_surface_dma *cmd = (struct vmw_surface_dma *)cmd_space;
+
+	for (i = 0; i < srf->num_sizes; ++i) {
+		SVGA3dCmdHeader *header = &cmd->header;
+		SVGA3dCmdSurfaceDMA *body = &cmd->body;
+		SVGA3dCopyBox *cb = &cmd->cb;
+		SVGA3dCmdSurfaceDMASuffix *suffix = &cmd->suffix;
+		const struct vmw_surface_offset *cur_offset = &srf->offsets[i];
+		const struct drm_vmw_size *cur_size = &srf->sizes[i];
+
+		header->id = SVGA_3D_CMD_SURFACE_DMA;
+		header->size = sizeof(*body) + sizeof(*cb) + sizeof(*suffix);
+
+		body->guest.ptr = *ptr;
+		body->guest.ptr.offset += cur_offset->bo_offset;
+		body->guest.pitch = (cur_size->width * stride_bpp + 7) >> 3;
+		body->host.sid = srf->res.id;
+		body->host.face = cur_offset->face;
+		body->host.mipmap = cur_offset->mip;
+		body->transfer = ((to_surface) ?  SVGA3D_WRITE_HOST_VRAM :
+				  SVGA3D_READ_HOST_VRAM);
+		cb->x = 0;
+		cb->y = 0;
+		cb->z = 0;
+		cb->srcx = 0;
+		cb->srcy = 0;
+		cb->srcz = 0;
+		cb->w = cur_size->width;
+		cb->h = cur_size->height;
+		cb->d = cur_size->depth;
+
+		suffix->suffixSize = sizeof(*suffix);
+		suffix->maximumOffset = body->guest.pitch*cur_size->height*
+			cur_size->depth*bpp / stride_bpp;
+		suffix->flags.discard = 0;
+		suffix->flags.unsynchronized = 0;
+		suffix->flags.reserved = 0;
+		++cmd;
+	}
+};
+
+
 static void vmw_hw_surface_destroy(struct vmw_resource *res)
 {
 
 	struct vmw_private *dev_priv = res->dev_priv;
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdDestroySurface body;
-	} *cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+	struct vmw_surface *srf;
+	void *cmd;
 
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Failed reserving FIFO space for surface "
-			  "destruction.\n");
-		return;
-	}
+	if (res->id != -1) {
 
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_SURFACE_DESTROY);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body));
-	cmd->body.sid = cpu_to_le32(res->id);
+		cmd = vmw_fifo_reserve(dev_priv, vmw_surface_destroy_size());
+		if (unlikely(cmd == NULL)) {
+			DRM_ERROR("Failed reserving FIFO space for surface "
+				  "destruction.\n");
+			return;
+		}
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmd));
+		vmw_surface_destroy_encode(res->id, cmd);
+		vmw_fifo_commit(dev_priv, vmw_surface_destroy_size());
+
+		/*
+		 * used_memory_size_atomic, or separate lock
+		 * to avoid taking dev_priv::cmdbuf_mutex in
+		 * the destroy path.
+		 */
+
+		mutex_lock(&dev_priv->cmdbuf_mutex);
+		srf = container_of(res, struct vmw_surface, res);
+		dev_priv->used_memory_size -= srf->backup_size;
+		mutex_unlock(&dev_priv->cmdbuf_mutex);
+
+	}
 	vmw_3d_resource_dec(dev_priv, false);
 }
 
@@ -414,70 +786,352 @@ void vmw_surface_res_free(struct vmw_resource *res)
 {
 	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
 
+	if (srf->backup)
+		ttm_bo_unref(&srf->backup);
+	kfree(srf->offsets);
 	kfree(srf->sizes);
 	kfree(srf->snooper.image);
 	kfree(srf);
 }
 
-int vmw_surface_init(struct vmw_private *dev_priv,
-		     struct vmw_surface *srf,
-		     void (*res_free) (struct vmw_resource *res))
+
+/**
+ * vmw_surface_do_validate - make a surface available to the device.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface.
+ *
+ * If the surface doesn't have a hw id, allocate one, and optionally
+ * DMA the backed up surface contents to the device.
+ *
+ * Returns -EBUSY if there wasn't sufficient device resources to
+ * complete the validation. Retry after freeing up resources.
+ *
+ * May return other errors if the kernel is out of guest resources.
+ */
+int vmw_surface_do_validate(struct vmw_private *dev_priv,
+			    struct vmw_surface *srf)
 {
-	int ret;
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdDefineSurface body;
-	} *cmd;
-	SVGA3dSize *cmd_size;
 	struct vmw_resource *res = &srf->res;
-	struct drm_vmw_size *src_size;
-	size_t submit_size;
-	uint32_t cmd_len;
-	int i;
+	struct list_head val_list;
+	struct ttm_validate_buffer val_buf;
+	uint32_t submit_size;
+	uint8_t *cmd;
+	int ret;
 
-	BUG_ON(res_free == NULL);
-	ret = vmw_resource_init(dev_priv, res, &dev_priv->surface_idr,
-				VMW_RES_SURFACE, res_free);
+	if (likely(res->id != -1))
+		return 0;
+
+	if (unlikely(dev_priv->used_memory_size + srf->backup_size >=
+		     dev_priv->memory_size))
+		return -EBUSY;
+
+	/*
+	 * Reserve- and validate the backup DMA bo.
+	 */
+
+	if (srf->backup) {
+		INIT_LIST_HEAD(&val_list);
+		val_buf.bo = ttm_bo_reference(srf->backup);
+		val_buf.new_sync_obj_arg = (void *)((unsigned long)
+						    DRM_VMW_FENCE_FLAG_EXEC);
+		list_add_tail(&val_buf.head, &val_list);
+		ret = ttm_eu_reserve_buffers(&val_list);
+		if (unlikely(ret != 0))
+			goto out_no_reserve;
+
+		ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
+				      true, false, false);
+		if (unlikely(ret != 0))
+			goto out_no_validate;
+	}
+
+	/*
+	 * Alloc id for the resource.
+	 */
 
+	ret = vmw_resource_alloc_id(dev_priv, res);
 	if (unlikely(ret != 0)) {
-		res_free(res);
-		return ret;
+		DRM_ERROR("Failed to allocate a surface id.\n");
+		goto out_no_id;
+	}
+	if (unlikely(res->id >= SVGA3D_MAX_SURFACE_IDS)) {
+		ret = -EBUSY;
+		goto out_no_fifo;
 	}
 
-	submit_size = sizeof(*cmd) + srf->num_sizes * sizeof(SVGA3dSize);
-	cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize);
+
+	/*
+	 * Encode surface define- and dma commands.
+	 */
+
+	submit_size = vmw_surface_define_size(srf);
+	if (srf->backup)
+		submit_size += vmw_surface_dma_size(srf);
 
 	cmd = vmw_fifo_reserve(dev_priv, submit_size);
 	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Fifo reserve failed for create surface.\n");
-		vmw_resource_unreference(&res);
-		return -ENOMEM;
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "validation.\n");
+		ret = -ENOMEM;
+		goto out_no_fifo;
 	}
 
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_SURFACE_DEFINE);
-	cmd->header.size = cpu_to_le32(cmd_len);
-	cmd->body.sid = cpu_to_le32(res->id);
-	cmd->body.surfaceFlags = cpu_to_le32(srf->flags);
-	cmd->body.format = cpu_to_le32(srf->format);
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
-		cmd->body.face[i].numMipLevels =
-		    cpu_to_le32(srf->mip_levels[i]);
+	vmw_surface_define_encode(srf, cmd);
+	if (srf->backup) {
+		SVGAGuestPtr ptr;
+
+		cmd += vmw_surface_define_size(srf);
+		vmw_bo_get_guest_ptr(srf->backup, &ptr);
+		vmw_surface_dma_encode(srf, cmd, &ptr, true);
 	}
 
-	cmd += 1;
-	cmd_size = (SVGA3dSize *) cmd;
-	src_size = srf->sizes;
+	vmw_fifo_commit(dev_priv, submit_size);
 
-	for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) {
-		cmd_size->width = cpu_to_le32(src_size->width);
-		cmd_size->height = cpu_to_le32(src_size->height);
-		cmd_size->depth = cpu_to_le32(src_size->depth);
+	/*
+	 * Create a fence object and fence the backup buffer.
+	 */
+
+	if (srf->backup) {
+		struct vmw_fence_obj *fence;
+
+		(void) vmw_execbuf_fence_commands(NULL, dev_priv,
+						  &fence, NULL);
+		ttm_eu_fence_buffer_objects(&val_list, fence);
+		if (likely(fence != NULL))
+			vmw_fence_obj_unreference(&fence);
+		ttm_bo_unref(&val_buf.bo);
+		ttm_bo_unref(&srf->backup);
+	}
+
+	/*
+	 * Surface memory usage accounting.
+	 */
+
+	dev_priv->used_memory_size += srf->backup_size;
+
+	return 0;
+
+out_no_fifo:
+	vmw_resource_release_id(res);
+out_no_id:
+out_no_validate:
+	if (srf->backup)
+		ttm_eu_backoff_reservation(&val_list);
+out_no_reserve:
+	if (srf->backup)
+		ttm_bo_unref(&val_buf.bo);
+	return ret;
+}
+
+/**
+ * vmw_surface_evict - Evict a hw surface.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * DMA the contents of a hw surface to a backup guest buffer object,
+ * and destroy the hw surface, releasing its id.
+ */
+int vmw_surface_evict(struct vmw_private *dev_priv,
+		      struct vmw_surface *srf)
+{
+	struct vmw_resource *res = &srf->res;
+	struct list_head val_list;
+	struct ttm_validate_buffer val_buf;
+	uint32_t submit_size;
+	uint8_t *cmd;
+	int ret;
+	struct vmw_fence_obj *fence;
+	SVGAGuestPtr ptr;
+
+	BUG_ON(res->id == -1);
+
+	/*
+	 * Create a surface backup buffer object.
+	 */
+
+	if (!srf->backup) {
+		ret = ttm_bo_create(&dev_priv->bdev, srf->backup_size,
+				    ttm_bo_type_device,
+				    &vmw_srf_placement, 0, 0, true,
+				    NULL, &srf->backup);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	/*
+	 * Reserve- and validate the backup DMA bo.
+	 */
+
+	INIT_LIST_HEAD(&val_list);
+	val_buf.bo = ttm_bo_reference(srf->backup);
+	val_buf.new_sync_obj_arg = (void *)(unsigned long)
+		DRM_VMW_FENCE_FLAG_EXEC;
+	list_add_tail(&val_buf.head, &val_list);
+	ret = ttm_eu_reserve_buffers(&val_list);
+	if (unlikely(ret != 0))
+		goto out_no_reserve;
+
+	ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
+			      true, false, false);
+	if (unlikely(ret != 0))
+		goto out_no_validate;
+
+
+	/*
+	 * Encode the dma- and surface destroy commands.
+	 */
+
+	submit_size = vmw_surface_dma_size(srf) + vmw_surface_destroy_size();
+	cmd = vmw_fifo_reserve(dev_priv, submit_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "eviction.\n");
+		ret = -ENOMEM;
+		goto out_no_fifo;
 	}
 
+	vmw_bo_get_guest_ptr(srf->backup, &ptr);
+	vmw_surface_dma_encode(srf, cmd, &ptr, false);
+	cmd += vmw_surface_dma_size(srf);
+	vmw_surface_destroy_encode(res->id, cmd);
 	vmw_fifo_commit(dev_priv, submit_size);
+
+	/*
+	 * Surface memory usage accounting.
+	 */
+
+	dev_priv->used_memory_size -= srf->backup_size;
+
+	/*
+	 * Create a fence object and fence the DMA buffer.
+	 */
+
+	(void) vmw_execbuf_fence_commands(NULL, dev_priv,
+					  &fence, NULL);
+	ttm_eu_fence_buffer_objects(&val_list, fence);
+	if (likely(fence != NULL))
+		vmw_fence_obj_unreference(&fence);
+	ttm_bo_unref(&val_buf.bo);
+
+	/*
+	 * Release the surface ID.
+	 */
+
+	vmw_resource_release_id(res);
+
+	return 0;
+
+out_no_fifo:
+out_no_validate:
+	if (srf->backup)
+		ttm_eu_backoff_reservation(&val_list);
+out_no_reserve:
+	ttm_bo_unref(&val_buf.bo);
+	ttm_bo_unref(&srf->backup);
+	return ret;
+}
+
+
+/**
+ * vmw_surface_validate - make a surface available to the device, evicting
+ * other surfaces if needed.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface.
+ *
+ * Try to validate a surface and if it fails due to limited device resources,
+ * repeatedly try to evict other surfaces until the request can be
+ * acommodated.
+ *
+ * May return errors if out of resources.
+ */
+int vmw_surface_validate(struct vmw_private *dev_priv,
+			 struct vmw_surface *srf)
+{
+	int ret;
+	struct vmw_surface *evict_srf;
+
+	do {
+		write_lock(&dev_priv->resource_lock);
+		list_del_init(&srf->lru_head);
+		write_unlock(&dev_priv->resource_lock);
+
+		ret = vmw_surface_do_validate(dev_priv, srf);
+		if (likely(ret != -EBUSY))
+			break;
+
+		write_lock(&dev_priv->resource_lock);
+		if (list_empty(&dev_priv->surface_lru)) {
+			DRM_ERROR("Out of device memory for surfaces.\n");
+			ret = -EBUSY;
+			write_unlock(&dev_priv->resource_lock);
+			break;
+		}
+
+		evict_srf = vmw_surface_reference
+			(list_first_entry(&dev_priv->surface_lru,
+					  struct vmw_surface,
+					  lru_head));
+		list_del_init(&evict_srf->lru_head);
+
+		write_unlock(&dev_priv->resource_lock);
+		(void) vmw_surface_evict(dev_priv, evict_srf);
+
+		vmw_surface_unreference(&evict_srf);
+
+	} while (1);
+
+	if (unlikely(ret != 0 && srf->res.id != -1)) {
+		write_lock(&dev_priv->resource_lock);
+		list_add_tail(&srf->lru_head, &dev_priv->surface_lru);
+		write_unlock(&dev_priv->resource_lock);
+	}
+
+	return ret;
+}
+
+
+/**
+ * vmw_surface_remove_from_lists - Remove surface resources from lookup lists
+ *
+ * @res: Pointer to a struct vmw_resource embedded in a struct vmw_surface
+ *
+ * As part of the resource destruction, remove the surface from any
+ * lookup lists.
+ */
+static void vmw_surface_remove_from_lists(struct vmw_resource *res)
+{
+	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
+
+	list_del_init(&srf->lru_head);
+}
+
+int vmw_surface_init(struct vmw_private *dev_priv,
+		     struct vmw_surface *srf,
+		     void (*res_free) (struct vmw_resource *res))
+{
+	int ret;
+	struct vmw_resource *res = &srf->res;
+
+	BUG_ON(res_free == NULL);
+	INIT_LIST_HEAD(&srf->lru_head);
+	ret = vmw_resource_init(dev_priv, res, &dev_priv->surface_idr,
+				VMW_RES_SURFACE, true, res_free,
+				vmw_surface_remove_from_lists);
+
+	if (unlikely(ret != 0))
+		res_free(res);
+
+	/*
+	 * The surface won't be visible to hardware until a
+	 * surface validate.
+	 */
+
 	(void) vmw_3d_resource_inc(dev_priv, false);
 	vmw_resource_activate(res, vmw_hw_surface_destroy);
-	return 0;
+	return ret;
 }
 
 static void vmw_user_surface_free(struct vmw_resource *res)
@@ -485,12 +1139,58 @@ static void vmw_user_surface_free(struct vmw_resource *res)
 	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
 	struct vmw_user_surface *user_srf =
 	    container_of(srf, struct vmw_user_surface, srf);
+	struct vmw_private *dev_priv = srf->res.dev_priv;
+	uint32_t size = user_srf->size;
 
+	if (srf->backup)
+		ttm_bo_unref(&srf->backup);
+	kfree(srf->offsets);
 	kfree(srf->sizes);
 	kfree(srf->snooper.image);
 	kfree(user_srf);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
+}
+
+/**
+ * vmw_resource_unreserve - unreserve resources previously reserved for
+ * command submission.
+ *
+ * @list_head: list of resources to unreserve.
+ *
+ * Currently only surfaces are considered, and unreserving a surface
+ * means putting it back on the device's surface lru list,
+ * so that it can be evicted if necessary.
+ * This function traverses the resource list and
+ * checks whether resources are surfaces, and in that case puts them back
+ * on the device's surface LRU list.
+ */
+void vmw_resource_unreserve(struct list_head *list)
+{
+	struct vmw_resource *res;
+	struct vmw_surface *srf;
+	rwlock_t *lock = NULL;
+
+	list_for_each_entry(res, list, validate_head) {
+
+		if (res->res_free != &vmw_surface_res_free &&
+		    res->res_free != &vmw_user_surface_free)
+			continue;
+
+		if (unlikely(lock == NULL)) {
+			lock = &res->dev_priv->resource_lock;
+			write_lock(lock);
+		}
+
+		srf = container_of(res, struct vmw_surface, res);
+		list_del_init(&srf->lru_head);
+		list_add_tail(&srf->lru_head, &res->dev_priv->surface_lru);
+	}
+
+	if (lock != NULL)
+		write_unlock(lock);
 }
 
+
 int vmw_user_surface_lookup_handle(struct vmw_private *dev_priv,
 				   struct ttm_object_file *tfile,
 				   uint32_t handle, struct vmw_surface **out)
@@ -555,8 +1255,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_surface *user_srf =
-	    kmalloc(sizeof(*user_srf), GFP_KERNEL);
+	struct vmw_user_surface *user_srf;
 	struct vmw_surface *srf;
 	struct vmw_resource *res;
 	struct vmw_resource *tmp;
@@ -567,10 +1266,51 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	struct drm_vmw_size __user *user_sizes;
 	int ret;
-	int i;
+	int i, j;
+	uint32_t cur_bo_offset;
+	struct drm_vmw_size *cur_size;
+	struct vmw_surface_offset *cur_offset;
+	uint32_t stride_bpp;
+	uint32_t bpp;
+	uint32_t num_sizes;
+	uint32_t size;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
 
-	if (unlikely(user_srf == NULL))
-		return -ENOMEM;
+	if (unlikely(vmw_user_surface_size == 0))
+		vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) +
+			128;
+
+	num_sizes = 0;
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+		num_sizes += req->mip_levels[i];
+
+	if (num_sizes > DRM_VMW_MAX_SURFACE_FACES *
+	    DRM_VMW_MAX_MIP_LEVELS)
+		return -EINVAL;
+
+	size = vmw_user_surface_size + 128 +
+		ttm_round_pot(num_sizes * sizeof(struct drm_vmw_size)) +
+		ttm_round_pot(num_sizes * sizeof(struct vmw_surface_offset));
+
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   size, false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for surface"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+	user_srf = kmalloc(sizeof(*user_srf), GFP_KERNEL);
+	if (unlikely(user_srf == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_user_srf;
+	}
 
 	srf = &user_srf->srf;
 	res = &srf->res;
@@ -578,21 +1318,22 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	srf->flags = req->flags;
 	srf->format = req->format;
 	srf->scanout = req->scanout;
-	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
-	srf->num_sizes = 0;
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
-		srf->num_sizes += srf->mip_levels[i];
+	srf->backup = NULL;
 
-	if (srf->num_sizes > DRM_VMW_MAX_SURFACE_FACES *
-	    DRM_VMW_MAX_MIP_LEVELS) {
-		ret = -EINVAL;
-		goto out_err0;
-	}
+	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
+	srf->num_sizes = num_sizes;
+	user_srf->size = size;
 
 	srf->sizes = kmalloc(srf->num_sizes * sizeof(*srf->sizes), GFP_KERNEL);
 	if (unlikely(srf->sizes == NULL)) {
 		ret = -ENOMEM;
-		goto out_err0;
+		goto out_no_sizes;
+	}
+	srf->offsets = kmalloc(srf->num_sizes * sizeof(*srf->offsets),
+			       GFP_KERNEL);
+	if (unlikely(srf->sizes == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_offsets;
 	}
 
 	user_sizes = (struct drm_vmw_size __user *)(unsigned long)
@@ -602,9 +1343,32 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 			     srf->num_sizes * sizeof(*srf->sizes));
 	if (unlikely(ret != 0)) {
 		ret = -EFAULT;
-		goto out_err1;
+		goto out_no_copy;
 	}
 
+	cur_bo_offset = 0;
+	cur_offset = srf->offsets;
+	cur_size = srf->sizes;
+
+	bpp = vmw_sf_bpp[srf->format].bpp;
+	stride_bpp = vmw_sf_bpp[srf->format].s_bpp;
+
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
+		for (j = 0; j < srf->mip_levels[i]; ++j) {
+			uint32_t stride =
+				(cur_size->width * stride_bpp + 7) >> 3;
+
+			cur_offset->face = i;
+			cur_offset->mip = j;
+			cur_offset->bo_offset = cur_bo_offset;
+			cur_bo_offset += stride * cur_size->height *
+				cur_size->depth * bpp / stride_bpp;
+			++cur_offset;
+			++cur_size;
+		}
+	}
+	srf->backup_size = cur_bo_offset;
+
 	if (srf->scanout &&
 	    srf->num_sizes == 1 &&
 	    srf->sizes[0].width == 64 &&
@@ -616,7 +1380,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		if (!srf->snooper.image) {
 			DRM_ERROR("Failed to allocate cursor_image\n");
 			ret = -ENOMEM;
-			goto out_err1;
+			goto out_no_copy;
 		}
 	} else {
 		srf->snooper.image = NULL;
@@ -633,7 +1397,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 
 	ret = vmw_surface_init(dev_priv, srf, vmw_user_surface_free);
 	if (unlikely(ret != 0))
-		return ret;
+		goto out_unlock;
 
 	tmp = vmw_resource_reference(&srf->res);
 	ret = ttm_base_object_init(tfile, &user_srf->base,
@@ -643,7 +1407,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	if (unlikely(ret != 0)) {
 		vmw_resource_unreference(&tmp);
 		vmw_resource_unreference(&res);
-		return ret;
+		goto out_unlock;
 	}
 
 	rep->sid = user_srf->base.hash.key;
@@ -651,11 +1415,19 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		DRM_ERROR("Created bad Surface ID.\n");
 
 	vmw_resource_unreference(&res);
+
+	ttm_read_unlock(&vmaster->lock);
 	return 0;
-out_err1:
+out_no_copy:
+	kfree(srf->offsets);
+out_no_offsets:
 	kfree(srf->sizes);
-out_err0:
+out_no_sizes:
 	kfree(user_srf);
+out_no_user_srf:
+	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
 	return ret;
 }
 
@@ -969,7 +1741,7 @@ static int vmw_stream_init(struct vmw_private *dev_priv,
 	int ret;
 
 	ret = vmw_resource_init(dev_priv, res, &dev_priv->stream_idr,
-				VMW_RES_STREAM, res_free);
+				VMW_RES_STREAM, false, res_free, NULL);
 
 	if (unlikely(ret != 0)) {
 		if (res_free == NULL)
@@ -999,8 +1771,11 @@ static void vmw_user_stream_free(struct vmw_resource *res)
 {
 	struct vmw_user_stream *stream =
 	    container_of(res, struct vmw_user_stream, stream.res);
+	struct vmw_private *dev_priv = res->dev_priv;
 
 	kfree(stream);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv),
+			    vmw_user_stream_size);
 }
 
 /**
@@ -1054,23 +1829,56 @@ int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_stream *stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+	struct vmw_user_stream *stream;
 	struct vmw_resource *res;
 	struct vmw_resource *tmp;
 	struct drm_vmw_stream_arg *arg = (struct drm_vmw_stream_arg *)data;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	int ret;
 
-	if (unlikely(stream == NULL))
-		return -ENOMEM;
+	/*
+	 * Approximate idr memory usage with 128 bytes. It will be limited
+	 * by maximum number_of streams anyway?
+	 */
+
+	if (unlikely(vmw_user_stream_size == 0))
+		vmw_user_stream_size = ttm_round_pot(sizeof(*stream)) + 128;
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   vmw_user_stream_size,
+				   false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for stream"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+
+	stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+	if (unlikely(stream == NULL)) {
+		ttm_mem_global_free(vmw_mem_glob(dev_priv),
+				    vmw_user_stream_size);
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
 
 	res = &stream->stream.res;
 	stream->base.shareable = false;
 	stream->base.tfile = NULL;
 
+	/*
+	 * From here on, the destructor takes over resource freeing.
+	 */
+
 	ret = vmw_stream_init(dev_priv, &stream->stream, vmw_user_stream_free);
 	if (unlikely(ret != 0))
-		return ret;
+		goto out_unlock;
 
 	tmp = vmw_resource_reference(res);
 	ret = ttm_base_object_init(tfile, &stream->base, false, VMW_RES_STREAM,
@@ -1084,6 +1892,8 @@ int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 	arg->stream_id = res->id;
 out_err:
 	vmw_resource_unreference(&res);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
new file mode 100644
index 000000000000..477b2a9eb3c2
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -0,0 +1,567 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vmwgfx_kms.h"
+
+
+#define vmw_crtc_to_sou(x) \
+	container_of(x, struct vmw_screen_object_unit, base.crtc)
+#define vmw_encoder_to_sou(x) \
+	container_of(x, struct vmw_screen_object_unit, base.encoder)
+#define vmw_connector_to_sou(x) \
+	container_of(x, struct vmw_screen_object_unit, base.connector)
+
+struct vmw_screen_object_display {
+	struct list_head active;
+
+	unsigned num_active;
+	unsigned last_num_active;
+
+	struct vmw_framebuffer *fb;
+};
+
+/**
+ * Display unit using screen objects.
+ */
+struct vmw_screen_object_unit {
+	struct vmw_display_unit base;
+
+	unsigned long buffer_size; /**< Size of allocated buffer */
+	struct vmw_dma_buffer *buffer; /**< Backing store buffer */
+
+	bool defined;
+
+	struct list_head active;
+};
+
+static void vmw_sou_destroy(struct vmw_screen_object_unit *sou)
+{
+	list_del_init(&sou->active);
+	vmw_display_unit_cleanup(&sou->base);
+	kfree(sou);
+}
+
+
+/*
+ * Screen Object Display Unit CRTC functions
+ */
+
+static void vmw_sou_crtc_destroy(struct drm_crtc *crtc)
+{
+	vmw_sou_destroy(vmw_crtc_to_sou(crtc));
+}
+
+static int vmw_sou_del_active(struct vmw_private *vmw_priv,
+			      struct vmw_screen_object_unit *sou)
+{
+	struct vmw_screen_object_display *ld = vmw_priv->sou_priv;
+	if (list_empty(&sou->active))
+		return 0;
+
+	/* Must init otherwise list_empty(&sou->active) will not work. */
+	list_del_init(&sou->active);
+	if (--(ld->num_active) == 0) {
+		BUG_ON(!ld->fb);
+		if (ld->fb->unpin)
+			ld->fb->unpin(ld->fb);
+		ld->fb = NULL;
+	}
+
+	return 0;
+}
+
+static int vmw_sou_add_active(struct vmw_private *vmw_priv,
+			      struct vmw_screen_object_unit *sou,
+			      struct vmw_framebuffer *vfb)
+{
+	struct vmw_screen_object_display *ld = vmw_priv->sou_priv;
+	struct vmw_screen_object_unit *entry;
+	struct list_head *at;
+
+	BUG_ON(!ld->num_active && ld->fb);
+	if (vfb != ld->fb) {
+		if (ld->fb && ld->fb->unpin)
+			ld->fb->unpin(ld->fb);
+		if (vfb->pin)
+			vfb->pin(vfb);
+		ld->fb = vfb;
+	}
+
+	if (!list_empty(&sou->active))
+		return 0;
+
+	at = &ld->active;
+	list_for_each_entry(entry, &ld->active, active) {
+		if (entry->base.unit > sou->base.unit)
+			break;
+
+		at = &entry->active;
+	}
+
+	list_add(&sou->active, at);
+
+	ld->num_active++;
+
+	return 0;
+}
+
+/**
+ * Send the fifo command to create a screen.
+ */
+static int vmw_sou_fifo_create(struct vmw_private *dev_priv,
+			       struct vmw_screen_object_unit *sou,
+			       uint32_t x, uint32_t y,
+			       struct drm_display_mode *mode)
+{
+	size_t fifo_size;
+
+	struct {
+		struct {
+			uint32_t cmdType;
+		} header;
+		SVGAScreenObject obj;
+	} *cmd;
+
+	BUG_ON(!sou->buffer);
+
+	fifo_size = sizeof(*cmd);
+	cmd = vmw_fifo_reserve(dev_priv, fifo_size);
+	/* The hardware has hung, nothing we can do about it here. */
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Fifo reserve failed.\n");
+		return -ENOMEM;
+	}
+
+	memset(cmd, 0, fifo_size);
+	cmd->header.cmdType = SVGA_CMD_DEFINE_SCREEN;
+	cmd->obj.structSize = sizeof(SVGAScreenObject);
+	cmd->obj.id = sou->base.unit;
+	cmd->obj.flags = SVGA_SCREEN_HAS_ROOT |
+		(sou->base.unit == 0 ? SVGA_SCREEN_IS_PRIMARY : 0);
+	cmd->obj.size.width = mode->hdisplay;
+	cmd->obj.size.height = mode->vdisplay;
+	cmd->obj.root.x = x;
+	cmd->obj.root.y = y;
+
+	/* Ok to assume that buffer is pinned in vram */
+	vmw_bo_get_guest_ptr(&sou->buffer->base, &cmd->obj.backingStore.ptr);
+	cmd->obj.backingStore.pitch = mode->hdisplay * 4;
+
+	vmw_fifo_commit(dev_priv, fifo_size);
+
+	sou->defined = true;
+
+	return 0;
+}
+
+/**
+ * Send the fifo command to destroy a screen.
+ */
+static int vmw_sou_fifo_destroy(struct vmw_private *dev_priv,
+				struct vmw_screen_object_unit *sou)
+{
+	size_t fifo_size;
+	int ret;
+
+	struct {
+		struct {
+			uint32_t cmdType;
+		} header;
+		SVGAFifoCmdDestroyScreen body;
+	} *cmd;
+
+	/* no need to do anything */
+	if (unlikely(!sou->defined))
+		return 0;
+
+	fifo_size = sizeof(*cmd);
+	cmd = vmw_fifo_reserve(dev_priv, fifo_size);
+	/* the hardware has hung, nothing we can do about it here */
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Fifo reserve failed.\n");
+		return -ENOMEM;
+	}
+
+	memset(cmd, 0, fifo_size);
+	cmd->header.cmdType = SVGA_CMD_DESTROY_SCREEN;
+	cmd->body.screenId = sou->base.unit;
+
+	vmw_fifo_commit(dev_priv, fifo_size);
+
+	/* Force sync */
+	ret = vmw_fallback_wait(dev_priv, false, true, 0, false, 3*HZ);
+	if (unlikely(ret != 0))
+		DRM_ERROR("Failed to sync with HW");
+	else
+		sou->defined = false;
+
+	return ret;
+}
+
+/**
+ * Free the backing store.
+ */
+static void vmw_sou_backing_free(struct vmw_private *dev_priv,
+				 struct vmw_screen_object_unit *sou)
+{
+	struct ttm_buffer_object *bo;
+
+	if (unlikely(sou->buffer == NULL))
+		return;
+
+	bo = &sou->buffer->base;
+	ttm_bo_unref(&bo);
+	sou->buffer = NULL;
+	sou->buffer_size = 0;
+}
+
+/**
+ * Allocate the backing store for the buffer.
+ */
+static int vmw_sou_backing_alloc(struct vmw_private *dev_priv,
+				 struct vmw_screen_object_unit *sou,
+				 unsigned long size)
+{
+	int ret;
+
+	if (sou->buffer_size == size)
+		return 0;
+
+	if (sou->buffer)
+		vmw_sou_backing_free(dev_priv, sou);
+
+	sou->buffer = kzalloc(sizeof(*sou->buffer), GFP_KERNEL);
+	if (unlikely(sou->buffer == NULL))
+		return -ENOMEM;
+
+	/* After we have alloced the backing store might not be able to
+	 * resume the overlays, this is preferred to failing to alloc.
+	 */
+	vmw_overlay_pause_all(dev_priv);
+	ret = vmw_dmabuf_init(dev_priv, sou->buffer, size,
+			      &vmw_vram_ne_placement,
+			      false, &vmw_dmabuf_bo_free);
+	vmw_overlay_resume_all(dev_priv);
+
+	if (unlikely(ret != 0))
+		sou->buffer = NULL; /* vmw_dmabuf_init frees on error */
+	else
+		sou->buffer_size = size;
+
+	return ret;
+}
+
+static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
+{
+	struct vmw_private *dev_priv;
+	struct vmw_screen_object_unit *sou;
+	struct drm_connector *connector;
+	struct drm_display_mode *mode;
+	struct drm_encoder *encoder;
+	struct vmw_framebuffer *vfb;
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	int ret = 0;
+
+	if (!set)
+		return -EINVAL;
+
+	if (!set->crtc)
+		return -EINVAL;
+
+	/* get the sou */
+	crtc = set->crtc;
+	sou = vmw_crtc_to_sou(crtc);
+	vfb = set->fb ? vmw_framebuffer_to_vfb(set->fb) : NULL;
+	dev_priv = vmw_priv(crtc->dev);
+
+	if (set->num_connectors > 1) {
+		DRM_ERROR("to many connectors\n");
+		return -EINVAL;
+	}
+
+	if (set->num_connectors == 1 &&
+	    set->connectors[0] != &sou->base.connector) {
+		DRM_ERROR("connector doesn't match %p %p\n",
+			set->connectors[0], &sou->base.connector);
+		return -EINVAL;
+	}
+
+	/* sou only supports one fb active at the time */
+	if (dev_priv->sou_priv->fb && vfb &&
+	    !(dev_priv->sou_priv->num_active == 1 &&
+	      !list_empty(&sou->active)) &&
+	    dev_priv->sou_priv->fb != vfb) {
+		DRM_ERROR("Multiple framebuffers not supported\n");
+		return -EINVAL;
+	}
+
+	/* since they always map one to one these are safe */
+	connector = &sou->base.connector;
+	encoder = &sou->base.encoder;
+
+	/* should we turn the crtc off */
+	if (set->num_connectors == 0 || !set->mode || !set->fb) {
+		ret = vmw_sou_fifo_destroy(dev_priv, sou);
+		/* the hardware has hung don't do anything more */
+		if (unlikely(ret != 0))
+			return ret;
+
+		connector->encoder = NULL;
+		encoder->crtc = NULL;
+		crtc->fb = NULL;
+		crtc->x = 0;
+		crtc->y = 0;
+
+		vmw_sou_del_active(dev_priv, sou);
+
+		vmw_sou_backing_free(dev_priv, sou);
+
+		return 0;
+	}
+
+
+	/* we now know we want to set a mode */
+	mode = set->mode;
+	fb = set->fb;
+
+	if (set->x + mode->hdisplay > fb->width ||
+	    set->y + mode->vdisplay > fb->height) {
+		DRM_ERROR("set outside of framebuffer\n");
+		return -EINVAL;
+	}
+
+	vmw_fb_off(dev_priv);
+
+	if (mode->hdisplay != crtc->mode.hdisplay ||
+	    mode->vdisplay != crtc->mode.vdisplay) {
+		/* no need to check if depth is different, because backing
+		 * store depth is forced to 4 by the device.
+		 */
+
+		ret = vmw_sou_fifo_destroy(dev_priv, sou);
+		/* the hardware has hung don't do anything more */
+		if (unlikely(ret != 0))
+			return ret;
+
+		vmw_sou_backing_free(dev_priv, sou);
+	}
+
+	if (!sou->buffer) {
+		/* forced to depth 4 by the device */
+		size_t size = mode->hdisplay * mode->vdisplay * 4;
+		ret = vmw_sou_backing_alloc(dev_priv, sou, size);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	ret = vmw_sou_fifo_create(dev_priv, sou, set->x, set->y, mode);
+	if (unlikely(ret != 0)) {
+		/*
+		 * We are in a bit of a situation here, the hardware has
+		 * hung and we may or may not have a buffer hanging of
+		 * the screen object, best thing to do is not do anything
+		 * if we where defined, if not just turn the crtc of.
+		 * Not what userspace wants but it needs to htfu.
+		 */
+		if (sou->defined)
+			return ret;
+
+		connector->encoder = NULL;
+		encoder->crtc = NULL;
+		crtc->fb = NULL;
+		crtc->x = 0;
+		crtc->y = 0;
+
+		return ret;
+	}
+
+	vmw_sou_add_active(dev_priv, sou, vfb);
+
+	connector->encoder = encoder;
+	encoder->crtc = crtc;
+	crtc->mode = *mode;
+	crtc->fb = fb;
+	crtc->x = set->x;
+	crtc->y = set->y;
+
+	return 0;
+}
+
+static struct drm_crtc_funcs vmw_screen_object_crtc_funcs = {
+	.save = vmw_du_crtc_save,
+	.restore = vmw_du_crtc_restore,
+	.cursor_set = vmw_du_crtc_cursor_set,
+	.cursor_move = vmw_du_crtc_cursor_move,
+	.gamma_set = vmw_du_crtc_gamma_set,
+	.destroy = vmw_sou_crtc_destroy,
+	.set_config = vmw_sou_crtc_set_config,
+};
+
+/*
+ * Screen Object Display Unit encoder functions
+ */
+
+static void vmw_sou_encoder_destroy(struct drm_encoder *encoder)
+{
+	vmw_sou_destroy(vmw_encoder_to_sou(encoder));
+}
+
+static struct drm_encoder_funcs vmw_screen_object_encoder_funcs = {
+	.destroy = vmw_sou_encoder_destroy,
+};
+
+/*
+ * Screen Object Display Unit connector functions
+ */
+
+static void vmw_sou_connector_destroy(struct drm_connector *connector)
+{
+	vmw_sou_destroy(vmw_connector_to_sou(connector));
+}
+
+static struct drm_connector_funcs vmw_legacy_connector_funcs = {
+	.dpms = vmw_du_connector_dpms,
+	.save = vmw_du_connector_save,
+	.restore = vmw_du_connector_restore,
+	.detect = vmw_du_connector_detect,
+	.fill_modes = vmw_du_connector_fill_modes,
+	.set_property = vmw_du_connector_set_property,
+	.destroy = vmw_sou_connector_destroy,
+};
+
+static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit)
+{
+	struct vmw_screen_object_unit *sou;
+	struct drm_device *dev = dev_priv->dev;
+	struct drm_connector *connector;
+	struct drm_encoder *encoder;
+	struct drm_crtc *crtc;
+
+	sou = kzalloc(sizeof(*sou), GFP_KERNEL);
+	if (!sou)
+		return -ENOMEM;
+
+	sou->base.unit = unit;
+	crtc = &sou->base.crtc;
+	encoder = &sou->base.encoder;
+	connector = &sou->base.connector;
+
+	INIT_LIST_HEAD(&sou->active);
+
+	sou->base.pref_active = (unit == 0);
+	sou->base.pref_width = 800;
+	sou->base.pref_height = 600;
+	sou->base.pref_mode = NULL;
+
+	drm_connector_init(dev, connector, &vmw_legacy_connector_funcs,
+			   DRM_MODE_CONNECTOR_LVDS);
+	connector->status = vmw_du_connector_detect(connector, true);
+
+	drm_encoder_init(dev, encoder, &vmw_screen_object_encoder_funcs,
+			 DRM_MODE_ENCODER_LVDS);
+	drm_mode_connector_attach_encoder(connector, encoder);
+	encoder->possible_crtcs = (1 << unit);
+	encoder->possible_clones = 0;
+
+	drm_crtc_init(dev, crtc, &vmw_screen_object_crtc_funcs);
+
+	drm_mode_crtc_set_gamma_size(crtc, 256);
+
+	drm_connector_attach_property(connector,
+				      dev->mode_config.dirty_info_property,
+				      1);
+
+	return 0;
+}
+
+int vmw_kms_init_screen_object_display(struct vmw_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	int i, ret;
+
+	if (dev_priv->sou_priv) {
+		DRM_INFO("sou system already on\n");
+		return -EINVAL;
+	}
+
+	if (!(dev_priv->fifo.capabilities & SVGA_FIFO_CAP_SCREEN_OBJECT_2)) {
+		DRM_INFO("Not using screen objects,"
+			 " missing cap SCREEN_OBJECT_2\n");
+		return -ENOSYS;
+	}
+
+	ret = -ENOMEM;
+	dev_priv->sou_priv = kmalloc(sizeof(*dev_priv->sou_priv), GFP_KERNEL);
+	if (unlikely(!dev_priv->sou_priv))
+		goto err_no_mem;
+
+	INIT_LIST_HEAD(&dev_priv->sou_priv->active);
+	dev_priv->sou_priv->num_active = 0;
+	dev_priv->sou_priv->last_num_active = 0;
+	dev_priv->sou_priv->fb = NULL;
+
+	ret = drm_vblank_init(dev, VMWGFX_NUM_DISPLAY_UNITS);
+	if (unlikely(ret != 0))
+		goto err_free;
+
+	ret = drm_mode_create_dirty_info_property(dev);
+	if (unlikely(ret != 0))
+		goto err_vblank_cleanup;
+
+	for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i)
+		vmw_sou_init(dev_priv, i);
+
+	DRM_INFO("Screen objects system initialized\n");
+
+	return 0;
+
+err_vblank_cleanup:
+	drm_vblank_cleanup(dev);
+err_free:
+	kfree(dev_priv->sou_priv);
+	dev_priv->sou_priv = NULL;
+err_no_mem:
+	return ret;
+}
+
+int vmw_kms_close_screen_object_display(struct vmw_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+
+	if (!dev_priv->sou_priv)
+		return -ENOSYS;
+
+	drm_vblank_cleanup(dev);
+
+	if (!list_empty(&dev_priv->sou_priv->active))
+		DRM_ERROR("Still have active outputs when unloading driver");
+
+	kfree(dev_priv->sou_priv);
+
+	return 0;
+}
diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c
index a597039d0755..72ca689b6474 100644
--- a/drivers/hid/hid-wacom.c
+++ b/drivers/hid/hid-wacom.c
@@ -373,6 +373,8 @@ static int wacom_probe(struct hid_device *hdev,
 	hidinput = list_entry(hdev->inputs.next, struct hid_input, list);
 	input = hidinput->input;
 
+	__set_bit(INPUT_PROP_POINTER, input->propbit);
+
 	/* Basics */
 	input->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_REL);
 
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 59d83e83da7f..932383786642 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -36,17 +36,25 @@
 #include <linux/cpu.h>
 #include <linux/pci.h>
 #include <linux/smp.h>
+#include <linux/moduleparam.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
 
 #define DRVNAME	"coretemp"
 
+/*
+ * force_tjmax only matters when TjMax can't be read from the CPU itself.
+ * When set, it replaces the driver's suboptimal heuristic.
+ */
+static int force_tjmax;
+module_param_named(tjmax, force_tjmax, int, 0444);
+MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
+
 #define BASE_SYSFS_ATTR_NO	2	/* Sysfs Base attr no for coretemp */
 #define NUM_REAL_CORES		16	/* Number of Real cores per cpu */
 #define CORETEMP_NAME_LENGTH	17	/* String Length of attrs */
 #define MAX_CORE_ATTRS		4	/* Maximum no of basic attrs */
-#define MAX_THRESH_ATTRS	3	/* Maximum no of Threshold attrs */
-#define TOTAL_ATTRS		(MAX_CORE_ATTRS + MAX_THRESH_ATTRS)
+#define TOTAL_ATTRS		(MAX_CORE_ATTRS + 1)
 #define MAX_CORE_DATA		(NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
 
 #ifdef CONFIG_SMP
@@ -69,8 +77,6 @@
  *		This value is passed as "id" field to rdmsr/wrmsr functions.
  * @status_reg: One of IA32_THERM_STATUS or IA32_PACKAGE_THERM_STATUS,
  *		from where the temperature values should be read.
- * @intrpt_reg: One of IA32_THERM_INTERRUPT or IA32_PACKAGE_THERM_INTERRUPT,
- *		from where the thresholds are read.
  * @attr_size:  Total number of pre-core attrs displayed in the sysfs.
  * @is_pkg_data: If this is 1, the temp_data holds pkgtemp data.
  *		Otherwise, temp_data holds coretemp data.
@@ -79,13 +85,11 @@
 struct temp_data {
 	int temp;
 	int ttarget;
-	int tmin;
 	int tjmax;
 	unsigned long last_updated;
 	unsigned int cpu;
 	u32 cpu_core_id;
 	u32 status_reg;
-	u32 intrpt_reg;
 	int attr_size;
 	bool is_pkg_data;
 	bool valid;
@@ -143,19 +147,6 @@ static ssize_t show_crit_alarm(struct device *dev,
 	return sprintf(buf, "%d\n", (eax >> 5) & 1);
 }
 
-static ssize_t show_max_alarm(struct device *dev,
-				struct device_attribute *devattr, char *buf)
-{
-	u32 eax, edx;
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct platform_data *pdata = dev_get_drvdata(dev);
-	struct temp_data *tdata = pdata->core_data[attr->index];
-
-	rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
-
-	return sprintf(buf, "%d\n", !!(eax & THERM_STATUS_THRESHOLD1));
-}
-
 static ssize_t show_tjmax(struct device *dev,
 			struct device_attribute *devattr, char *buf)
 {
@@ -174,83 +165,6 @@ static ssize_t show_ttarget(struct device *dev,
 	return sprintf(buf, "%d\n", pdata->core_data[attr->index]->ttarget);
 }
 
-static ssize_t store_ttarget(struct device *dev,
-				struct device_attribute *devattr,
-				const char *buf, size_t count)
-{
-	struct platform_data *pdata = dev_get_drvdata(dev);
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct temp_data *tdata = pdata->core_data[attr->index];
-	u32 eax, edx;
-	unsigned long val;
-	int diff;
-
-	if (strict_strtoul(buf, 10, &val))
-		return -EINVAL;
-
-	/*
-	 * THERM_MASK_THRESHOLD1 is 7 bits wide. Values are entered in terms
-	 * of milli degree celsius. Hence don't accept val > (127 * 1000)
-	 */
-	if (val > tdata->tjmax || val > 127000)
-		return -EINVAL;
-
-	diff = (tdata->tjmax - val) / 1000;
-
-	mutex_lock(&tdata->update_lock);
-	rdmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, &eax, &edx);
-	eax = (eax & ~THERM_MASK_THRESHOLD1) |
-				(diff << THERM_SHIFT_THRESHOLD1);
-	wrmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, eax, edx);
-	tdata->ttarget = val;
-	mutex_unlock(&tdata->update_lock);
-
-	return count;
-}
-
-static ssize_t show_tmin(struct device *dev,
-			struct device_attribute *devattr, char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct platform_data *pdata = dev_get_drvdata(dev);
-
-	return sprintf(buf, "%d\n", pdata->core_data[attr->index]->tmin);
-}
-
-static ssize_t store_tmin(struct device *dev,
-				struct device_attribute *devattr,
-				const char *buf, size_t count)
-{
-	struct platform_data *pdata = dev_get_drvdata(dev);
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct temp_data *tdata = pdata->core_data[attr->index];
-	u32 eax, edx;
-	unsigned long val;
-	int diff;
-
-	if (strict_strtoul(buf, 10, &val))
-		return -EINVAL;
-
-	/*
-	 * THERM_MASK_THRESHOLD0 is 7 bits wide. Values are entered in terms
-	 * of milli degree celsius. Hence don't accept val > (127 * 1000)
-	 */
-	if (val > tdata->tjmax || val > 127000)
-		return -EINVAL;
-
-	diff = (tdata->tjmax - val) / 1000;
-
-	mutex_lock(&tdata->update_lock);
-	rdmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, &eax, &edx);
-	eax = (eax & ~THERM_MASK_THRESHOLD0) |
-				(diff << THERM_SHIFT_THRESHOLD0);
-	wrmsr_on_cpu(tdata->cpu, tdata->intrpt_reg, eax, edx);
-	tdata->tmin = val;
-	mutex_unlock(&tdata->update_lock);
-
-	return count;
-}
-
 static ssize_t show_temp(struct device *dev,
 			struct device_attribute *devattr, char *buf)
 {
@@ -374,7 +288,6 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
 
 static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
 {
-	/* The 100C is default for both mobile and non mobile CPUs */
 	int err;
 	u32 eax, edx;
 	u32 val;
@@ -385,7 +298,8 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
 	 */
 	err = rdmsr_safe_on_cpu(id, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
 	if (err) {
-		dev_warn(dev, "Unable to read TjMax from CPU.\n");
+		if (c->x86_model > 0xe && c->x86_model != 0x1c)
+			dev_warn(dev, "Unable to read TjMax from CPU %u\n", id);
 	} else {
 		val = (eax >> 16) & 0xff;
 		/*
@@ -393,11 +307,17 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
 		 * will be used
 		 */
 		if (val) {
-			dev_info(dev, "TjMax is %d C.\n", val);
+			dev_dbg(dev, "TjMax is %d degrees C\n", val);
 			return val * 1000;
 		}
 	}
 
+	if (force_tjmax) {
+		dev_notice(dev, "TjMax forced to %d degrees C by user\n",
+			   force_tjmax);
+		return force_tjmax * 1000;
+	}
+
 	/*
 	 * An assumption is made for early CPUs and unreadable MSR.
 	 * NOTE: the calculated value may not be correct.
@@ -414,21 +334,6 @@ static void __devinit get_ucode_rev_on_cpu(void *edx)
 	rdmsr(MSR_IA32_UCODE_REV, eax, *(u32 *)edx);
 }
 
-static int get_pkg_tjmax(unsigned int cpu, struct device *dev)
-{
-	int err;
-	u32 eax, edx, val;
-
-	err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
-	if (!err) {
-		val = (eax >> 16) & 0xff;
-		if (val)
-			return val * 1000;
-	}
-	dev_warn(dev, "Unable to read Pkg-TjMax from CPU:%u\n", cpu);
-	return 100000; /* Default TjMax: 100 degree celsius */
-}
-
 static int create_name_attr(struct platform_data *pdata, struct device *dev)
 {
 	sysfs_attr_init(&pdata->name_attr.attr);
@@ -442,19 +347,14 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev,
 				int attr_no)
 {
 	int err, i;
-	static ssize_t (*rd_ptr[TOTAL_ATTRS]) (struct device *dev,
+	static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev,
 			struct device_attribute *devattr, char *buf) = {
 			show_label, show_crit_alarm, show_temp, show_tjmax,
-			show_max_alarm, show_ttarget, show_tmin };
-	static ssize_t (*rw_ptr[TOTAL_ATTRS]) (struct device *dev,
-			struct device_attribute *devattr, const char *buf,
-			size_t count) = { NULL, NULL, NULL, NULL, NULL,
-					store_ttarget, store_tmin };
-	static const char *names[TOTAL_ATTRS] = {
+			show_ttarget };
+	static const char *const names[TOTAL_ATTRS] = {
 					"temp%d_label", "temp%d_crit_alarm",
 					"temp%d_input", "temp%d_crit",
-					"temp%d_max_alarm", "temp%d_max",
-					"temp%d_max_hyst" };
+					"temp%d_max" };
 
 	for (i = 0; i < tdata->attr_size; i++) {
 		snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, names[i],
@@ -462,10 +362,6 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev,
 		sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr);
 		tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
 		tdata->sd_attrs[i].dev_attr.attr.mode = S_IRUGO;
-		if (rw_ptr[i]) {
-			tdata->sd_attrs[i].dev_attr.attr.mode |= S_IWUSR;
-			tdata->sd_attrs[i].dev_attr.store = rw_ptr[i];
-		}
 		tdata->sd_attrs[i].dev_attr.show = rd_ptr[i];
 		tdata->sd_attrs[i].index = attr_no;
 		err = device_create_file(dev, &tdata->sd_attrs[i].dev_attr);
@@ -481,9 +377,9 @@ exit_free:
 }
 
 
-static int __devinit chk_ucode_version(struct platform_device *pdev)
+static int __cpuinit chk_ucode_version(unsigned int cpu)
 {
-	struct cpuinfo_x86 *c = &cpu_data(pdev->id);
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	int err;
 	u32 edx;
 
@@ -494,17 +390,15 @@ static int __devinit chk_ucode_version(struct platform_device *pdev)
 	 */
 	if (c->x86_model == 0xe && c->x86_mask < 0xc) {
 		/* check for microcode update */
-		err = smp_call_function_single(pdev->id, get_ucode_rev_on_cpu,
+		err = smp_call_function_single(cpu, get_ucode_rev_on_cpu,
 					       &edx, 1);
 		if (err) {
-			dev_err(&pdev->dev,
-				"Cannot determine microcode revision of "
-				"CPU#%u (%d)!\n", pdev->id, err);
+			pr_err("Cannot determine microcode revision of "
+			       "CPU#%u (%d)!\n", cpu, err);
 			return -ENODEV;
 		} else if (edx < 0x39) {
-			dev_err(&pdev->dev,
-				"Errata AE18 not fixed, update BIOS or "
-				"microcode of the CPU!\n");
+			pr_err("Errata AE18 not fixed, update BIOS or "
+			       "microcode of the CPU!\n");
 			return -ENODEV;
 		}
 	}
@@ -538,8 +432,6 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
 
 	tdata->status_reg = pkg_flag ? MSR_IA32_PACKAGE_THERM_STATUS :
 							MSR_IA32_THERM_STATUS;
-	tdata->intrpt_reg = pkg_flag ? MSR_IA32_PACKAGE_THERM_INTERRUPT :
-						MSR_IA32_THERM_INTERRUPT;
 	tdata->is_pkg_data = pkg_flag;
 	tdata->cpu = cpu;
 	tdata->cpu_core_id = TO_CORE_ID(cpu);
@@ -548,11 +440,11 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
 	return tdata;
 }
 
-static int create_core_data(struct platform_data *pdata,
-				struct platform_device *pdev,
+static int create_core_data(struct platform_device *pdev,
 				unsigned int cpu, int pkg_flag)
 {
 	struct temp_data *tdata;
+	struct platform_data *pdata = platform_get_drvdata(pdev);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	u32 eax, edx;
 	int err, attr_no;
@@ -588,20 +480,21 @@ static int create_core_data(struct platform_data *pdata,
 		goto exit_free;
 
 	/* We can access status register. Get Critical Temperature */
-	if (pkg_flag)
-		tdata->tjmax = get_pkg_tjmax(pdev->id, &pdev->dev);
-	else
-		tdata->tjmax = get_tjmax(c, cpu, &pdev->dev);
+	tdata->tjmax = get_tjmax(c, cpu, &pdev->dev);
 
 	/*
-	 * Test if we can access the intrpt register. If so, increase the
-	 * 'size' enough to have ttarget/tmin/max_alarm interfaces.
-	 * Initialize ttarget with bits 16:22 of MSR_IA32_THERM_INTERRUPT
+	 * Read the still undocumented bits 8:15 of IA32_TEMPERATURE_TARGET.
+	 * The target temperature is available on older CPUs but not in this
+	 * register. Atoms don't have the register at all.
 	 */
-	err = rdmsr_safe_on_cpu(cpu, tdata->intrpt_reg, &eax, &edx);
-	if (!err) {
-		tdata->attr_size += MAX_THRESH_ATTRS;
-		tdata->ttarget = tdata->tjmax - ((eax >> 16) & 0x7f) * 1000;
+	if (c->x86_model > 0xe && c->x86_model != 0x1c) {
+		err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET,
+					&eax, &edx);
+		if (!err) {
+			tdata->ttarget
+			  = tdata->tjmax - ((eax >> 8) & 0xff) * 1000;
+			tdata->attr_size++;
+		}
 	}
 
 	pdata->core_data[attr_no] = tdata;
@@ -613,22 +506,20 @@ static int create_core_data(struct platform_data *pdata,
 
 	return 0;
 exit_free:
+	pdata->core_data[attr_no] = NULL;
 	kfree(tdata);
 	return err;
 }
 
 static void coretemp_add_core(unsigned int cpu, int pkg_flag)
 {
-	struct platform_data *pdata;
 	struct platform_device *pdev = coretemp_get_pdev(cpu);
 	int err;
 
 	if (!pdev)
 		return;
 
-	pdata = platform_get_drvdata(pdev);
-
-	err = create_core_data(pdata, pdev, cpu, pkg_flag);
+	err = create_core_data(pdev, cpu, pkg_flag);
 	if (err)
 		dev_err(&pdev->dev, "Adding Core %u failed\n", cpu);
 }
@@ -652,11 +543,6 @@ static int __devinit coretemp_probe(struct platform_device *pdev)
 	struct platform_data *pdata;
 	int err;
 
-	/* Check the microcode version of the CPU */
-	err = chk_ucode_version(pdev);
-	if (err)
-		return err;
-
 	/* Initialize the per-package data structures */
 	pdata = kzalloc(sizeof(struct platform_data), GFP_KERNEL);
 	if (!pdata)
@@ -666,7 +552,7 @@ static int __devinit coretemp_probe(struct platform_device *pdev)
 	if (err)
 		goto exit_free;
 
-	pdata->phys_proc_id = TO_PHYS_ID(pdev->id);
+	pdata->phys_proc_id = pdev->id;
 	platform_set_drvdata(pdev, pdata);
 
 	pdata->hwmon_dev = hwmon_device_register(&pdev->dev);
@@ -718,7 +604,7 @@ static int __cpuinit coretemp_device_add(unsigned int cpu)
 
 	mutex_lock(&pdev_list_mutex);
 
-	pdev = platform_device_alloc(DRVNAME, cpu);
+	pdev = platform_device_alloc(DRVNAME, TO_PHYS_ID(cpu));
 	if (!pdev) {
 		err = -ENOMEM;
 		pr_err("Device allocation failed\n");
@@ -738,7 +624,7 @@ static int __cpuinit coretemp_device_add(unsigned int cpu)
 	}
 
 	pdev_entry->pdev = pdev;
-	pdev_entry->phys_proc_id = TO_PHYS_ID(cpu);
+	pdev_entry->phys_proc_id = pdev->id;
 
 	list_add_tail(&pdev_entry->list, &pdev_list);
 	mutex_unlock(&pdev_list_mutex);
@@ -799,6 +685,10 @@ static void __cpuinit get_core_online(unsigned int cpu)
 		return;
 
 	if (!pdev) {
+		/* Check the microcode version of the CPU */
+		if (chk_ucode_version(cpu))
+			return;
+
 		/*
 		 * Alright, we have DTS support.
 		 * We are bringing the _first_ core in this pkg
diff --git a/drivers/hwmon/ds620.c b/drivers/hwmon/ds620.c
index 257957c69d92..4f7c3fc40a89 100644
--- a/drivers/hwmon/ds620.c
+++ b/drivers/hwmon/ds620.c
@@ -72,7 +72,7 @@ struct ds620_data {
 	char valid;		/* !=0 if following fields are valid */
 	unsigned long last_updated;	/* In jiffies */
 
-	u16 temp[3];		/* Register values, word */
+	s16 temp[3];		/* Register values, word */
 };
 
 /*
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index a561c3a0e916..397fc59b5682 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -978,6 +978,8 @@ static void pmbus_find_max_attr(struct i2c_client *client,
 struct pmbus_limit_attr {
 	u16 reg;		/* Limit register */
 	bool update;		/* True if register needs updates */
+	bool low;		/* True if low limit; for limits with compare
+				   functions only */
 	const char *attr;	/* Attribute name */
 	const char *alarm;	/* Alarm attribute name */
 	u32 sbit;		/* Alarm attribute status bit */
@@ -1029,7 +1031,8 @@ static bool pmbus_add_limit_attrs(struct i2c_client *client,
 				if (attr->compare) {
 					pmbus_add_boolean_cmp(data, name,
 						l->alarm, index,
-						cbase, cindex,
+						l->low ? cindex : cbase,
+						l->low ? cbase : cindex,
 						attr->sbase + page, l->sbit);
 				} else {
 					pmbus_add_boolean_reg(data, name,
@@ -1366,11 +1369,13 @@ static const struct pmbus_sensor_attr power_attributes[] = {
 static const struct pmbus_limit_attr temp_limit_attrs[] = {
 	{
 		.reg = PMBUS_UT_WARN_LIMIT,
+		.low = true,
 		.attr = "min",
 		.alarm = "min_alarm",
 		.sbit = PB_TEMP_UT_WARNING,
 	}, {
 		.reg = PMBUS_UT_FAULT_LIMIT,
+		.low = true,
 		.attr = "lcrit",
 		.alarm = "lcrit_alarm",
 		.sbit = PB_TEMP_UT_FAULT,
@@ -1399,11 +1404,13 @@ static const struct pmbus_limit_attr temp_limit_attrs[] = {
 static const struct pmbus_limit_attr temp_limit_attrs23[] = {
 	{
 		.reg = PMBUS_UT_WARN_LIMIT,
+		.low = true,
 		.attr = "min",
 		.alarm = "min_alarm",
 		.sbit = PB_TEMP_UT_WARNING,
 	}, {
 		.reg = PMBUS_UT_FAULT_LIMIT,
+		.low = true,
 		.attr = "lcrit",
 		.alarm = "lcrit_alarm",
 		.sbit = PB_TEMP_UT_FAULT,
diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c
index f2b377c56a3a..7b0260dc76fb 100644
--- a/drivers/hwmon/w83627ehf.c
+++ b/drivers/hwmon/w83627ehf.c
@@ -1715,7 +1715,8 @@ static void w83627ehf_device_remove_files(struct device *dev)
 }
 
 /* Get the monitoring functions started */
-static inline void __devinit w83627ehf_init_device(struct w83627ehf_data *data)
+static inline void __devinit w83627ehf_init_device(struct w83627ehf_data *data,
+						   enum kinds kind)
 {
 	int i;
 	u8 tmp, diode;
@@ -1746,10 +1747,16 @@ static inline void __devinit w83627ehf_init_device(struct w83627ehf_data *data)
 		w83627ehf_write_value(data, W83627EHF_REG_VBAT, tmp | 0x01);
 
 	/* Get thermal sensor types */
-	diode = w83627ehf_read_value(data, W83627EHF_REG_DIODE);
+	switch (kind) {
+	case w83627ehf:
+		diode = w83627ehf_read_value(data, W83627EHF_REG_DIODE);
+		break;
+	default:
+		diode = 0x70;
+	}
 	for (i = 0; i < 3; i++) {
 		if ((tmp & (0x02 << i)))
-			data->temp_type[i] = (diode & (0x10 << i)) ? 1 : 2;
+			data->temp_type[i] = (diode & (0x10 << i)) ? 1 : 3;
 		else
 			data->temp_type[i] = 4; /* thermistor */
 	}
@@ -2016,7 +2023,7 @@ static int __devinit w83627ehf_probe(struct platform_device *pdev)
 	}
 
 	/* Initialize the chip */
-	w83627ehf_init_device(data);
+	w83627ehf_init_device(data, sio_data->kind);
 
 	data->vrm = vid_which_vrm();
 	superio_enter(sio_data->sioreg);
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index 17cf1ab95521..8c2844e5691c 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -329,8 +329,8 @@ static int w83791d_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static int w83791d_remove(struct i2c_client *client);
 
-static int w83791d_read(struct i2c_client *client, u8 register);
-static int w83791d_write(struct i2c_client *client, u8 register, u8 value);
+static int w83791d_read(struct i2c_client *client, u8 reg);
+static int w83791d_write(struct i2c_client *client, u8 reg, u8 value);
 static struct w83791d_data *w83791d_update_device(struct device *dev);
 
 #ifdef DEBUG
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 9827c5e686cb..811dbbd9306c 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -327,7 +327,7 @@ config BLK_DEV_OPTI621
 	select BLK_DEV_IDEPCI
 	help
 	  This is a driver for the OPTi 82C621 EIDE controller.
-	  Please read the comments at the top of <file:drivers/ide/pci/opti621.c>.
+	  Please read the comments at the top of <file:drivers/ide/opti621.c>.
 
 config BLK_DEV_RZ1000
 	tristate "RZ1000 chipset bugfix/support"
@@ -365,7 +365,7 @@ config BLK_DEV_ALI15X3
 	  normal dual channel support.
 
 	  Please read the comments at the top of
-	  <file:drivers/ide/pci/alim15x3.c>.
+	  <file:drivers/ide/alim15x3.c>.
 
 	  If unsure, say N.
 
@@ -528,7 +528,7 @@ config BLK_DEV_NS87415
 	  This driver adds detection and support for the NS87415 chip
 	  (used mainly on SPARC64 and PA-RISC machines).
 
-	  Please read the comments at the top of <file:drivers/ide/pci/ns87415.c>.
+	  Please read the comments at the top of <file:drivers/ide/ns87415.c>.
 
 config BLK_DEV_PDC202XX_OLD
 	tristate "PROMISE PDC202{46|62|65|67} support"
@@ -547,7 +547,7 @@ config BLK_DEV_PDC202XX_OLD
 	  for more than one card.
 
 	  Please read the comments at the top of
-	  <file:drivers/ide/pci/pdc202xx_old.c>.
+	  <file:drivers/ide/pdc202xx_old.c>.
 
 	  If unsure, say N.
 
@@ -593,7 +593,7 @@ config BLK_DEV_SIS5513
 	  ATA100: SiS635, SiS645, SiS650, SiS730, SiS735, SiS740,
 	  SiS745, SiS750
 
-	  Please read the comments at the top of <file:drivers/ide/pci/sis5513.c>.
+	  Please read the comments at the top of <file:drivers/ide/sis5513.c>.
 
 config BLK_DEV_SL82C105
 	tristate "Winbond SL82c105 support"
@@ -616,7 +616,7 @@ config BLK_DEV_SLC90E66
 	  look-a-like to the PIIX4 it should be a nice addition.
 
 	  Please read the comments at the top of
-	  <file:drivers/ide/pci/slc90e66.c>.
+	  <file:drivers/ide/slc90e66.c>.
 
 config BLK_DEV_TRM290
 	tristate "Tekram TRM290 chipset support"
@@ -625,7 +625,7 @@ config BLK_DEV_TRM290
 	  This driver adds support for bus master DMA transfers
 	  using the Tekram TRM290 PCI IDE chip. Volunteers are
 	  needed for further tweaking and development.
-	  Please read the comments at the top of <file:drivers/ide/pci/trm290.c>.
+	  Please read the comments at the top of <file:drivers/ide/trm290.c>.
 
 config BLK_DEV_VIA82CXXX
 	tristate "VIA82CXXX chipset support"
@@ -836,7 +836,7 @@ config BLK_DEV_ALI14XX
 	  of the ALI M1439/1443/1445/1487/1489 chipsets, and permits faster
 	  I/O speeds to be set as well.
 	  See the files <file:Documentation/ide/ide.txt> and
-	  <file:drivers/ide/legacy/ali14xx.c> for more info.
+	  <file:drivers/ide/ali14xx.c> for more info.
 
 config BLK_DEV_DTC2278
 	tristate "DTC-2278 support"
@@ -847,7 +847,7 @@ config BLK_DEV_DTC2278
 	  boot parameter. It enables support for the secondary IDE interface
 	  of the DTC-2278 card, and permits faster I/O speeds to be set as
 	  well. See the <file:Documentation/ide/ide.txt> and
-	  <file:drivers/ide/legacy/dtc2278.c> files for more info.
+	  <file:drivers/ide/dtc2278.c> files for more info.
 
 config BLK_DEV_HT6560B
 	tristate "Holtek HT6560B support"
@@ -858,7 +858,7 @@ config BLK_DEV_HT6560B
 	  boot parameter. It enables support for the secondary IDE interface
 	  of the Holtek card, and permits faster I/O speeds to be set as well.
 	  See the <file:Documentation/ide/ide.txt> and
-	  <file:drivers/ide/legacy/ht6560b.c> files for more info.
+	  <file:drivers/ide/ht6560b.c> files for more info.
 
 config BLK_DEV_QD65XX
 	tristate "QDI QD65xx support"
@@ -867,7 +867,7 @@ config BLK_DEV_QD65XX
 	help
 	  This driver is enabled at runtime using the "qd65xx.probe" kernel
 	  boot parameter.  It permits faster I/O speeds to be set.  See the
-	  <file:Documentation/ide/ide.txt> and <file:drivers/ide/legacy/qd65xx.c>
+	  <file:Documentation/ide/ide.txt> and <file:drivers/ide/qd65xx.c>
 	  for more info.
 
 config BLK_DEV_UMC8672
@@ -879,7 +879,7 @@ config BLK_DEV_UMC8672
 	  boot parameter. It enables support for the secondary IDE interface
 	  of the UMC-8672, and permits faster I/O speeds to be set as well.
 	  See the files <file:Documentation/ide/ide.txt> and
-	  <file:drivers/ide/legacy/umc8672.c> for more info.
+	  <file:drivers/ide/umc8672.c> for more info.
 
 endif
 
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 274798068a54..16f69be820c7 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -435,7 +435,12 @@ static int idedisk_prep_fn(struct request_queue *q, struct request *rq)
 	if (!(rq->cmd_flags & REQ_FLUSH))
 		return BLKPREP_OK;
 
-	cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
+	if (rq->special) {
+		cmd = rq->special;
+		memset(cmd, 0, sizeof(*cmd));
+	} else {
+		cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
+	}
 
 	/* FIXME: map struct ide_taskfile on rq->cmd[] */
 	BUG_ON(cmd == NULL);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 17bf9d95463c..6cd642aaa4de 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -287,7 +287,7 @@ void __free_ep(struct kref *kref)
 	if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
 		cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
 		dst_release(ep->dst);
-		l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+		l2t_release(ep->com.tdev, ep->l2t);
 	}
 	kfree(ep);
 }
@@ -1178,7 +1178,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		release_tid(ep->com.tdev, GET_TID(rpl), NULL);
 	cxgb3_free_atid(ep->com.tdev, ep->atid);
 	dst_release(ep->dst);
-	l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+	l2t_release(ep->com.tdev, ep->l2t);
 	put_ep(&ep->com);
 	return CPL_RET_BUF_DONE;
 }
@@ -1377,7 +1377,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	if (!child_ep) {
 		printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
 		       __func__);
-		l2t_release(L2DATA(tdev), l2t);
+		l2t_release(tdev, l2t);
 		dst_release(dst);
 		goto reject;
 	}
@@ -1956,7 +1956,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	if (!err)
 		goto out;
 
-	l2t_release(L2DATA(h->rdev.t3cdev_p), ep->l2t);
+	l2t_release(h->rdev.t3cdev_p, ep->l2t);
 fail4:
 	dst_release(ep->dst);
 fail3:
@@ -2127,7 +2127,7 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
 	PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
 	     l2t);
 	dst_hold(new);
-	l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+	l2t_release(ep->com.tdev, ep->l2t);
 	ep->l2t = l2t;
 	dst_release(old);
 	ep->dst = new;
diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
index 7b404e5443ed..e34eeb8ae371 100644
--- a/drivers/input/keyboard/adp5588-keys.c
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -668,4 +668,3 @@ module_exit(adp5588_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("ADP5588/87 Keypad driver");
-MODULE_ALIAS("platform:adp5588-keys");
diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c
index b09c7d127219..ab860511f016 100644
--- a/drivers/input/misc/cm109.c
+++ b/drivers/input/misc/cm109.c
@@ -475,7 +475,7 @@ static void cm109_toggle_buzzer_sync(struct cm109_dev *dev, int on)
 				le16_to_cpu(dev->ctl_req->wIndex),
 				dev->ctl_data,
 				USB_PKT_LEN, USB_CTRL_SET_TIMEOUT);
-	if (error && error != EINTR)
+	if (error < 0 && error != -EINTR)
 		err("%s: usb_control_msg() failed %d", __func__, error);
 }
 
diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c
index da280189ef07..5ec617e28f7e 100644
--- a/drivers/input/mouse/bcm5974.c
+++ b/drivers/input/mouse/bcm5974.c
@@ -67,6 +67,10 @@
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI	0x0245
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO	0x0246
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS	0x0247
+/* MacbookAir4,1 (unibody, July 2011) */
+#define USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI	0x0249
+#define USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO	0x024a
+#define USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS	0x024b
 /* MacbookAir4,2 (unibody, July 2011) */
 #define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI	0x024c
 #define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO	0x024d
@@ -112,6 +116,10 @@ static const struct usb_device_id bcm5974_table[] = {
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI),
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ISO),
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_JIS),
+	/* MacbookAir4,1 */
+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI),
+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO),
+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS),
 	/* MacbookAir4,2 */
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI),
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO),
@@ -334,6 +342,18 @@ static const struct bcm5974_config bcm5974_config_table[] = {
 		{ DIM_X, DIM_X / SN_COORD, -4750, 5280 },
 		{ DIM_Y, DIM_Y / SN_COORD, -150, 6730 }
 	},
+	{
+		USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI,
+		USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO,
+		USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS,
+		HAS_INTEGRATED_BUTTON,
+		0x84, sizeof(struct bt_data),
+		0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS,
+		{ DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 },
+		{ DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 },
+		{ DIM_X, DIM_X / SN_COORD, -4620, 5140 },
+		{ DIM_Y, DIM_Y / SN_COORD, -150, 6600 }
+	},
 	{}
 };
 
diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c
index d27c9d91630b..958b4eb6369d 100644
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c
@@ -229,13 +229,6 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi
 							get_unaligned_le16(&report[i + 3]);
 						i += 4;
 					}
-				} else if (usage == WCM_DIGITIZER) {
-					/* max pressure isn't reported
-					features->pressure_max = (unsigned short)
-							(report[i+4] << 8  | report[i + 3]);
-					*/
-					features->pressure_max = 255;
-					i += 4;
 				}
 				break;
 
@@ -291,13 +284,6 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi
 				pen = 1;
 				i++;
 				break;
-
-			case HID_USAGE_UNDEFINED:
-				if (usage == WCM_DESKTOP && finger) /* capacity */
-					features->pressure_max =
-						get_unaligned_le16(&report[i + 3]);
-				i += 4;
-				break;
 			}
 			break;
 
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index c1c2f7b28d89..9dea71849f40 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -800,25 +800,26 @@ static int wacom_bpt_touch(struct wacom_wac *wacom)
 	int i;
 
 	for (i = 0; i < 2; i++) {
-		int p = data[9 * i + 2];
-		bool touch = p && !wacom->shared->stylus_in_proximity;
+		int offset = (data[1] & 0x80) ? (8 * i) : (9 * i);
+		bool touch = data[offset + 3] & 0x80;
 
-		input_mt_slot(input, i);
-		input_mt_report_slot_state(input, MT_TOOL_FINGER, touch);
 		/*
 		 * Touch events need to be disabled while stylus is
 		 * in proximity because user's hand is resting on touchpad
 		 * and sending unwanted events.  User expects tablet buttons
 		 * to continue working though.
 		 */
+		touch = touch && !wacom->shared->stylus_in_proximity;
+
+		input_mt_slot(input, i);
+		input_mt_report_slot_state(input, MT_TOOL_FINGER, touch);
 		if (touch) {
-			int x = get_unaligned_be16(&data[9 * i + 3]) & 0x7ff;
-			int y = get_unaligned_be16(&data[9 * i + 5]) & 0x7ff;
+			int x = get_unaligned_be16(&data[offset + 3]) & 0x7ff;
+			int y = get_unaligned_be16(&data[offset + 5]) & 0x7ff;
 			if (features->quirks & WACOM_QUIRK_BBTOUCH_LOWRES) {
 				x <<= 5;
 				y <<= 5;
 			}
-			input_report_abs(input, ABS_MT_PRESSURE, p);
 			input_report_abs(input, ABS_MT_POSITION_X, x);
 			input_report_abs(input, ABS_MT_POSITION_Y, y);
 		}
@@ -1056,10 +1057,11 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
 			     features->x_fuzz, 0);
 	input_set_abs_params(input_dev, ABS_Y, 0, features->y_max,
 			     features->y_fuzz, 0);
-	input_set_abs_params(input_dev, ABS_PRESSURE, 0, features->pressure_max,
-			     features->pressure_fuzz, 0);
 
 	if (features->device_type == BTN_TOOL_PEN) {
+		input_set_abs_params(input_dev, ABS_PRESSURE, 0, features->pressure_max,
+			     features->pressure_fuzz, 0);
+
 		/* penabled devices have fixed resolution for each model */
 		input_abs_set_res(input_dev, ABS_X, features->x_resolution);
 		input_abs_set_res(input_dev, ABS_Y, features->y_resolution);
@@ -1098,6 +1100,8 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
 		__set_bit(BTN_TOOL_MOUSE, input_dev->keybit);
 		__set_bit(BTN_STYLUS, input_dev->keybit);
 		__set_bit(BTN_STYLUS2, input_dev->keybit);
+
+		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
 		break;
 
 	case WACOM_21UX2:
@@ -1120,12 +1124,12 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
 		for (i = 0; i < 8; i++)
 			__set_bit(BTN_0 + i, input_dev->keybit);
 
-		if (wacom_wac->features.type != WACOM_21UX2) {
-			input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
-			input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
-		}
-
+		input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
+		input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
 		input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
+
+		__set_bit(INPUT_PROP_DIRECT, input_dev->propbit);
+
 		wacom_setup_cintiq(wacom_wac);
 		break;
 
@@ -1150,6 +1154,8 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
 		/* fall through */
 
 	case INTUOS:
+		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
+
 		wacom_setup_intuos(wacom_wac);
 		break;
 
@@ -1165,6 +1171,8 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
 
 		input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
 		wacom_setup_intuos(wacom_wac);
+
+		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
 		break;
 
 	case TABLETPC2FG:
@@ -1183,26 +1191,40 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
 	case TABLETPC:
 		__clear_bit(ABS_MISC, input_dev->absbit);
 
+		__set_bit(INPUT_PROP_DIRECT, input_dev->propbit);
+
 		if (features->device_type != BTN_TOOL_PEN)
 			break;  /* no need to process stylus stuff */
 
 		/* fall through */
 
 	case PL:
-	case PTU:
 	case DTU:
 		__set_bit(BTN_TOOL_PEN, input_dev->keybit);
+		__set_bit(BTN_TOOL_RUBBER, input_dev->keybit);
 		__set_bit(BTN_STYLUS, input_dev->keybit);
 		__set_bit(BTN_STYLUS2, input_dev->keybit);
+
+		__set_bit(INPUT_PROP_DIRECT, input_dev->propbit);
+		break;
+
+	case PTU:
+		__set_bit(BTN_STYLUS2, input_dev->keybit);
 		/* fall through */
 
 	case PENPARTNER:
+		__set_bit(BTN_TOOL_PEN, input_dev->keybit);
 		__set_bit(BTN_TOOL_RUBBER, input_dev->keybit);
+		__set_bit(BTN_STYLUS, input_dev->keybit);
+
+		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
 		break;
 
 	case BAMBOO_PT:
 		__clear_bit(ABS_MISC, input_dev->absbit);
 
+		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
+
 		if (features->device_type == BTN_TOOL_DOUBLETAP) {
 			__set_bit(BTN_LEFT, input_dev->keybit);
 			__set_bit(BTN_FORWARD, input_dev->keybit);
diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
index c14412ef4648..9941d39df43d 100644
--- a/drivers/input/touchscreen/wacom_w8001.c
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -383,6 +383,8 @@ static int w8001_setup(struct w8001 *w8001)
 	dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 	strlcat(w8001->name, "Wacom Serial", sizeof(w8001->name));
 
+	__set_bit(INPUT_PROP_DIRECT, dev->propbit);
+
 	/* penabled? */
 	error = w8001_command(w8001, W8001_CMD_QUERY, true);
 	if (!error) {
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 3dc9befa5aec..6dcc7e2d54de 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1388,7 +1388,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
 		return ret;
 	}
 
-	ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
+	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
 	if (ret)
 		printk(KERN_ERR "IOMMU: can't request irq\n");
 	return ret;
diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c
index d87c9d02f786..328c64c0841c 100644
--- a/drivers/leds/ledtrig-timer.c
+++ b/drivers/leds/ledtrig-timer.c
@@ -41,6 +41,7 @@ static ssize_t led_delay_on_store(struct device *dev,
 
 	if (count == size) {
 		led_blink_set(led_cdev, &state, &led_cdev->blink_delay_off);
+		led_cdev->blink_delay_on = state;
 		ret = count;
 	}
 
@@ -69,6 +70,7 @@ static ssize_t led_delay_off_store(struct device *dev,
 
 	if (count == size) {
 		led_blink_set(led_cdev, &led_cdev->blink_delay_on, &state);
+		led_cdev->blink_delay_off = state;
 		ret = count;
 	}
 
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 49da55c1528a..8c2a000cf3f5 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1698,6 +1698,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ti->num_flush_requests = 1;
+	ti->discard_zeroes_data_unsupported = 1;
+
 	return 0;
 
 bad:
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 89f73ca22cfa..f84c08029b21 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -81,8 +81,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
 		 * corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>
 		 */
 		if (!strcasecmp(arg_name, "corrupt_bio_byte")) {
-			if (!argc)
+			if (!argc) {
 				ti->error = "Feature corrupt_bio_byte requires parameters";
+				return -EINVAL;
+			}
 
 			r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error);
 			if (r)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index a002dd85db1e..86df8b2cf927 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -449,7 +449,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 				rs->ti->error = "write_mostly option is only valid for RAID1";
 				return -EINVAL;
 			}
-			if (value > rs->md.raid_disks) {
+			if (value >= rs->md.raid_disks) {
 				rs->ti->error = "Invalid write_mostly drive index given";
 				return -EINVAL;
 			}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 986b8754bb08..bc04518e9d8b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1238,14 +1238,15 @@ static void dm_table_set_integrity(struct dm_table *t)
 		return;
 
 	template_disk = dm_table_get_integrity_disk(t, true);
-	if (!template_disk &&
-	    blk_integrity_is_initialized(dm_disk(t->md))) {
+	if (template_disk)
+		blk_integrity_register(dm_disk(t->md),
+				       blk_get_integrity(template_disk));
+	else if (blk_integrity_is_initialized(dm_disk(t->md)))
 		DMWARN("%s: device no longer has a valid integrity profile",
 		       dm_device_name(t->md));
-		return;
-	}
-	blk_integrity_register(dm_disk(t->md),
-			       blk_get_integrity(template_disk));
+	else
+		DMWARN("%s: unable to establish an integrity profile",
+		       dm_device_name(t->md));
 }
 
 static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
@@ -1282,6 +1283,22 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
 	return 0;
 }
 
+static bool dm_table_discard_zeroes_data(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	/* Ensure that all targets supports discard_zeroes_data. */
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (ti->discard_zeroes_data_unsupported)
+			return 0;
+	}
+
+	return 1;
+}
+
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			       struct queue_limits *limits)
 {
@@ -1304,6 +1321,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	}
 	blk_queue_flush(q, flush);
 
+	if (!dm_table_discard_zeroes_data(t))
+		q->limits.discard_zeroes_data = 0;
+
 	dm_table_set_integrity(t);
 
 	/*
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5404b2295820..5c95ccb59500 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -61,6 +61,11 @@
 static void autostart_arrays(int part);
 #endif
 
+/* pers_list is a list of registered personalities protected
+ * by pers_lock.
+ * pers_lock does extra service to protect accesses to
+ * mddev->thread when the mutex cannot be held.
+ */
 static LIST_HEAD(pers_list);
 static DEFINE_SPINLOCK(pers_lock);
 
@@ -739,7 +744,12 @@ static void mddev_unlock(mddev_t * mddev)
 	} else
 		mutex_unlock(&mddev->reconfig_mutex);
 
+	/* was we've dropped the mutex we need a spinlock to
+	 * make sur the thread doesn't disappear
+	 */
+	spin_lock(&pers_lock);
 	md_wakeup_thread(mddev->thread);
+	spin_unlock(&pers_lock);
 }
 
 static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
@@ -6429,11 +6439,18 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
 	return thread;
 }
 
-void md_unregister_thread(mdk_thread_t *thread)
+void md_unregister_thread(mdk_thread_t **threadp)
 {
+	mdk_thread_t *thread = *threadp;
 	if (!thread)
 		return;
 	dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
+	/* Locking ensures that mddev_unlock does not wake_up a
+	 * non-existent thread
+	 */
+	spin_lock(&pers_lock);
+	*threadp = NULL;
+	spin_unlock(&pers_lock);
 
 	kthread_stop(thread->tsk);
 	kfree(thread);
@@ -7340,8 +7357,7 @@ static void reap_sync_thread(mddev_t *mddev)
 	mdk_rdev_t *rdev;
 
 	/* resync has finished, collect result */
-	md_unregister_thread(mddev->sync_thread);
-	mddev->sync_thread = NULL;
+	md_unregister_thread(&mddev->sync_thread);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 		/* success...*/
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1e586bb4452e..0a309dc29b45 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -560,7 +560,7 @@ extern int register_md_personality(struct mdk_personality *p);
 extern int unregister_md_personality(struct mdk_personality *p);
 extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
 				mddev_t *mddev, const char *name);
-extern void md_unregister_thread(mdk_thread_t *thread);
+extern void md_unregister_thread(mdk_thread_t **threadp);
 extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_check_recovery(mddev_t *mddev);
 extern void md_write_start(mddev_t *mddev, struct bio *bi);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 3535c23af288..d5b5fb300171 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -514,8 +514,7 @@ static int multipath_stop (mddev_t *mddev)
 {
 	multipath_conf_t *conf = mddev->private;
 
-	md_unregister_thread(mddev->thread);
-	mddev->thread = NULL;
+	md_unregister_thread(&mddev->thread);
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	mempool_destroy(conf->pool);
 	kfree(conf->multipaths);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index f4622dd8fc59..d9587dffe533 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2562,8 +2562,7 @@ static int stop(mddev_t *mddev)
 	raise_barrier(conf);
 	lower_barrier(conf);
 
-	md_unregister_thread(mddev->thread);
-	mddev->thread = NULL;
+	md_unregister_thread(&mddev->thread);
 	if (conf->r1bio_pool)
 		mempool_destroy(conf->r1bio_pool);
 	kfree(conf->mirrors);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d7a8468ddeab..0cd9672cf9cb 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2955,7 +2955,7 @@ static int run(mddev_t *mddev)
 	return 0;
 
 out_free_conf:
-	md_unregister_thread(mddev->thread);
+	md_unregister_thread(&mddev->thread);
 	if (conf->r10bio_pool)
 		mempool_destroy(conf->r10bio_pool);
 	safe_put_page(conf->tmppage);
@@ -2973,8 +2973,7 @@ static int stop(mddev_t *mddev)
 	raise_barrier(conf, 0);
 	lower_barrier(conf);
 
-	md_unregister_thread(mddev->thread);
-	mddev->thread = NULL;
+	md_unregister_thread(&mddev->thread);
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	if (conf->r10bio_pool)
 		mempool_destroy(conf->r10bio_pool);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 43709fa6b6df..ac5e8b57e50f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4941,8 +4941,7 @@ static int run(mddev_t *mddev)
 
 	return 0;
 abort:
-	md_unregister_thread(mddev->thread);
-	mddev->thread = NULL;
+	md_unregister_thread(&mddev->thread);
 	if (conf) {
 		print_raid5_conf(conf);
 		free_conf(conf);
@@ -4956,8 +4955,7 @@ static int stop(mddev_t *mddev)
 {
 	raid5_conf_t *conf = mddev->private;
 
-	md_unregister_thread(mddev->thread);
-	mddev->thread = NULL;
+	md_unregister_thread(&mddev->thread);
 	if (mddev->queue)
 		mddev->queue->backing_dev_info.congested_fn = NULL;
 	free_conf(conf);
diff --git a/drivers/media/video/omap/omap_vout.c b/drivers/media/video/omap/omap_vout.c
index b5ef36222440..b3a5ecdb33ac 100644
--- a/drivers/media/video/omap/omap_vout.c
+++ b/drivers/media/video/omap/omap_vout.c
@@ -2194,19 +2194,6 @@ static int __init omap_vout_probe(struct platform_device *pdev)
 					"'%s' Display already enabled\n",
 					def_display->name);
 			}
-			/* set the update mode */
-			if (def_display->caps &
-					OMAP_DSS_DISPLAY_CAP_MANUAL_UPDATE) {
-				if (dssdrv->enable_te)
-					dssdrv->enable_te(def_display, 0);
-				if (dssdrv->set_update_mode)
-					dssdrv->set_update_mode(def_display,
-							OMAP_DSS_UPDATE_MANUAL);
-			} else {
-				if (dssdrv->set_update_mode)
-					dssdrv->set_update_mode(def_display,
-							OMAP_DSS_UPDATE_AUTO);
-			}
 		}
 	}
 
diff --git a/drivers/media/video/omap3isp/ispccdc.c b/drivers/media/video/omap3isp/ispccdc.c
index 9d3459de04b2..80796eb0c53e 100644
--- a/drivers/media/video/omap3isp/ispccdc.c
+++ b/drivers/media/video/omap3isp/ispccdc.c
@@ -31,6 +31,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <media/v4l2-event.h>
 
 #include "isp.h"
diff --git a/drivers/media/video/uvc/uvc_driver.c b/drivers/media/video/uvc/uvc_driver.c
index d29f9c2d0854..e4100b1f68df 100644
--- a/drivers/media/video/uvc/uvc_driver.c
+++ b/drivers/media/video/uvc/uvc_driver.c
@@ -1961,7 +1961,7 @@ static int __uvc_resume(struct usb_interface *intf, int reset)
 
 	list_for_each_entry(stream, &dev->streams, list) {
 		if (stream->intf == intf)
-			return uvc_video_resume(stream);
+			return uvc_video_resume(stream, reset);
 	}
 
 	uvc_trace(UVC_TRACE_SUSPEND, "Resume: video streaming USB interface "
diff --git a/drivers/media/video/uvc/uvc_entity.c b/drivers/media/video/uvc/uvc_entity.c
index 48fea373c25a..29e239911d0e 100644
--- a/drivers/media/video/uvc/uvc_entity.c
+++ b/drivers/media/video/uvc/uvc_entity.c
@@ -49,7 +49,7 @@ static int uvc_mc_register_entity(struct uvc_video_chain *chain,
 		if (remote == NULL)
 			return -EINVAL;
 
-		source = (UVC_ENTITY_TYPE(remote) != UVC_TT_STREAMING)
+		source = (UVC_ENTITY_TYPE(remote) == UVC_TT_STREAMING)
 		       ? (remote->vdev ? &remote->vdev->entity : NULL)
 		       : &remote->subdev.entity;
 		if (source == NULL)
diff --git a/drivers/media/video/uvc/uvc_video.c b/drivers/media/video/uvc/uvc_video.c
index 8244167c8915..ffd1158628b6 100644
--- a/drivers/media/video/uvc/uvc_video.c
+++ b/drivers/media/video/uvc/uvc_video.c
@@ -1104,10 +1104,18 @@ int uvc_video_suspend(struct uvc_streaming *stream)
  * buffers, making sure userspace applications are notified of the problem
  * instead of waiting forever.
  */
-int uvc_video_resume(struct uvc_streaming *stream)
+int uvc_video_resume(struct uvc_streaming *stream, int reset)
 {
 	int ret;
 
+	/* If the bus has been reset on resume, set the alternate setting to 0.
+	 * This should be the default value, but some devices crash or otherwise
+	 * misbehave if they don't receive a SET_INTERFACE request before any
+	 * other video control request.
+	 */
+	if (reset)
+		usb_set_interface(stream->dev->udev, stream->intfnum, 0);
+
 	stream->frozen = 0;
 
 	ret = uvc_commit_video(stream, &stream->ctrl);
diff --git a/drivers/media/video/uvc/uvcvideo.h b/drivers/media/video/uvc/uvcvideo.h
index df32a43ca86a..cbdd49bf8b67 100644
--- a/drivers/media/video/uvc/uvcvideo.h
+++ b/drivers/media/video/uvc/uvcvideo.h
@@ -638,7 +638,7 @@ extern void uvc_mc_cleanup_entity(struct uvc_entity *entity);
 /* Video */
 extern int uvc_video_init(struct uvc_streaming *stream);
 extern int uvc_video_suspend(struct uvc_streaming *stream);
-extern int uvc_video_resume(struct uvc_streaming *stream);
+extern int uvc_video_resume(struct uvc_streaming *stream, int reset);
 extern int uvc_video_enable(struct uvc_streaming *stream, int enable);
 extern int uvc_probe_video(struct uvc_streaming *stream,
 		struct uvc_streaming_control *probe);
diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c
index 06f14008b346..d72156517726 100644
--- a/drivers/media/video/v4l2-dev.c
+++ b/drivers/media/video/v4l2-dev.c
@@ -173,6 +173,17 @@ static void v4l2_device_release(struct device *cd)
 		media_device_unregister_entity(&vdev->entity);
 #endif
 
+	/* Do not call v4l2_device_put if there is no release callback set.
+	 * Drivers that have no v4l2_device release callback might free the
+	 * v4l2_dev instance in the video_device release callback below, so we
+	 * must perform this check here.
+	 *
+	 * TODO: In the long run all drivers that use v4l2_device should use the
+	 * v4l2_device release callback. This check will then be unnecessary.
+	 */
+	if (v4l2_dev->release == NULL)
+		v4l2_dev = NULL;
+
 	/* Release video_device and perform other
 	   cleanups as needed. */
 	vdev->release(vdev);
diff --git a/drivers/media/video/v4l2-device.c b/drivers/media/video/v4l2-device.c
index c72856c41434..e6a2c3b302d4 100644
--- a/drivers/media/video/v4l2-device.c
+++ b/drivers/media/video/v4l2-device.c
@@ -38,6 +38,7 @@ int v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev)
 	mutex_init(&v4l2_dev->ioctl_lock);
 	v4l2_prio_init(&v4l2_dev->prio);
 	kref_init(&v4l2_dev->ref);
+	get_device(dev);
 	v4l2_dev->dev = dev;
 	if (dev == NULL) {
 		/* If dev == NULL, then name must be filled in by the caller */
@@ -93,6 +94,7 @@ void v4l2_device_disconnect(struct v4l2_device *v4l2_dev)
 
 	if (dev_get_drvdata(v4l2_dev->dev) == v4l2_dev)
 		dev_set_drvdata(v4l2_dev->dev, NULL);
+	put_device(v4l2_dev->dev);
 	v4l2_dev->dev = NULL;
 }
 EXPORT_SYMBOL_GPL(v4l2_device_disconnect);
diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
index 21131c7b0f1e..563654c9b19e 100644
--- a/drivers/mfd/jz4740-adc.c
+++ b/drivers/mfd/jz4740-adc.c
@@ -273,7 +273,7 @@ static int __devinit jz4740_adc_probe(struct platform_device *pdev)
 	ct->regs.ack = JZ_REG_ADC_STATUS;
 	ct->chip.irq_mask = irq_gc_mask_set_bit;
 	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
-	ct->chip.irq_ack = irq_gc_ack;
+	ct->chip.irq_ack = irq_gc_ack_set_bit;
 
 	irq_setup_generic_chip(gc, IRQ_MSK(5), 0, 0, IRQ_NOPROBE | IRQ_LEVEL);
 
diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c
index 5d1fca0277ef..f83103b8970d 100644
--- a/drivers/mfd/max8997.c
+++ b/drivers/mfd/max8997.c
@@ -135,10 +135,13 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
 	max8997->dev = &i2c->dev;
 	max8997->i2c = i2c;
 	max8997->type = id->driver_data;
+	max8997->irq = i2c->irq;
 
 	if (!pdata)
 		goto err;
 
+	max8997->irq_base = pdata->irq_base;
+	max8997->ono = pdata->ono;
 	max8997->wakeup = pdata->wakeup;
 
 	mutex_init(&max8997->iolock);
@@ -152,6 +155,8 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
 
 	pm_runtime_set_active(max8997->dev);
 
+	max8997_irq_init(max8997);
+
 	mfd_add_devices(max8997->dev, -1, max8997_devs,
 			ARRAY_SIZE(max8997_devs),
 			NULL, 0);
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 29601e7d606d..86e14583a082 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
@@ -676,7 +677,6 @@ static void usbhs_omap_tll_init(struct device *dev, u8 tll_channel_count)
 				| OMAP_TLL_CHANNEL_CONF_ULPINOBITSTUFF
 				| OMAP_TLL_CHANNEL_CONF_ULPIDDRMODE);
 
-			reg |= (1 << (i + 1));
 		} else
 			continue;
 
diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c
index 2bfad5c86cc7..a56be931551c 100644
--- a/drivers/mfd/tps65910-irq.c
+++ b/drivers/mfd/tps65910-irq.c
@@ -178,8 +178,10 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq,
 	switch (tps65910_chip_id(tps65910)) {
 	case TPS65910:
 		tps65910->irq_num = TPS65910_NUM_IRQ;
+		break;
 	case TPS65911:
 		tps65910->irq_num = TPS65911_NUM_IRQ;
+		break;
 	}
 
 	/* Register with genirq */
diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c
index b5d598c3aa71..7cbf2aa9e64f 100644
--- a/drivers/mfd/twl4030-madc.c
+++ b/drivers/mfd/twl4030-madc.c
@@ -510,8 +510,9 @@ int twl4030_madc_conversion(struct twl4030_madc_request *req)
 	u8 ch_msb, ch_lsb;
 	int ret;
 
-	if (!req)
+	if (!req || !twl4030_madc)
 		return -EINVAL;
+
 	mutex_lock(&twl4030_madc->lock);
 	if (req->method < TWL4030_MADC_RT || req->method > TWL4030_MADC_SW2) {
 		ret = -EINVAL;
@@ -706,6 +707,8 @@ static int __devinit twl4030_madc_probe(struct platform_device *pdev)
 	if (!madc)
 		return -ENOMEM;
 
+	madc->dev = &pdev->dev;
+
 	/*
 	 * Phoenix provides 2 interrupt lines. The first one is connected to
 	 * the OMAP. The other one can be connected to the other processor such
diff --git a/drivers/mfd/wm8350-gpio.c b/drivers/mfd/wm8350-gpio.c
index ebf99bef392f..d584f6b4d6e2 100644
--- a/drivers/mfd/wm8350-gpio.c
+++ b/drivers/mfd/wm8350-gpio.c
@@ -37,7 +37,7 @@ static int gpio_set_dir(struct wm8350 *wm8350, int gpio, int dir)
 	return ret;
 }
 
-static int gpio_set_debounce(struct wm8350 *wm8350, int gpio, int db)
+static int wm8350_gpio_set_debounce(struct wm8350 *wm8350, int gpio, int db)
 {
 	if (db == WM8350_GPIO_DEBOUNCE_ON)
 		return wm8350_set_bits(wm8350, WM8350_GPIO_DEBOUNCE,
@@ -210,7 +210,7 @@ int wm8350_gpio_config(struct wm8350 *wm8350, int gpio, int dir, int func,
 		goto err;
 	if (gpio_set_polarity(wm8350, gpio, pol))
 		goto err;
-	if (gpio_set_debounce(wm8350, gpio, debounce))
+	if (wm8350_gpio_set_debounce(wm8350, gpio, debounce))
 		goto err;
 	if (gpio_set_dir(wm8350, gpio, dir))
 		goto err;
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
index b928bc14e97b..8b51cd62d067 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -375,12 +375,14 @@ void lis3lv02d_poweron(struct lis3lv02d *lis3)
 	 *      both have been read. So the value read will always be correct.
 	 * Set BOOT bit to refresh factory tuning values.
 	 */
-	lis3->read(lis3, CTRL_REG2, &reg);
-	if (lis3->whoami ==  WAI_12B)
-		reg |= CTRL2_BDU | CTRL2_BOOT;
-	else
-		reg |= CTRL2_BOOT_8B;
-	lis3->write(lis3, CTRL_REG2, reg);
+	if (lis3->pdata) {
+		lis3->read(lis3, CTRL_REG2, &reg);
+		if (lis3->whoami ==  WAI_12B)
+			reg |= CTRL2_BDU | CTRL2_BOOT;
+		else
+			reg |= CTRL2_BOOT_8B;
+		lis3->write(lis3, CTRL_REG2, reg);
+	}
 
 	/* LIS3 power on delay is quite long */
 	msleep(lis3->pwron_delay / lis3lv02d_get_odr());
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
index 06df1877ad0f..0b56e3f43573 100644
--- a/drivers/misc/pti.c
+++ b/drivers/misc/pti.c
@@ -165,6 +165,11 @@ static void pti_write_to_aperture(struct pti_masterchannel *mc,
 static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc,
 					     const char *thread_name)
 {
+	/*
+	 * Since we access the comm member in current's task_struct, we only
+	 * need to be as large as what 'comm' in that structure is.
+	 */
+	char comm[TASK_COMM_LEN];
 	struct pti_masterchannel mccontrol = {.master = CONTROL_ID,
 					      .channel = 0};
 	const char *thread_name_p;
@@ -172,13 +177,6 @@ static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc,
 	u8 control_frame[CONTROL_FRAME_LEN];
 
 	if (!thread_name) {
-		/*
-		 * Since we access the comm member in current's task_struct,
-		 * we only need to be as large as what 'comm' in that
-		 * structure is.
-		 */
-		char comm[TASK_COMM_LEN];
-
 		if (!in_interrupt())
 			get_task_comm(comm, current);
 		else
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 1ff5486213fb..4c1a648d00fc 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -926,6 +926,9 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 	/*
 	 * Reliable writes are used to implement Forced Unit Access and
 	 * REQ_META accesses, and are supported only on MMCs.
+	 *
+	 * XXX: this really needs a good explanation of why REQ_META
+	 * is treated special.
 	 */
 	bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
 			  (req->cmd_flags & REQ_META)) &&
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 8d0314dbd946..a44874e24f2a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2535,7 +2535,7 @@ config S6GMAC
 source "drivers/net/stmmac/Kconfig"
 
 config PCH_GBE
-	tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7223 IOH GbE"
+	tristate "Intel EG20T PCH/OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE"
 	depends on PCI
 	select MII
 	---help---
@@ -2548,10 +2548,11 @@ config PCH_GBE
 	  This driver enables Gigabit Ethernet function.
 
 	  This driver also can be used for OKI SEMICONDUCTOR IOH(Input/
-	  Output Hub), ML7223.
-	  ML7223 IOH is for MP(Media Phone) use.
-	  ML7223 is companion chip for Intel Atom E6xx series.
-	  ML7223 is completely compatible for Intel EG20T PCH.
+	  Output Hub), ML7223/ML7831.
+	  ML7223 IOH is for MP(Media Phone) use. ML7831 IOH is for general
+	  purpose use.
+	  ML7223/ML7831 is companion chip for Intel Atom E6xx series.
+	  ML7223/ML7831 is completely compatible for Intel EG20T PCH.
 
 config FTGMAC100
 	tristate "Faraday FTGMAC100 Gigabit Ethernet support"
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index c423504a755f..9a7eb3b36cf3 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -239,13 +239,19 @@ void bnx2x_int_disable(struct bnx2x *bp);
  *  FUNC_N_CLID_X = N * NUM_SPECIAL_CLIENTS + FUNC_0_CLID_X
  *
  */
-/* iSCSI L2 */
-#define BNX2X_ISCSI_ETH_CL_ID_IDX	1
-#define BNX2X_ISCSI_ETH_CID		49
+enum {
+	BNX2X_ISCSI_ETH_CL_ID_IDX,
+	BNX2X_FCOE_ETH_CL_ID_IDX,
+	BNX2X_MAX_CNIC_ETH_CL_ID_IDX,
+};
 
-/* FCoE L2 */
-#define BNX2X_FCOE_ETH_CL_ID_IDX	2
-#define BNX2X_FCOE_ETH_CID		50
+#define BNX2X_CNIC_START_ETH_CID	48
+enum {
+	/* iSCSI L2 */
+	BNX2X_ISCSI_ETH_CID = BNX2X_CNIC_START_ETH_CID,
+	/* FCoE L2 */
+	BNX2X_FCOE_ETH_CID,
+};
 
 /** Additional rings budgeting */
 #ifdef BCM_CNIC
@@ -315,6 +321,14 @@ union db_prod {
 	u32		raw;
 };
 
+/* dropless fc FW/HW related params */
+#define BRB_SIZE(bp)		(CHIP_IS_E3(bp) ? 1024 : 512)
+#define MAX_AGG_QS(bp)		(CHIP_IS_E1(bp) ? \
+					ETH_MAX_AGGREGATION_QUEUES_E1 :\
+					ETH_MAX_AGGREGATION_QUEUES_E1H_E2)
+#define FW_DROP_LEVEL(bp)	(3 + MAX_SPQ_PENDING + MAX_AGG_QS(bp))
+#define FW_PREFETCH_CNT		16
+#define DROPLESS_FC_HEADROOM	100
 
 /* MC hsi */
 #define BCM_PAGE_SHIFT		12
@@ -331,15 +345,35 @@ union db_prod {
 /* SGE ring related macros */
 #define NUM_RX_SGE_PAGES	2
 #define RX_SGE_CNT		(BCM_PAGE_SIZE / sizeof(struct eth_rx_sge))
-#define MAX_RX_SGE_CNT		(RX_SGE_CNT - 2)
+#define NEXT_PAGE_SGE_DESC_CNT	2
+#define MAX_RX_SGE_CNT		(RX_SGE_CNT - NEXT_PAGE_SGE_DESC_CNT)
 /* RX_SGE_CNT is promised to be a power of 2 */
 #define RX_SGE_MASK		(RX_SGE_CNT - 1)
 #define NUM_RX_SGE		(RX_SGE_CNT * NUM_RX_SGE_PAGES)
 #define MAX_RX_SGE		(NUM_RX_SGE - 1)
 #define NEXT_SGE_IDX(x)		((((x) & RX_SGE_MASK) == \
-				  (MAX_RX_SGE_CNT - 1)) ? (x) + 3 : (x) + 1)
+				  (MAX_RX_SGE_CNT - 1)) ? \
+					(x) + 1 + NEXT_PAGE_SGE_DESC_CNT : \
+					(x) + 1)
 #define RX_SGE(x)		((x) & MAX_RX_SGE)
 
+/*
+ * Number of required  SGEs is the sum of two:
+ * 1. Number of possible opened aggregations (next packet for
+ *    these aggregations will probably consume SGE immidiatelly)
+ * 2. Rest of BRB blocks divided by 2 (block will consume new SGE only
+ *    after placement on BD for new TPA aggregation)
+ *
+ * Takes into account NEXT_PAGE_SGE_DESC_CNT "next" elements on each page
+ */
+#define NUM_SGE_REQ		(MAX_AGG_QS(bp) + \
+					(BRB_SIZE(bp) - MAX_AGG_QS(bp)) / 2)
+#define NUM_SGE_PG_REQ		((NUM_SGE_REQ + MAX_RX_SGE_CNT - 1) / \
+						MAX_RX_SGE_CNT)
+#define SGE_TH_LO(bp)		(NUM_SGE_REQ + \
+				 NUM_SGE_PG_REQ * NEXT_PAGE_SGE_DESC_CNT)
+#define SGE_TH_HI(bp)		(SGE_TH_LO(bp) + DROPLESS_FC_HEADROOM)
+
 /* Manipulate a bit vector defined as an array of u64 */
 
 /* Number of bits in one sge_mask array element */
@@ -551,24 +585,43 @@ struct bnx2x_fastpath {
 
 #define NUM_TX_RINGS		16
 #define TX_DESC_CNT		(BCM_PAGE_SIZE / sizeof(union eth_tx_bd_types))
-#define MAX_TX_DESC_CNT		(TX_DESC_CNT - 1)
+#define NEXT_PAGE_TX_DESC_CNT	1
+#define MAX_TX_DESC_CNT		(TX_DESC_CNT - NEXT_PAGE_TX_DESC_CNT)
 #define NUM_TX_BD		(TX_DESC_CNT * NUM_TX_RINGS)
 #define MAX_TX_BD		(NUM_TX_BD - 1)
 #define MAX_TX_AVAIL		(MAX_TX_DESC_CNT * NUM_TX_RINGS - 2)
 #define NEXT_TX_IDX(x)		((((x) & MAX_TX_DESC_CNT) == \
-				  (MAX_TX_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
+				  (MAX_TX_DESC_CNT - 1)) ? \
+					(x) + 1 + NEXT_PAGE_TX_DESC_CNT : \
+					(x) + 1)
 #define TX_BD(x)		((x) & MAX_TX_BD)
 #define TX_BD_POFF(x)		((x) & MAX_TX_DESC_CNT)
 
 /* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */
 #define NUM_RX_RINGS		8
 #define RX_DESC_CNT		(BCM_PAGE_SIZE / sizeof(struct eth_rx_bd))
-#define MAX_RX_DESC_CNT		(RX_DESC_CNT - 2)
+#define NEXT_PAGE_RX_DESC_CNT	2
+#define MAX_RX_DESC_CNT		(RX_DESC_CNT - NEXT_PAGE_RX_DESC_CNT)
 #define RX_DESC_MASK		(RX_DESC_CNT - 1)
 #define NUM_RX_BD		(RX_DESC_CNT * NUM_RX_RINGS)
 #define MAX_RX_BD		(NUM_RX_BD - 1)
 #define MAX_RX_AVAIL		(MAX_RX_DESC_CNT * NUM_RX_RINGS - 2)
-#define MIN_RX_AVAIL		128
+
+/* dropless fc calculations for BDs
+ *
+ * Number of BDs should as number of buffers in BRB:
+ * Low threshold takes into account NEXT_PAGE_RX_DESC_CNT
+ * "next" elements on each page
+ */
+#define NUM_BD_REQ		BRB_SIZE(bp)
+#define NUM_BD_PG_REQ		((NUM_BD_REQ + MAX_RX_DESC_CNT - 1) / \
+					      MAX_RX_DESC_CNT)
+#define BD_TH_LO(bp)		(NUM_BD_REQ + \
+				 NUM_BD_PG_REQ * NEXT_PAGE_RX_DESC_CNT + \
+				 FW_DROP_LEVEL(bp))
+#define BD_TH_HI(bp)		(BD_TH_LO(bp) + DROPLESS_FC_HEADROOM)
+
+#define MIN_RX_AVAIL		((bp)->dropless_fc ? BD_TH_HI(bp) + 128 : 128)
 
 #define MIN_RX_SIZE_TPA_HW	(CHIP_IS_E1(bp) ? \
 					ETH_MIN_RX_CQES_WITH_TPA_E1 : \
@@ -579,7 +632,9 @@ struct bnx2x_fastpath {
 								MIN_RX_AVAIL))
 
 #define NEXT_RX_IDX(x)		((((x) & RX_DESC_MASK) == \
-				  (MAX_RX_DESC_CNT - 1)) ? (x) + 3 : (x) + 1)
+				  (MAX_RX_DESC_CNT - 1)) ? \
+					(x) + 1 + NEXT_PAGE_RX_DESC_CNT : \
+					(x) + 1)
 #define RX_BD(x)		((x) & MAX_RX_BD)
 
 /*
@@ -589,14 +644,31 @@ struct bnx2x_fastpath {
 #define CQE_BD_REL	(sizeof(union eth_rx_cqe) / sizeof(struct eth_rx_bd))
 #define NUM_RCQ_RINGS		(NUM_RX_RINGS * CQE_BD_REL)
 #define RCQ_DESC_CNT		(BCM_PAGE_SIZE / sizeof(union eth_rx_cqe))
-#define MAX_RCQ_DESC_CNT	(RCQ_DESC_CNT - 1)
+#define NEXT_PAGE_RCQ_DESC_CNT	1
+#define MAX_RCQ_DESC_CNT	(RCQ_DESC_CNT - NEXT_PAGE_RCQ_DESC_CNT)
 #define NUM_RCQ_BD		(RCQ_DESC_CNT * NUM_RCQ_RINGS)
 #define MAX_RCQ_BD		(NUM_RCQ_BD - 1)
 #define MAX_RCQ_AVAIL		(MAX_RCQ_DESC_CNT * NUM_RCQ_RINGS - 2)
 #define NEXT_RCQ_IDX(x)		((((x) & MAX_RCQ_DESC_CNT) == \
-				  (MAX_RCQ_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
+				  (MAX_RCQ_DESC_CNT - 1)) ? \
+					(x) + 1 + NEXT_PAGE_RCQ_DESC_CNT : \
+					(x) + 1)
 #define RCQ_BD(x)		((x) & MAX_RCQ_BD)
 
+/* dropless fc calculations for RCQs
+ *
+ * Number of RCQs should be as number of buffers in BRB:
+ * Low threshold takes into account NEXT_PAGE_RCQ_DESC_CNT
+ * "next" elements on each page
+ */
+#define NUM_RCQ_REQ		BRB_SIZE(bp)
+#define NUM_RCQ_PG_REQ		((NUM_BD_REQ + MAX_RCQ_DESC_CNT - 1) / \
+					      MAX_RCQ_DESC_CNT)
+#define RCQ_TH_LO(bp)		(NUM_RCQ_REQ + \
+				 NUM_RCQ_PG_REQ * NEXT_PAGE_RCQ_DESC_CNT + \
+				 FW_DROP_LEVEL(bp))
+#define RCQ_TH_HI(bp)		(RCQ_TH_LO(bp) + DROPLESS_FC_HEADROOM)
+
 
 /* This is needed for determining of last_max */
 #define SUB_S16(a, b)		(s16)((s16)(a) - (s16)(b))
@@ -685,24 +757,17 @@ struct bnx2x_fastpath {
 #define FP_CSB_FUNC_OFF	\
 			offsetof(struct cstorm_status_block_c, func)
 
-#define HC_INDEX_TOE_RX_CQ_CONS		0 /* Formerly Ustorm TOE CQ index */
-					  /* (HC_INDEX_U_TOE_RX_CQ_CONS)  */
-#define HC_INDEX_ETH_RX_CQ_CONS		1 /* Formerly Ustorm ETH CQ index */
-					  /* (HC_INDEX_U_ETH_RX_CQ_CONS)  */
-#define HC_INDEX_ETH_RX_BD_CONS		2 /* Formerly Ustorm ETH BD index */
-					  /* (HC_INDEX_U_ETH_RX_BD_CONS)  */
-
-#define HC_INDEX_TOE_TX_CQ_CONS		4 /* Formerly Cstorm TOE CQ index   */
-					  /* (HC_INDEX_C_TOE_TX_CQ_CONS)    */
-#define HC_INDEX_ETH_TX_CQ_CONS_COS0	5 /* Formerly Cstorm ETH CQ index   */
-					  /* (HC_INDEX_C_ETH_TX_CQ_CONS)    */
-#define HC_INDEX_ETH_TX_CQ_CONS_COS1	6 /* Formerly Cstorm ETH CQ index   */
-					  /* (HC_INDEX_C_ETH_TX_CQ_CONS)    */
-#define HC_INDEX_ETH_TX_CQ_CONS_COS2	7 /* Formerly Cstorm ETH CQ index   */
-					  /* (HC_INDEX_C_ETH_TX_CQ_CONS)    */
+#define HC_INDEX_ETH_RX_CQ_CONS		1
 
-#define HC_INDEX_ETH_FIRST_TX_CQ_CONS	HC_INDEX_ETH_TX_CQ_CONS_COS0
+#define HC_INDEX_OOO_TX_CQ_CONS		4
+
+#define HC_INDEX_ETH_TX_CQ_CONS_COS0	5
+
+#define HC_INDEX_ETH_TX_CQ_CONS_COS1	6
 
+#define HC_INDEX_ETH_TX_CQ_CONS_COS2	7
+
+#define HC_INDEX_ETH_FIRST_TX_CQ_CONS	HC_INDEX_ETH_TX_CQ_CONS_COS0
 
 #define BNX2X_RX_SB_INDEX \
 	(&fp->sb_index_values[HC_INDEX_ETH_RX_CQ_CONS])
@@ -1100,11 +1165,12 @@ struct bnx2x {
 #define BP_PORT(bp)			(bp->pfid & 1)
 #define BP_FUNC(bp)			(bp->pfid)
 #define BP_ABS_FUNC(bp)			(bp->pf_num)
-#define BP_E1HVN(bp)			(bp->pfid >> 1)
-#define BP_VN(bp)			(BP_E1HVN(bp)) /*remove when approved*/
-#define BP_L_ID(bp)			(BP_E1HVN(bp) << 2)
-#define BP_FW_MB_IDX(bp)		(BP_PORT(bp) +\
-	  BP_VN(bp) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2  : 1))
+#define BP_VN(bp)			((bp)->pfid >> 1)
+#define BP_MAX_VN_NUM(bp)		(CHIP_MODE_IS_4_PORT(bp) ? 2 : 4)
+#define BP_L_ID(bp)			(BP_VN(bp) << 2)
+#define BP_FW_MB_IDX_VN(bp, vn)		(BP_PORT(bp) +\
+	  (vn) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2  : 1))
+#define BP_FW_MB_IDX(bp)		BP_FW_MB_IDX_VN(bp, BP_VN(bp))
 
 	struct net_device	*dev;
 	struct pci_dev		*pdev;
@@ -1767,7 +1833,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
 
 #define MAX_DMAE_C_PER_PORT		8
 #define INIT_DMAE_C(bp)			(BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \
-					 BP_E1HVN(bp))
+					 BP_VN(bp))
 #define PMF_DMAE_C(bp)			(BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \
 					 E1HVN_MAX)
 
@@ -1793,7 +1859,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
 
 /* must be used on a CID before placing it on a HW ring */
 #define HW_CID(bp, x)			((BP_PORT(bp) << 23) | \
-					 (BP_E1HVN(bp) << BNX2X_SWCID_SHIFT) | \
+					 (BP_VN(bp) << BNX2X_SWCID_SHIFT) | \
 					 (x))
 
 #define SP_DESC_CNT		(BCM_PAGE_SIZE / sizeof(struct eth_spe))
diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
index 37e5790681ad..c4cbf9736414 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -987,8 +987,6 @@ void __bnx2x_link_report(struct bnx2x *bp)
 void bnx2x_init_rx_rings(struct bnx2x *bp)
 {
 	int func = BP_FUNC(bp);
-	int max_agg_queues = CHIP_IS_E1(bp) ? ETH_MAX_AGGREGATION_QUEUES_E1 :
-					      ETH_MAX_AGGREGATION_QUEUES_E1H_E2;
 	u16 ring_prod;
 	int i, j;
 
@@ -1001,7 +999,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp)
 
 		if (!fp->disable_tpa) {
 			/* Fill the per-aggregtion pool */
-			for (i = 0; i < max_agg_queues; i++) {
+			for (i = 0; i < MAX_AGG_QS(bp); i++) {
 				struct bnx2x_agg_info *tpa_info =
 					&fp->tpa_info[i];
 				struct sw_rx_bd *first_buf =
@@ -1041,7 +1039,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp)
 					bnx2x_free_rx_sge_range(bp, fp,
 								ring_prod);
 					bnx2x_free_tpa_pool(bp, fp,
-							    max_agg_queues);
+							    MAX_AGG_QS(bp));
 					fp->disable_tpa = 1;
 					ring_prod = 0;
 					break;
@@ -1137,9 +1135,7 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp)
 		bnx2x_free_rx_bds(fp);
 
 		if (!fp->disable_tpa)
-			bnx2x_free_tpa_pool(bp, fp, CHIP_IS_E1(bp) ?
-					    ETH_MAX_AGGREGATION_QUEUES_E1 :
-					    ETH_MAX_AGGREGATION_QUEUES_E1H_E2);
+			bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp));
 	}
 }
 
@@ -3095,15 +3091,20 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index)
 	struct bnx2x_fastpath *fp = &bp->fp[index];
 	int ring_size = 0;
 	u8 cos;
+	int rx_ring_size = 0;
 
 	/* if rx_ring_size specified - use it */
-	int rx_ring_size = bp->rx_ring_size ? bp->rx_ring_size :
-			   MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp);
+	if (!bp->rx_ring_size) {
 
-	/* allocate at least number of buffers required by FW */
-	rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
-						    MIN_RX_SIZE_TPA,
-				  rx_ring_size);
+		rx_ring_size = MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp);
+
+		/* allocate at least number of buffers required by FW */
+		rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
+				     MIN_RX_SIZE_TPA, rx_ring_size);
+
+		bp->rx_ring_size = rx_ring_size;
+	} else
+		rx_ring_size = bp->rx_ring_size;
 
 	/* Common */
 	sb = &bnx2x_fp(bp, index, status_blk);
diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h
index 223bfeebc597..2dc1199239d0 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/bnx2x/bnx2x_cmn.h
@@ -1297,7 +1297,7 @@ static inline void bnx2x_init_txdata(struct bnx2x *bp,
 static inline u8 bnx2x_cnic_eth_cl_id(struct bnx2x *bp, u8 cl_idx)
 {
 	return bp->cnic_base_cl_id + cl_idx +
-		(bp->pf_num >> 1) * NON_ETH_CONTEXT_USE;
+		(bp->pf_num >> 1) * BNX2X_MAX_CNIC_ETH_CL_ID_IDX;
 }
 
 static inline u8 bnx2x_cnic_fw_sb_id(struct bnx2x *bp)
diff --git a/drivers/net/bnx2x/bnx2x_dcb.c b/drivers/net/bnx2x/bnx2x_dcb.c
index a1e004a82f7a..0b4acf67e0c6 100644
--- a/drivers/net/bnx2x/bnx2x_dcb.c
+++ b/drivers/net/bnx2x/bnx2x_dcb.c
@@ -2120,6 +2120,7 @@ static u8 bnx2x_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap)
 			break;
 		case DCB_CAP_ATTR_DCBX:
 			*cap = BNX2X_DCBX_CAPS;
+			break;
 		default:
 			rval = -EINVAL;
 			break;
diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c
index 221863059dae..cf3e47914dd7 100644
--- a/drivers/net/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/bnx2x/bnx2x_ethtool.c
@@ -363,13 +363,50 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 		}
 
 		/* advertise the requested speed and duplex if supported */
-		cmd->advertising &= bp->port.supported[cfg_idx];
+		if (cmd->advertising & ~(bp->port.supported[cfg_idx])) {
+			DP(NETIF_MSG_LINK, "Advertisement parameters "
+					   "are not supported\n");
+			return -EINVAL;
+		}
 
 		bp->link_params.req_line_speed[cfg_idx] = SPEED_AUTO_NEG;
-		bp->link_params.req_duplex[cfg_idx] = DUPLEX_FULL;
-		bp->port.advertising[cfg_idx] |= (ADVERTISED_Autoneg |
+		bp->link_params.req_duplex[cfg_idx] = cmd->duplex;
+		bp->port.advertising[cfg_idx] = (ADVERTISED_Autoneg |
 					 cmd->advertising);
+		if (cmd->advertising) {
+
+			bp->link_params.speed_cap_mask[cfg_idx] = 0;
+			if (cmd->advertising & ADVERTISED_10baseT_Half) {
+				bp->link_params.speed_cap_mask[cfg_idx] |=
+				PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF;
+			}
+			if (cmd->advertising & ADVERTISED_10baseT_Full)
+				bp->link_params.speed_cap_mask[cfg_idx] |=
+				PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL;
 
+			if (cmd->advertising & ADVERTISED_100baseT_Full)
+				bp->link_params.speed_cap_mask[cfg_idx] |=
+				PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL;
+
+			if (cmd->advertising & ADVERTISED_100baseT_Half) {
+				bp->link_params.speed_cap_mask[cfg_idx] |=
+				     PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF;
+			}
+			if (cmd->advertising & ADVERTISED_1000baseT_Half) {
+				bp->link_params.speed_cap_mask[cfg_idx] |=
+					PORT_HW_CFG_SPEED_CAPABILITY_D0_1G;
+			}
+			if (cmd->advertising & (ADVERTISED_1000baseT_Full |
+						ADVERTISED_1000baseKX_Full))
+				bp->link_params.speed_cap_mask[cfg_idx] |=
+					PORT_HW_CFG_SPEED_CAPABILITY_D0_1G;
+
+			if (cmd->advertising & (ADVERTISED_10000baseT_Full |
+						ADVERTISED_10000baseKX4_Full |
+						ADVERTISED_10000baseKR_Full))
+				bp->link_params.speed_cap_mask[cfg_idx] |=
+					PORT_HW_CFG_SPEED_CAPABILITY_D0_10G;
+		}
 	} else { /* forced speed */
 		/* advertise the requested speed and duplex if supported */
 		switch (speed) {
@@ -1310,10 +1347,7 @@ static void bnx2x_get_ringparam(struct net_device *dev,
 	if (bp->rx_ring_size)
 		ering->rx_pending = bp->rx_ring_size;
 	else
-		if (bp->state == BNX2X_STATE_OPEN && bp->num_queues)
-			ering->rx_pending = MAX_RX_AVAIL/bp->num_queues;
-		else
-			ering->rx_pending = MAX_RX_AVAIL;
+		ering->rx_pending = MAX_RX_AVAIL;
 
 	ering->rx_mini_pending = 0;
 	ering->rx_jumbo_pending = 0;
diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c
index d45b1555a602..ba15bdc5a1a9 100644
--- a/drivers/net/bnx2x/bnx2x_link.c
+++ b/drivers/net/bnx2x/bnx2x_link.c
@@ -778,9 +778,9 @@ static int bnx2x_ets_e3b0_set_cos_bw(struct bnx2x *bp,
 {
 	u32 nig_reg_adress_crd_weight = 0;
 	u32 pbf_reg_adress_crd_weight = 0;
-	/* Calculate and set BW for this COS*/
-	const u32 cos_bw_nig = (bw * min_w_val_nig) / total_bw;
-	const u32 cos_bw_pbf = (bw * min_w_val_pbf) / total_bw;
+	/* Calculate and set BW for this COS - use 1 instead of 0 for BW */
+	const u32 cos_bw_nig = ((bw ? bw : 1) * min_w_val_nig) / total_bw;
+	const u32 cos_bw_pbf = ((bw ? bw : 1) * min_w_val_pbf) / total_bw;
 
 	switch (cos_entry) {
 	case 0:
@@ -852,18 +852,12 @@ static int bnx2x_ets_e3b0_get_total_bw(
 	/* Calculate total BW requested */
 	for (cos_idx = 0; cos_idx < ets_params->num_of_cos; cos_idx++) {
 		if (bnx2x_cos_state_bw == ets_params->cos[cos_idx].state) {
-
-			if (0 == ets_params->cos[cos_idx].params.bw_params.bw) {
-				DP(NETIF_MSG_LINK, "bnx2x_ets_E3B0_config BW"
-						   "was set to 0\n");
-			return -EINVAL;
+			*total_bw +=
+				ets_params->cos[cos_idx].params.bw_params.bw;
 		}
-		*total_bw +=
-		    ets_params->cos[cos_idx].params.bw_params.bw;
-	    }
 	}
 
-	/*Check taotl BW is valid */
+	/* Check total BW is valid */
 	if ((100 != *total_bw) || (0 == *total_bw)) {
 		if (0 == *total_bw) {
 			DP(NETIF_MSG_LINK, "bnx2x_ets_E3B0_config toatl BW"
@@ -1726,7 +1720,7 @@ static int bnx2x_xmac_enable(struct link_params *params,
 
 	/* Check loopback mode */
 	if (lb)
-		val |= XMAC_CTRL_REG_CORE_LOCAL_LPBK;
+		val |= XMAC_CTRL_REG_LINE_LOCAL_LPBK;
 	REG_WR(bp, xmac_base + XMAC_REG_CTRL, val);
 	bnx2x_set_xumac_nig(params,
 			    ((vars->flow_ctrl & BNX2X_FLOW_CTRL_TX) != 0), 1);
@@ -3630,6 +3624,12 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
 			 MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1, val16);
 
+	/* Advertised and set FEC (Forward Error Correction) */
+	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+			 MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT2,
+			 (MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY |
+			  MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ));
+
 	/* Enable CL37 BAM */
 	if (REG_RD(bp, params->shmem_base +
 		   offsetof(struct shmem_region, dev_info.
@@ -5924,7 +5924,7 @@ int bnx2x_set_led(struct link_params *params,
 					(tmp | EMAC_LED_OVERRIDE));
 				/*
 				 * return here without enabling traffic
-				 * LED blink andsetting rate in ON mode.
+				 * LED blink and setting rate in ON mode.
 				 * In oper mode, enabling LED blink
 				 * and setting rate is needed.
 				 */
@@ -5936,7 +5936,11 @@ int bnx2x_set_led(struct link_params *params,
 			 * This is a work-around for HW issue found when link
 			 * is up in CL73
 			 */
-			REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1);
+			if ((!CHIP_IS_E3(bp)) ||
+			    (CHIP_IS_E3(bp) &&
+			     mode == LED_MODE_ON))
+				REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1);
+
 			if (CHIP_IS_E1x(bp) ||
 			    CHIP_IS_E2(bp) ||
 			    (mode == LED_MODE_ON))
@@ -10638,8 +10642,7 @@ static struct bnx2x_phy phy_warpcore = {
 	.type		= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT,
 	.addr		= 0xff,
 	.def_md_devad	= 0,
-	.flags		= (FLAGS_HW_LOCK_REQUIRED |
-			   FLAGS_TX_ERROR_CHECK),
+	.flags		= FLAGS_HW_LOCK_REQUIRED,
 	.rx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
 	.tx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
 	.mdio_ctrl	= 0,
@@ -10765,8 +10768,7 @@ static struct bnx2x_phy phy_8706 = {
 	.type		= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8706,
 	.addr		= 0xff,
 	.def_md_devad	= 0,
-	.flags		= (FLAGS_INIT_XGXS_FIRST |
-			   FLAGS_TX_ERROR_CHECK),
+	.flags		= FLAGS_INIT_XGXS_FIRST,
 	.rx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
 	.tx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
 	.mdio_ctrl	= 0,
@@ -10797,8 +10799,7 @@ static struct bnx2x_phy phy_8726 = {
 	.addr		= 0xff,
 	.def_md_devad	= 0,
 	.flags		= (FLAGS_HW_LOCK_REQUIRED |
-			   FLAGS_INIT_XGXS_FIRST |
-			   FLAGS_TX_ERROR_CHECK),
+			   FLAGS_INIT_XGXS_FIRST),
 	.rx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
 	.tx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
 	.mdio_ctrl	= 0,
@@ -10829,8 +10830,7 @@ static struct bnx2x_phy phy_8727 = {
 	.type		= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727,
 	.addr		= 0xff,
 	.def_md_devad	= 0,
-	.flags		= (FLAGS_FAN_FAILURE_DET_REQ |
-			   FLAGS_TX_ERROR_CHECK),
+	.flags		= FLAGS_FAN_FAILURE_DET_REQ,
 	.rx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
 	.tx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
 	.mdio_ctrl	= 0,
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index f74582a22c68..15f800085bb2 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -407,8 +407,8 @@ u32 bnx2x_dmae_opcode(struct bnx2x *bp, u8 src_type, u8 dst_type,
 	opcode |= (DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET);
 
 	opcode |= (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0);
-	opcode |= ((BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT) |
-		   (BP_E1HVN(bp) << DMAE_COMMAND_DST_VN_SHIFT));
+	opcode |= ((BP_VN(bp) << DMAE_CMD_E1HVN_SHIFT) |
+		   (BP_VN(bp) << DMAE_COMMAND_DST_VN_SHIFT));
 	opcode |= (DMAE_COM_SET_ERR << DMAE_COMMAND_ERR_POLICY_SHIFT);
 
 #ifdef __BIG_ENDIAN
@@ -1419,7 +1419,7 @@ static void bnx2x_hc_int_enable(struct bnx2x *bp)
 	if (!CHIP_IS_E1(bp)) {
 		/* init leading/trailing edge */
 		if (IS_MF(bp)) {
-			val = (0xee0f | (1 << (BP_E1HVN(bp) + 4)));
+			val = (0xee0f | (1 << (BP_VN(bp) + 4)));
 			if (bp->port.pmf)
 				/* enable nig and gpio3 attention */
 				val |= 0x1100;
@@ -1471,7 +1471,7 @@ static void bnx2x_igu_int_enable(struct bnx2x *bp)
 
 	/* init leading/trailing edge */
 	if (IS_MF(bp)) {
-		val = (0xee0f | (1 << (BP_E1HVN(bp) + 4)));
+		val = (0xee0f | (1 << (BP_VN(bp) + 4)));
 		if (bp->port.pmf)
 			/* enable nig and gpio3 attention */
 			val |= 0x1100;
@@ -2287,7 +2287,7 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
 	int vn;
 
 	bp->vn_weight_sum = 0;
-	for (vn = VN_0; vn < E1HVN_MAX; vn++) {
+	for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
 		u32 vn_cfg = bp->mf_config[vn];
 		u32 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
 				   FUNC_MF_CFG_MIN_BW_SHIFT) * 100;
@@ -2320,12 +2320,18 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
 					CMNG_FLAGS_PER_PORT_FAIRNESS_VN;
 }
 
+/* returns func by VN for current port */
+static inline int func_by_vn(struct bnx2x *bp, int vn)
+{
+	return 2 * vn + BP_PORT(bp);
+}
+
 static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
 {
 	struct rate_shaping_vars_per_vn m_rs_vn;
 	struct fairness_vars_per_vn m_fair_vn;
 	u32 vn_cfg = bp->mf_config[vn];
-	int func = 2*vn + BP_PORT(bp);
+	int func = func_by_vn(bp, vn);
 	u16 vn_min_rate, vn_max_rate;
 	int i;
 
@@ -2422,7 +2428,7 @@ void bnx2x_read_mf_cfg(struct bnx2x *bp)
 	 *
 	 *      and there are 2 functions per port
 	 */
-	for (vn = VN_0; vn < E1HVN_MAX; vn++) {
+	for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
 		int /*abs*/func = n * (2 * vn + BP_PORT(bp)) + BP_PATH(bp);
 
 		if (func >= E1H_FUNC_MAX)
@@ -2454,7 +2460,7 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
 
 		/* calculate and set min-max rate for each vn */
 		if (bp->port.pmf)
-			for (vn = VN_0; vn < E1HVN_MAX; vn++)
+			for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++)
 				bnx2x_init_vn_minmax(bp, vn);
 
 		/* always enable rate shaping and fairness */
@@ -2473,16 +2479,15 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
 
 static inline void bnx2x_link_sync_notify(struct bnx2x *bp)
 {
-	int port = BP_PORT(bp);
 	int func;
 	int vn;
 
 	/* Set the attention towards other drivers on the same port */
-	for (vn = VN_0; vn < E1HVN_MAX; vn++) {
-		if (vn == BP_E1HVN(bp))
+	for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
+		if (vn == BP_VN(bp))
 			continue;
 
-		func = ((vn << 1) | port);
+		func = func_by_vn(bp, vn);
 		REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_0 +
 		       (LINK_SYNC_ATTENTION_BIT_FUNC_0 + func)*4, 1);
 	}
@@ -2577,7 +2582,7 @@ static void bnx2x_pmf_update(struct bnx2x *bp)
 	bnx2x_dcbx_pmf_update(bp);
 
 	/* enable nig attention */
-	val = (0xff0f | (1 << (BP_E1HVN(bp) + 4)));
+	val = (0xff0f | (1 << (BP_VN(bp) + 4)));
 	if (bp->common.int_block == INT_BLOCK_HC) {
 		REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val);
 		REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val);
@@ -2756,8 +2761,14 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp,
 	u16 tpa_agg_size = 0;
 
 	if (!fp->disable_tpa) {
-		pause->sge_th_hi = 250;
-		pause->sge_th_lo = 150;
+		pause->sge_th_lo = SGE_TH_LO(bp);
+		pause->sge_th_hi = SGE_TH_HI(bp);
+
+		/* validate SGE ring has enough to cross high threshold */
+		WARN_ON(bp->dropless_fc &&
+				pause->sge_th_hi + FW_PREFETCH_CNT >
+				MAX_RX_SGE_CNT * NUM_RX_SGE_PAGES);
+
 		tpa_agg_size = min_t(u32,
 			(min_t(u32, 8, MAX_SKB_FRAGS) *
 			SGE_PAGE_SIZE * PAGES_PER_SGE), 0xffff);
@@ -2771,10 +2782,21 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp,
 
 	/* pause - not for e1 */
 	if (!CHIP_IS_E1(bp)) {
-		pause->bd_th_hi = 350;
-		pause->bd_th_lo = 250;
-		pause->rcq_th_hi = 350;
-		pause->rcq_th_lo = 250;
+		pause->bd_th_lo = BD_TH_LO(bp);
+		pause->bd_th_hi = BD_TH_HI(bp);
+
+		pause->rcq_th_lo = RCQ_TH_LO(bp);
+		pause->rcq_th_hi = RCQ_TH_HI(bp);
+		/*
+		 * validate that rings have enough entries to cross
+		 * high thresholds
+		 */
+		WARN_ON(bp->dropless_fc &&
+				pause->bd_th_hi + FW_PREFETCH_CNT >
+				bp->rx_ring_size);
+		WARN_ON(bp->dropless_fc &&
+				pause->rcq_th_hi + FW_PREFETCH_CNT >
+				NUM_RCQ_RINGS * MAX_RCQ_DESC_CNT);
 
 		pause->pri_map = 1;
 	}
@@ -2802,9 +2824,7 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp,
 	 * For PF Clients it should be the maximum avaliable number.
 	 * VF driver(s) may want to define it to a smaller value.
 	 */
-	rxq_init->max_tpa_queues =
-		(CHIP_IS_E1(bp) ? ETH_MAX_AGGREGATION_QUEUES_E1 :
-		ETH_MAX_AGGREGATION_QUEUES_E1H_E2);
+	rxq_init->max_tpa_queues = MAX_AGG_QS(bp);
 
 	rxq_init->cache_line_log = BNX2X_RX_ALIGN_SHIFT;
 	rxq_init->fw_sb_id = fp->fw_sb_id;
@@ -4808,6 +4828,37 @@ void bnx2x_setup_ndsb_state_machine(struct hc_status_block_sm *hc_sm,
 	hc_sm->time_to_expire = 0xFFFFFFFF;
 }
 
+
+/* allocates state machine ids. */
+static inline
+void bnx2x_map_sb_state_machines(struct hc_index_data *index_data)
+{
+	/* zero out state machine indices */
+	/* rx indices */
+	index_data[HC_INDEX_ETH_RX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID;
+
+	/* tx indices */
+	index_data[HC_INDEX_OOO_TX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID;
+	index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags &= ~HC_INDEX_DATA_SM_ID;
+	index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags &= ~HC_INDEX_DATA_SM_ID;
+	index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags &= ~HC_INDEX_DATA_SM_ID;
+
+	/* map indices */
+	/* rx indices */
+	index_data[HC_INDEX_ETH_RX_CQ_CONS].flags |=
+		SM_RX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
+
+	/* tx indices */
+	index_data[HC_INDEX_OOO_TX_CQ_CONS].flags |=
+		SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
+	index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags |=
+		SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
+	index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags |=
+		SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
+	index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags |=
+		SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
+}
+
 static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
 			  u8 vf_valid, int fw_sb_id, int igu_sb_id)
 {
@@ -4839,6 +4890,7 @@ static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
 		hc_sm_p = sb_data_e2.common.state_machine;
 		sb_data_p = (u32 *)&sb_data_e2;
 		data_size = sizeof(struct hc_status_block_data_e2)/sizeof(u32);
+		bnx2x_map_sb_state_machines(sb_data_e2.index_data);
 	} else {
 		memset(&sb_data_e1x, 0,
 		       sizeof(struct hc_status_block_data_e1x));
@@ -4853,6 +4905,7 @@ static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
 		hc_sm_p = sb_data_e1x.common.state_machine;
 		sb_data_p = (u32 *)&sb_data_e1x;
 		data_size = sizeof(struct hc_status_block_data_e1x)/sizeof(u32);
+		bnx2x_map_sb_state_machines(sb_data_e1x.index_data);
 	}
 
 	bnx2x_setup_ndsb_state_machine(&hc_sm_p[SM_RX_ID],
@@ -4890,7 +4943,7 @@ static void bnx2x_init_def_sb(struct bnx2x *bp)
 	int igu_seg_id;
 	int port = BP_PORT(bp);
 	int func = BP_FUNC(bp);
-	int reg_offset;
+	int reg_offset, reg_offset_en5;
 	u64 section;
 	int index;
 	struct hc_sp_status_block_data sp_sb_data;
@@ -4913,6 +4966,8 @@ static void bnx2x_init_def_sb(struct bnx2x *bp)
 
 	reg_offset = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 :
 			     MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0);
+	reg_offset_en5 = (port ? MISC_REG_AEU_ENABLE5_FUNC_1_OUT_0 :
+				 MISC_REG_AEU_ENABLE5_FUNC_0_OUT_0);
 	for (index = 0; index < MAX_DYNAMIC_ATTN_GRPS; index++) {
 		int sindex;
 		/* take care of sig[0]..sig[4] */
@@ -4927,7 +4982,7 @@ static void bnx2x_init_def_sb(struct bnx2x *bp)
 			 * and not 16 between the different groups
 			 */
 			bp->attn_group[index].sig[4] = REG_RD(bp,
-					reg_offset + 0x10 + 0x4*index);
+					reg_offset_en5 + 0x4*index);
 		else
 			bp->attn_group[index].sig[4] = 0;
 	}
@@ -5802,7 +5857,7 @@ static int bnx2x_init_hw_common(struct bnx2x *bp)
 	 * take the UNDI lock to protect undi_unload flow from accessing
 	 * registers while we're resetting the chip
 	 */
-	bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
+	bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
 
 	bnx2x_reset_common(bp);
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0xffffffff);
@@ -5814,7 +5869,7 @@ static int bnx2x_init_hw_common(struct bnx2x *bp)
 	}
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, val);
 
-	bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
+	bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
 
 	bnx2x_init_block(bp, BLOCK_MISC, PHASE_COMMON);
 
@@ -6671,12 +6726,16 @@ static int bnx2x_init_hw_func(struct bnx2x *bp)
 			if (CHIP_MODE_IS_4_PORT(bp))
 				dsb_idx = BP_FUNC(bp);
 			else
-				dsb_idx = BP_E1HVN(bp);
+				dsb_idx = BP_VN(bp);
 
 			prod_offset = (CHIP_INT_MODE_IS_BC(bp) ?
 				       IGU_BC_BASE_DSB_PROD + dsb_idx :
 				       IGU_NORM_BASE_DSB_PROD + dsb_idx);
 
+			/*
+			 * igu prods come in chunks of E1HVN_MAX (4) -
+			 * does not matters what is the current chip mode
+			 */
 			for (i = 0; i < (num_segs * E1HVN_MAX);
 			     i += E1HVN_MAX) {
 				addr = IGU_REG_PROD_CONS_MEMORY +
@@ -7568,9 +7627,12 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode)
 		u32 emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0;
 		u8 *mac_addr = bp->dev->dev_addr;
 		u32 val;
+		u16 pmc;
+
 		/* The mac address is written to entries 1-4 to
-		   preserve entry 0 which is used by the PMF */
-		u8 entry = (BP_E1HVN(bp) + 1)*8;
+		 * preserve entry 0 which is used by the PMF
+		 */
+		u8 entry = (BP_VN(bp) + 1)*8;
 
 		val = (mac_addr[0] << 8) | mac_addr[1];
 		EMAC_WR(bp, EMAC_REG_EMAC_MAC_MATCH + entry, val);
@@ -7579,6 +7641,11 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode)
 		      (mac_addr[4] << 8) | mac_addr[5];
 		EMAC_WR(bp, EMAC_REG_EMAC_MAC_MATCH + entry + 4, val);
 
+		/* Enable the PME and clear the status */
+		pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, &pmc);
+		pmc |= PCI_PM_CTRL_PME_ENABLE | PCI_PM_CTRL_PME_STATUS;
+		pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, pmc);
+
 		reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_EN;
 
 	} else
@@ -8546,10 +8613,12 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
 	/* Check if there is any driver already loaded */
 	val = REG_RD(bp, MISC_REG_UNPREPARED);
 	if (val == 0x1) {
-		/* Check if it is the UNDI driver
+
+		bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
+		/*
+		 * Check if it is the UNDI driver
 		 * UNDI driver initializes CID offset for normal bell to 0x7
 		 */
-		bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
 		val = REG_RD(bp, DORQ_REG_NORM_CID_OFST);
 		if (val == 0x7) {
 			u32 reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS;
@@ -8587,9 +8656,6 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
 				bnx2x_fw_command(bp, reset_code, 0);
 			}
 
-			/* now it's safe to release the lock */
-			bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
-
 			bnx2x_undi_int_disable(bp);
 			port = BP_PORT(bp);
 
@@ -8639,8 +8705,10 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
 			bp->fw_seq =
 			      (SHMEM_RD(bp, func_mb[bp->pf_num].drv_mb_header) &
 				DRV_MSG_SEQ_NUMBER_MASK);
-		} else
-			bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
+		}
+
+		/* now it's safe to release the lock */
+		bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
 	}
 }
 
@@ -8777,13 +8845,13 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp)
 static void __devinit bnx2x_get_igu_cam_info(struct bnx2x *bp)
 {
 	int pfid = BP_FUNC(bp);
-	int vn = BP_E1HVN(bp);
 	int igu_sb_id;
 	u32 val;
 	u8 fid, igu_sb_cnt = 0;
 
 	bp->igu_base_sb = 0xff;
 	if (CHIP_INT_MODE_IS_BC(bp)) {
+		int vn = BP_VN(bp);
 		igu_sb_cnt = bp->igu_sb_cnt;
 		bp->igu_base_sb = (CHIP_MODE_IS_4_PORT(bp) ? pfid : vn) *
 			FP_SB_MAX_E1x;
@@ -9416,6 +9484,10 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
 		bp->igu_base_sb = 0;
 	} else {
 		bp->common.int_block = INT_BLOCK_IGU;
+
+		/* do not allow device reset during IGU info preocessing */
+		bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
+
 		val = REG_RD(bp, IGU_REG_BLOCK_CONFIGURATION);
 
 		if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) {
@@ -9447,6 +9519,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
 
 		bnx2x_get_igu_cam_info(bp);
 
+		bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
 	}
 
 	/*
@@ -9473,7 +9546,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
 
 	bp->mf_ov = 0;
 	bp->mf_mode = 0;
-	vn = BP_E1HVN(bp);
+	vn = BP_VN(bp);
 
 	if (!CHIP_IS_E1(bp) && !BP_NOMCP(bp)) {
 		BNX2X_DEV_INFO("shmem2base 0x%x, size %d, mfcfg offset %d\n",
@@ -9593,13 +9666,6 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
 	/* port info */
 	bnx2x_get_port_hwinfo(bp);
 
-	if (!BP_NOMCP(bp)) {
-		bp->fw_seq =
-			(SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
-			 DRV_MSG_SEQ_NUMBER_MASK);
-		BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq);
-	}
-
 	/* Get MAC addresses */
 	bnx2x_get_mac_hwinfo(bp);
 
@@ -9765,6 +9831,14 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
 	if (!BP_NOMCP(bp))
 		bnx2x_undi_unload(bp);
 
+	/* init fw_seq after undi_unload! */
+	if (!BP_NOMCP(bp)) {
+		bp->fw_seq =
+			(SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
+			 DRV_MSG_SEQ_NUMBER_MASK);
+		BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq);
+	}
+
 	if (CHIP_REV_IS_FPGA(bp))
 		dev_err(&bp->pdev->dev, "FPGA detected\n");
 
@@ -10259,17 +10333,21 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
 	/* clean indirect addresses */
 	pci_write_config_dword(bp->pdev, PCICFG_GRC_ADDRESS,
 			       PCICFG_VENDOR_ID_OFFSET);
-	/* Clean the following indirect addresses for all functions since it
+	/*
+	 * Clean the following indirect addresses for all functions since it
 	 * is not used by the driver.
 	 */
 	REG_WR(bp, PXP2_REG_PGL_ADDR_88_F0, 0);
 	REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F0, 0);
 	REG_WR(bp, PXP2_REG_PGL_ADDR_90_F0, 0);
 	REG_WR(bp, PXP2_REG_PGL_ADDR_94_F0, 0);
-	REG_WR(bp, PXP2_REG_PGL_ADDR_88_F1, 0);
-	REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F1, 0);
-	REG_WR(bp, PXP2_REG_PGL_ADDR_90_F1, 0);
-	REG_WR(bp, PXP2_REG_PGL_ADDR_94_F1, 0);
+
+	if (CHIP_IS_E1x(bp)) {
+		REG_WR(bp, PXP2_REG_PGL_ADDR_88_F1, 0);
+		REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F1, 0);
+		REG_WR(bp, PXP2_REG_PGL_ADDR_90_F1, 0);
+		REG_WR(bp, PXP2_REG_PGL_ADDR_94_F1, 0);
+	}
 
 	/*
 	 * Enable internal target-read (in case we are probed after PF FLR).
diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h
index 40266c14e6dc..fc7bd0f23c0b 100644
--- a/drivers/net/bnx2x/bnx2x_reg.h
+++ b/drivers/net/bnx2x/bnx2x_reg.h
@@ -1384,6 +1384,18 @@
    Latched ump_tx_parity; [31] MCP Latched scpad_parity; */
 #define MISC_REG_AEU_ENABLE4_PXP_0				 0xa108
 #define MISC_REG_AEU_ENABLE4_PXP_1				 0xa1a8
+/* [RW 32] fifth 32b for enabling the output for function 0 output0. Mapped
+ * as follows: [0] PGLUE config_space; [1] PGLUE misc_flr; [2] PGLUE B RBC
+ * attention [3] PGLUE B RBC parity; [4] ATC attention; [5] ATC parity; [6]
+ * mstat0 attention; [7] mstat0 parity; [8] mstat1 attention; [9] mstat1
+ * parity; [31-10] Reserved; */
+#define MISC_REG_AEU_ENABLE5_FUNC_0_OUT_0			 0xa688
+/* [RW 32] Fifth 32b for enabling the output for function 1 output0. Mapped
+ * as follows: [0] PGLUE config_space; [1] PGLUE misc_flr; [2] PGLUE B RBC
+ * attention [3] PGLUE B RBC parity; [4] ATC attention; [5] ATC parity; [6]
+ * mstat0 attention; [7] mstat0 parity; [8] mstat1 attention; [9] mstat1
+ * parity; [31-10] Reserved; */
+#define MISC_REG_AEU_ENABLE5_FUNC_1_OUT_0			 0xa6b0
 /* [RW 1] set/clr general attention 0; this will set/clr bit 94 in the aeu
    128 bit vector */
 #define MISC_REG_AEU_GENERAL_ATTN_0				 0xa000
@@ -5320,7 +5332,7 @@
 #define XCM_REG_XX_OVFL_EVNT_ID 				 0x20058
 #define XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_LOCAL_FAULT_STATUS	 (0x1<<0)
 #define XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_REMOTE_FAULT_STATUS	 (0x1<<1)
-#define XMAC_CTRL_REG_CORE_LOCAL_LPBK				 (0x1<<3)
+#define XMAC_CTRL_REG_LINE_LOCAL_LPBK				 (0x1<<2)
 #define XMAC_CTRL_REG_RX_EN					 (0x1<<1)
 #define XMAC_CTRL_REG_SOFT_RESET				 (0x1<<6)
 #define XMAC_CTRL_REG_TX_EN					 (0x1<<0)
@@ -5766,7 +5778,7 @@
 #define HW_LOCK_RESOURCE_RECOVERY_LEADER_0			 8
 #define HW_LOCK_RESOURCE_RECOVERY_LEADER_1			 9
 #define HW_LOCK_RESOURCE_SPIO					 2
-#define HW_LOCK_RESOURCE_UNDI					 5
+#define HW_LOCK_RESOURCE_RESET					 5
 #define AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT			 (0x1<<4)
 #define AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR			 (0x1<<5)
 #define AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR			 (0x1<<18)
@@ -6853,6 +6865,9 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_WC_REG_IEEE0BLK_AUTONEGNP			0x7
 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT0	0x10
 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1	0x11
+#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT2	0x12
+#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY	0x4000
+#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ		0x8000
 #define MDIO_WC_REG_PMD_IEEE9BLK_TENGBASE_KR_PMD_CONTROL_REGISTER_150  0x96
 #define MDIO_WC_REG_XGXSBLK0_XGXSCONTROL		0x8000
 #define MDIO_WC_REG_XGXSBLK0_MISCCONTROL1		0x800e
diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c
index 771f6803b238..9908f2bbcf73 100644
--- a/drivers/net/bnx2x/bnx2x_stats.c
+++ b/drivers/net/bnx2x/bnx2x_stats.c
@@ -710,7 +710,8 @@ static int bnx2x_hw_stats_update(struct bnx2x *bp)
 		break;
 
 	case MAC_TYPE_NONE: /* unreached */
-		BNX2X_ERR("stats updated by DMAE but no MAC active\n");
+		DP(BNX2X_MSG_STATS,
+		   "stats updated by DMAE but no MAC active\n");
 		return -1;
 
 	default: /* unreached */
@@ -1391,7 +1392,7 @@ static void bnx2x_port_stats_base_init(struct bnx2x *bp)
 
 static void bnx2x_func_stats_base_init(struct bnx2x *bp)
 {
-	int vn, vn_max = IS_MF(bp) ? E1HVN_MAX : E1VN_MAX;
+	int vn, vn_max = IS_MF(bp) ? BP_MAX_VN_NUM(bp) : E1VN_MAX;
 	u32 func_stx;
 
 	/* sanity */
@@ -1404,7 +1405,7 @@ static void bnx2x_func_stats_base_init(struct bnx2x *bp)
 	func_stx = bp->func_stx;
 
 	for (vn = VN_0; vn < vn_max; vn++) {
-		int mb_idx = CHIP_IS_E1x(bp) ? 2*vn + BP_PORT(bp) : vn;
+		int mb_idx = BP_FW_MB_IDX_VN(bp, vn);
 
 		bp->func_stx = SHMEM_RD(bp, func_mb[mb_idx].fw_mb_param);
 		bnx2x_func_stats_init(bp);
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index a047eb973e3b..47b928ed08f8 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2168,7 +2168,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 	}
 
 re_arm:
-	queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
+	if (!bond->kill_timers)
+		queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
 out:
 	read_unlock(&bond->lock);
 }
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 7f8b20a34ee3..d4fbd2e62616 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1440,7 +1440,8 @@ void bond_alb_monitor(struct work_struct *work)
 	}
 
 re_arm:
-	queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks);
+	if (!bond->kill_timers)
+		queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks);
 out:
 	read_unlock(&bond->lock);
 }
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 43f2ea541088..6d79b78cfc75 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -777,6 +777,9 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 
 	read_lock(&bond->lock);
 
+	if (bond->kill_timers)
+		goto out;
+
 	/* rejoin all groups on bond device */
 	__bond_resend_igmp_join_requests(bond->dev);
 
@@ -790,9 +793,9 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 			__bond_resend_igmp_join_requests(vlan_dev);
 	}
 
-	if (--bond->igmp_retrans > 0)
+	if ((--bond->igmp_retrans > 0) && !bond->kill_timers)
 		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
-
+out:
 	read_unlock(&bond->lock);
 }
 
@@ -2538,7 +2541,7 @@ void bond_mii_monitor(struct work_struct *work)
 	}
 
 re_arm:
-	if (bond->params.miimon)
+	if (bond->params.miimon && !bond->kill_timers)
 		queue_delayed_work(bond->wq, &bond->mii_work,
 				   msecs_to_jiffies(bond->params.miimon));
 out:
@@ -2886,7 +2889,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
 	}
 
 re_arm:
-	if (bond->params.arp_interval)
+	if (bond->params.arp_interval && !bond->kill_timers)
 		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
 out:
 	read_unlock(&bond->lock);
@@ -3154,7 +3157,7 @@ void bond_activebackup_arp_mon(struct work_struct *work)
 	bond_ab_arp_probe(bond);
 
 re_arm:
-	if (bond->params.arp_interval)
+	if (bond->params.arp_interval && !bond->kill_timers)
 		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
 out:
 	read_unlock(&bond->lock);
diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c
index 92feac68b66e..4cc6f44c2ba2 100644
--- a/drivers/net/can/mscan/mscan.c
+++ b/drivers/net/can/mscan/mscan.c
@@ -261,11 +261,13 @@ static netdev_tx_t mscan_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		void __iomem *data = &regs->tx.dsr1_0;
 		u16 *payload = (u16 *)frame->data;
 
-		/* It is safe to write into dsr[dlc+1] */
-		for (i = 0; i < (frame->can_dlc + 1) / 2; i++) {
+		for (i = 0; i < frame->can_dlc / 2; i++) {
 			out_be16(data, *payload++);
 			data += 2 + _MSCAN_RESERVED_DSR_SIZE;
 		}
+		/* write remaining byte if necessary */
+		if (frame->can_dlc & 1)
+			out_8(data, frame->data[frame->can_dlc - 1]);
 	}
 
 	out_8(&regs->tx.dlr, frame->can_dlc);
@@ -330,10 +332,13 @@ static void mscan_get_rx_frame(struct net_device *dev, struct can_frame *frame)
 		void __iomem *data = &regs->rx.dsr1_0;
 		u16 *payload = (u16 *)frame->data;
 
-		for (i = 0; i < (frame->can_dlc + 1) / 2; i++) {
+		for (i = 0; i < frame->can_dlc / 2; i++) {
 			*payload++ = in_be16(data);
 			data += 2 + _MSCAN_RESERVED_DSR_SIZE;
 		}
+		/* read remaining byte if necessary */
+		if (frame->can_dlc & 1)
+			frame->data[frame->can_dlc - 1] = in_8(data);
 	}
 
 	out_8(&regs->canrflg, MSCAN_RXF);
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index a81249246ece..2adc294f512a 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -46,6 +46,7 @@
 #include <linux/skbuff.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
+#include <linux/io.h>
 
 #include <linux/can/dev.h>
 #include <linux/can/error.h>
diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
index 805076c54f1b..da5a5d9b8aff 100644
--- a/drivers/net/cxgb3/cxgb3_offload.c
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -1146,12 +1146,14 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new)
 		if (te && te->ctx && te->client && te->client->redirect) {
 			update_tcb = te->client->redirect(te->ctx, old, new, e);
 			if (update_tcb) {
+				rcu_read_lock();
 				l2t_hold(L2DATA(tdev), e);
+				rcu_read_unlock();
 				set_l2t_ix(tdev, tid, e);
 			}
 		}
 	}
-	l2t_release(L2DATA(tdev), e);
+	l2t_release(tdev, e);
 }
 
 /*
@@ -1264,7 +1266,7 @@ int cxgb3_offload_activate(struct adapter *adapter)
 		goto out_free;
 
 	err = -ENOMEM;
-	L2DATA(dev) = t3_init_l2t(l2t_capacity);
+	RCU_INIT_POINTER(dev->l2opt, t3_init_l2t(l2t_capacity));
 	if (!L2DATA(dev))
 		goto out_free;
 
@@ -1298,16 +1300,24 @@ int cxgb3_offload_activate(struct adapter *adapter)
 
 out_free_l2t:
 	t3_free_l2t(L2DATA(dev));
-	L2DATA(dev) = NULL;
+	rcu_assign_pointer(dev->l2opt, NULL);
 out_free:
 	kfree(t);
 	return err;
 }
 
+static void clean_l2_data(struct rcu_head *head)
+{
+	struct l2t_data *d = container_of(head, struct l2t_data, rcu_head);
+	t3_free_l2t(d);
+}
+
+
 void cxgb3_offload_deactivate(struct adapter *adapter)
 {
 	struct t3cdev *tdev = &adapter->tdev;
 	struct t3c_data *t = T3C_DATA(tdev);
+	struct l2t_data *d;
 
 	remove_adapter(adapter);
 	if (list_empty(&adapter_list))
@@ -1315,8 +1325,11 @@ void cxgb3_offload_deactivate(struct adapter *adapter)
 
 	free_tid_maps(&t->tid_maps);
 	T3C_DATA(tdev) = NULL;
-	t3_free_l2t(L2DATA(tdev));
-	L2DATA(tdev) = NULL;
+	rcu_read_lock();
+	d = L2DATA(tdev);
+	rcu_read_unlock();
+	rcu_assign_pointer(tdev->l2opt, NULL);
+	call_rcu(&d->rcu_head, clean_l2_data);
 	if (t->nofail_skb)
 		kfree_skb(t->nofail_skb);
 	kfree(t);
diff --git a/drivers/net/cxgb3/l2t.c b/drivers/net/cxgb3/l2t.c
index f452c4003253..41540978a173 100644
--- a/drivers/net/cxgb3/l2t.c
+++ b/drivers/net/cxgb3/l2t.c
@@ -300,14 +300,21 @@ static inline void reuse_entry(struct l2t_entry *e, struct neighbour *neigh)
 struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct neighbour *neigh,
 			     struct net_device *dev)
 {
-	struct l2t_entry *e;
-	struct l2t_data *d = L2DATA(cdev);
+	struct l2t_entry *e = NULL;
+	struct l2t_data *d;
+	int hash;
 	u32 addr = *(u32 *) neigh->primary_key;
 	int ifidx = neigh->dev->ifindex;
-	int hash = arp_hash(addr, ifidx, d);
 	struct port_info *p = netdev_priv(dev);
 	int smt_idx = p->port_id;
 
+	rcu_read_lock();
+	d = L2DATA(cdev);
+	if (!d)
+		goto done_rcu;
+
+	hash = arp_hash(addr, ifidx, d);
+
 	write_lock_bh(&d->lock);
 	for (e = d->l2tab[hash].first; e; e = e->next)
 		if (e->addr == addr && e->ifindex == ifidx &&
@@ -338,6 +345,8 @@ struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct neighbour *neigh,
 	}
 done:
 	write_unlock_bh(&d->lock);
+done_rcu:
+	rcu_read_unlock();
 	return e;
 }
 
diff --git a/drivers/net/cxgb3/l2t.h b/drivers/net/cxgb3/l2t.h
index 7a12d52ed4fc..c5f54796e2cb 100644
--- a/drivers/net/cxgb3/l2t.h
+++ b/drivers/net/cxgb3/l2t.h
@@ -76,6 +76,7 @@ struct l2t_data {
 	atomic_t nfree;		/* number of free entries */
 	rwlock_t lock;
 	struct l2t_entry l2tab[0];
+	struct rcu_head rcu_head;	/* to handle rcu cleanup */
 };
 
 typedef void (*arp_failure_handler_func)(struct t3cdev * dev,
@@ -99,7 +100,7 @@ static inline void set_arp_failure_handler(struct sk_buff *skb,
 /*
  * Getting to the L2 data from an offload device.
  */
-#define L2DATA(dev) ((dev)->l2opt)
+#define L2DATA(cdev) (rcu_dereference((cdev)->l2opt))
 
 #define W_TCB_L2T_IX    0
 #define S_TCB_L2T_IX    7
@@ -126,15 +127,22 @@ static inline int l2t_send(struct t3cdev *dev, struct sk_buff *skb,
 	return t3_l2t_send_slow(dev, skb, e);
 }
 
-static inline void l2t_release(struct l2t_data *d, struct l2t_entry *e)
+static inline void l2t_release(struct t3cdev *t, struct l2t_entry *e)
 {
-	if (atomic_dec_and_test(&e->refcnt))
+	struct l2t_data *d;
+
+	rcu_read_lock();
+	d = L2DATA(t);
+
+	if (atomic_dec_and_test(&e->refcnt) && d)
 		t3_l2e_free(d, e);
+
+	rcu_read_unlock();
 }
 
 static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e)
 {
-	if (atomic_add_return(1, &e->refcnt) == 1)	/* 0 -> 1 transition */
+	if (d && atomic_add_return(1, &e->refcnt) == 1)	/* 0 -> 1 transition */
 		atomic_dec(&d->nfree);
 }
 
diff --git a/drivers/net/cxgb4/cxgb4_main.c b/drivers/net/cxgb4/cxgb4_main.c
index c9957b7f17b5..b4efa292fd6f 100644
--- a/drivers/net/cxgb4/cxgb4_main.c
+++ b/drivers/net/cxgb4/cxgb4_main.c
@@ -3712,6 +3712,9 @@ static int __devinit init_one(struct pci_dev *pdev,
 		setup_debugfs(adapter);
 	}
 
+	/* PCIe EEH recovery on powerpc platforms needs fundamental reset */
+	pdev->needs_freset = 1;
+
 	if (is_offload(adapter))
 		attach_ulds(adapter);
 
diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c
index 8545c7aa93eb..a5a89ecb6f36 100644
--- a/drivers/net/e1000/e1000_hw.c
+++ b/drivers/net/e1000/e1000_hw.c
@@ -4026,6 +4026,12 @@ s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw)
 		checksum += eeprom_data;
 	}
 
+#ifdef CONFIG_PARISC
+	/* This is a signature and not a checksum on HP c8000 */
+	if ((hw->subsystem_vendor_id == 0x103C) && (eeprom_data == 0x16d6))
+		return E1000_SUCCESS;
+
+#endif
 	if (checksum == (u16) EEPROM_SUM)
 		return E1000_SUCCESS;
 	else {
diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c
index 25a8c2adb001..0caf3c323ec0 100644
--- a/drivers/net/gianfar_ethtool.c
+++ b/drivers/net/gianfar_ethtool.c
@@ -1669,10 +1669,10 @@ static int gfar_get_cls_all(struct gfar_private *priv,
 	u32 i = 0;
 
 	list_for_each_entry(comp, &priv->rx_list.list, list) {
-		if (i <= cmd->rule_cnt) {
-			rule_locs[i] = comp->fs.location;
-			i++;
-		}
+		if (i == cmd->rule_cnt)
+			return -EMSGSIZE;
+		rule_locs[i] = comp->fs.location;
+		i++;
 	}
 
 	cmd->data = MAX_FILER_IDX;
diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index 16ce45c11934..52a39000c42c 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -428,6 +428,7 @@ greth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	dma_sync_single_for_device(greth->dev, dma_addr, skb->len, DMA_TO_DEVICE);
 
 	status = GRETH_BD_EN | GRETH_BD_IE | (skb->len & GRETH_BD_LEN);
+	greth->tx_bufs_length[greth->tx_next] = skb->len & GRETH_BD_LEN;
 
 	/* Wrap around descriptor ring */
 	if (greth->tx_next == GRETH_TXBD_NUM_MASK) {
@@ -490,7 +491,8 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev)
 	if (nr_frags != 0)
 		status = GRETH_TXBD_MORE;
 
-	status |= GRETH_TXBD_CSALL;
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		status |= GRETH_TXBD_CSALL;
 	status |= skb_headlen(skb) & GRETH_BD_LEN;
 	if (greth->tx_next == GRETH_TXBD_NUM_MASK)
 		status |= GRETH_BD_WR;
@@ -513,7 +515,9 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev)
 		greth->tx_skbuff[curr_tx] = NULL;
 		bdp = greth->tx_bd_base + curr_tx;
 
-		status = GRETH_TXBD_CSALL | GRETH_BD_EN;
+		status = GRETH_BD_EN;
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			status |= GRETH_TXBD_CSALL;
 		status |= frag->size & GRETH_BD_LEN;
 
 		/* Wrap around descriptor ring */
@@ -641,6 +645,7 @@ static void greth_clean_tx(struct net_device *dev)
 				dev->stats.tx_fifo_errors++;
 		}
 		dev->stats.tx_packets++;
+		dev->stats.tx_bytes += greth->tx_bufs_length[greth->tx_last];
 		greth->tx_last = NEXT_TX(greth->tx_last);
 		greth->tx_free++;
 	}
@@ -695,6 +700,7 @@ static void greth_clean_tx_gbit(struct net_device *dev)
 		greth->tx_skbuff[greth->tx_last] = NULL;
 
 		greth_update_tx_stats(dev, stat);
+		dev->stats.tx_bytes += skb->len;
 
 		bdp = greth->tx_bd_base + greth->tx_last;
 
@@ -796,6 +802,7 @@ static int greth_rx(struct net_device *dev, int limit)
 				memcpy(skb_put(skb, pkt_len), phys_to_virt(dma_addr), pkt_len);
 
 				skb->protocol = eth_type_trans(skb, dev);
+				dev->stats.rx_bytes += pkt_len;
 				dev->stats.rx_packets++;
 				netif_receive_skb(skb);
 			}
@@ -910,6 +917,7 @@ static int greth_rx_gbit(struct net_device *dev, int limit)
 
 				skb->protocol = eth_type_trans(skb, dev);
 				dev->stats.rx_packets++;
+				dev->stats.rx_bytes += pkt_len;
 				netif_receive_skb(skb);
 
 				greth->rx_skbuff[greth->rx_cur] = newskb;
diff --git a/drivers/net/greth.h b/drivers/net/greth.h
index 9a0040dee4da..232a622a85b7 100644
--- a/drivers/net/greth.h
+++ b/drivers/net/greth.h
@@ -103,6 +103,7 @@ struct greth_private {
 
 	unsigned char *tx_bufs[GRETH_TXBD_NUM];
 	unsigned char *rx_bufs[GRETH_RXBD_NUM];
+	u16 tx_bufs_length[GRETH_TXBD_NUM];
 
 	u16 tx_next;
 	u16 tx_last;
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 3e6679269400..d393f1e764ed 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -636,8 +636,8 @@ static int ibmveth_open(struct net_device *netdev)
 		netdev_err(netdev, "unable to request irq 0x%x, rc %d\n",
 			   netdev->irq, rc);
 		do {
-			rc = h_free_logical_lan(adapter->vdev->unit_address);
-		} while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));
+			lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
+		} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));
 
 		goto err_out;
 	}
@@ -757,7 +757,7 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
 	struct ibmveth_adapter *adapter = netdev_priv(dev);
 	unsigned long set_attr, clr_attr, ret_attr;
 	unsigned long set_attr6, clr_attr6;
-	long ret, ret6;
+	long ret, ret4, ret6;
 	int rc1 = 0, rc2 = 0;
 	int restart = 0;
 
@@ -770,6 +770,8 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
 
 	set_attr = 0;
 	clr_attr = 0;
+	set_attr6 = 0;
+	clr_attr6 = 0;
 
 	if (data) {
 		set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
@@ -784,16 +786,20 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
 	if (ret == H_SUCCESS && !(ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK) &&
 	    !(ret_attr & IBMVETH_ILLAN_TRUNK_PRI_MASK) &&
 	    (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
-		ret = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
+		ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
 					 set_attr, &ret_attr);
 
-		if (ret != H_SUCCESS) {
+		if (ret4 != H_SUCCESS) {
 			netdev_err(dev, "unable to change IPv4 checksum "
 					"offload settings. %d rc=%ld\n",
-					data, ret);
+					data, ret4);
+
+			h_illan_attributes(adapter->vdev->unit_address,
+					   set_attr, clr_attr, &ret_attr);
+
+			if (data == 1)
+				dev->features &= ~NETIF_F_IP_CSUM;
 
-			ret = h_illan_attributes(adapter->vdev->unit_address,
-						 set_attr, clr_attr, &ret_attr);
 		} else {
 			adapter->fw_ipv4_csum_support = data;
 		}
@@ -804,15 +810,18 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
 		if (ret6 != H_SUCCESS) {
 			netdev_err(dev, "unable to change IPv6 checksum "
 					"offload settings. %d rc=%ld\n",
-					data, ret);
+					data, ret6);
+
+			h_illan_attributes(adapter->vdev->unit_address,
+					   set_attr6, clr_attr6, &ret_attr);
+
+			if (data == 1)
+				dev->features &= ~NETIF_F_IPV6_CSUM;
 
-			ret = h_illan_attributes(adapter->vdev->unit_address,
-						 set_attr6, clr_attr6,
-						 &ret_attr);
 		} else
 			adapter->fw_ipv6_csum_support = data;
 
-		if (ret != H_SUCCESS || ret6 != H_SUCCESS)
+		if (ret4 == H_SUCCESS || ret6 == H_SUCCESS)
 			adapter->rx_csum = data;
 		else
 			rc1 = -EIO;
@@ -930,6 +939,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 	union ibmveth_buf_desc descs[6];
 	int last, i;
 	int force_bounce = 0;
+	dma_addr_t dma_addr;
 
 	/*
 	 * veth handles a maximum of 6 segments including the header, so
@@ -994,17 +1004,16 @@ retry_bounce:
 	}
 
 	/* Map the header */
-	descs[0].fields.address = dma_map_single(&adapter->vdev->dev, skb->data,
-						 skb_headlen(skb),
-						 DMA_TO_DEVICE);
-	if (dma_mapping_error(&adapter->vdev->dev, descs[0].fields.address))
+	dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
+				  skb_headlen(skb), DMA_TO_DEVICE);
+	if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
 		goto map_failed;
 
 	descs[0].fields.flags_len = desc_flags | skb_headlen(skb);
+	descs[0].fields.address = dma_addr;
 
 	/* Map the frags */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		unsigned long dma_addr;
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 		dma_addr = dma_map_page(&adapter->vdev->dev, frag->page,
@@ -1026,7 +1035,12 @@ retry_bounce:
 		netdev->stats.tx_bytes += skb->len;
 	}
 
-	for (i = 0; i < skb_shinfo(skb)->nr_frags + 1; i++)
+	dma_unmap_single(&adapter->vdev->dev,
+			 descs[0].fields.address,
+			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
+			 DMA_TO_DEVICE);
+
+	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
 		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
 			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
 			       DMA_TO_DEVICE);
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 22790394318a..e1fcc9589278 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1321,8 +1321,8 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		if (ring_is_rsc_enabled(rx_ring))
 			pkt_is_rsc = ixgbe_get_rsc_state(rx_desc);
 
-		/* if this is a skb from previous receive DMA will be 0 */
-		if (rx_buffer_info->dma) {
+		/* linear means we are building an skb from multiple pages */
+		if (!skb_is_nonlinear(skb)) {
 			u16 hlen;
 			if (pkt_is_rsc &&
 			    !(staterr & IXGBE_RXD_STAT_EOP) &&
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 05172c39a0ce..376e3e94bae0 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -239,7 +239,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 		dest = macvlan_hash_lookup(port, eth->h_dest);
 		if (dest && dest->mode == MACVLAN_MODE_BRIDGE) {
 			/* send to lowerdev first for its network taps */
-			vlan->forward(vlan->lowerdev, skb);
+			dev_forward_skb(vlan->lowerdev, skb);
 
 			return NET_XMIT_SUCCESS;
 		}
diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c
index 6e03de034ac7..f76ab6bf3096 100644
--- a/drivers/net/mlx4/en_tx.c
+++ b/drivers/net/mlx4/en_tx.c
@@ -172,7 +172,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 	memset(ring->buf, 0, ring->buf_size);
 
 	ring->qp_state = MLX4_QP_STATE_RST;
-	ring->doorbell_qpn = swab32(ring->qp.qpn << 8);
+	ring->doorbell_qpn = ring->qp.qpn << 8;
 
 	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
 				ring->cqn, &ring->context);
@@ -791,7 +791,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		skb_orphan(skb);
 
 	if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) {
-		*(u32 *) (&tx_desc->ctrl.vlan_tag) |= ring->doorbell_qpn;
+		*(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn);
 		op_own |= htonl((bf_index & 0xffff) << 8);
 		/* Ensure new descirptor hits memory
 		* before setting ownership of this descriptor to HW */
@@ -812,7 +812,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		wmb();
 		tx_desc->ctrl.owner_opcode = op_own;
 		wmb();
-		writel(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL);
+		iowrite32be(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL);
 	}
 
 	/* Poll CQ here */
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index dfc82720065a..ed2a3977c6e7 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -799,5 +799,11 @@ static void __exit cleanup_netconsole(void)
 	}
 }
 
-module_init(init_netconsole);
+/*
+ * Use late_initcall to ensure netconsole is
+ * initialized after network device driver if built-in.
+ *
+ * late_initcall() and module_init() are identical if built as module.
+ */
+late_initcall(init_netconsole);
 module_exit(cleanup_netconsole);
diff --git a/drivers/net/pch_gbe/pch_gbe.h b/drivers/net/pch_gbe/pch_gbe.h
index 59fac77d0dbb..a09a07197eb5 100644
--- a/drivers/net/pch_gbe/pch_gbe.h
+++ b/drivers/net/pch_gbe/pch_gbe.h
@@ -127,8 +127,8 @@ struct pch_gbe_regs {
 
 /* Reset */
 #define PCH_GBE_ALL_RST         0x80000000  /* All reset */
-#define PCH_GBE_TX_RST          0x40000000  /* TX MAC, TX FIFO, TX DMA reset */
-#define PCH_GBE_RX_RST          0x04000000  /* RX MAC, RX FIFO, RX DMA reset */
+#define PCH_GBE_TX_RST          0x00008000  /* TX MAC, TX FIFO, TX DMA reset */
+#define PCH_GBE_RX_RST          0x00004000  /* RX MAC, RX FIFO, RX DMA reset */
 
 /* TCP/IP Accelerator Control */
 #define PCH_GBE_EX_LIST_EN      0x00000008  /* External List Enable */
@@ -276,6 +276,9 @@ struct pch_gbe_regs {
 #define PCH_GBE_RX_DMA_EN       0x00000002   /* Enables Receive DMA */
 #define PCH_GBE_TX_DMA_EN       0x00000001   /* Enables Transmission DMA */
 
+/* RX DMA STATUS */
+#define PCH_GBE_IDLE_CHECK       0xFFFFFFFE
+
 /* Wake On LAN Status */
 #define PCH_GBE_WLS_BR          0x00000008 /* Broadcas Address */
 #define PCH_GBE_WLS_MLT         0x00000004 /* Multicast Address */
@@ -471,6 +474,7 @@ struct pch_gbe_tx_desc {
 struct pch_gbe_buffer {
 	struct sk_buff *skb;
 	dma_addr_t dma;
+	unsigned char *rx_buffer;
 	unsigned long time_stamp;
 	u16 length;
 	bool mapped;
@@ -511,6 +515,9 @@ struct pch_gbe_tx_ring {
 struct pch_gbe_rx_ring {
 	struct pch_gbe_rx_desc *desc;
 	dma_addr_t dma;
+	unsigned char *rx_buff_pool;
+	dma_addr_t rx_buff_pool_logic;
+	unsigned int rx_buff_pool_size;
 	unsigned int size;
 	unsigned int count;
 	unsigned int next_to_use;
@@ -622,6 +629,7 @@ struct pch_gbe_adapter {
 	unsigned long rx_buffer_len;
 	unsigned long tx_queue_len;
 	bool have_msi;
+	bool rx_stop_flag;
 };
 
 extern const char pch_driver_version[];
diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c
index eac3c5ca9731..b8b4ba27b0e7 100644
--- a/drivers/net/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/pch_gbe/pch_gbe_main.c
@@ -20,7 +20,6 @@
 
 #include "pch_gbe.h"
 #include "pch_gbe_api.h"
-#include <linux/prefetch.h>
 
 #define DRV_VERSION     "1.00"
 const char pch_driver_version[] = DRV_VERSION;
@@ -34,11 +33,15 @@ const char pch_driver_version[] = DRV_VERSION;
 #define PCH_GBE_WATCHDOG_PERIOD		(1 * HZ)	/* watchdog time */
 #define PCH_GBE_COPYBREAK_DEFAULT	256
 #define PCH_GBE_PCI_BAR			1
+#define PCH_GBE_RESERVE_MEMORY		0x200000	/* 2MB */
 
 /* Macros for ML7223 */
 #define PCI_VENDOR_ID_ROHM			0x10db
 #define PCI_DEVICE_ID_ROHM_ML7223_GBE		0x8013
 
+/* Macros for ML7831 */
+#define PCI_DEVICE_ID_ROHM_ML7831_GBE		0x8802
+
 #define PCH_GBE_TX_WEIGHT         64
 #define PCH_GBE_RX_WEIGHT         64
 #define PCH_GBE_RX_BUFFER_WRITE   16
@@ -52,6 +55,7 @@ const char pch_driver_version[] = DRV_VERSION;
 	)
 
 /* Ethertype field values */
+#define PCH_GBE_MAX_RX_BUFFER_SIZE      0x2880
 #define PCH_GBE_MAX_JUMBO_FRAME_SIZE    10318
 #define PCH_GBE_FRAME_SIZE_2048         2048
 #define PCH_GBE_FRAME_SIZE_4096         4096
@@ -83,10 +87,12 @@ const char pch_driver_version[] = DRV_VERSION;
 #define PCH_GBE_INT_ENABLE_MASK ( \
 	PCH_GBE_INT_RX_DMA_CMPLT |    \
 	PCH_GBE_INT_RX_DSC_EMP   |    \
+	PCH_GBE_INT_RX_FIFO_ERR  |    \
 	PCH_GBE_INT_WOL_DET      |    \
 	PCH_GBE_INT_TX_CMPLT          \
 	)
 
+#define PCH_GBE_INT_DISABLE_ALL		0
 
 static unsigned int copybreak __read_mostly = PCH_GBE_COPYBREAK_DEFAULT;
 
@@ -138,6 +144,27 @@ static void pch_gbe_wait_clr_bit(void *reg, u32 bit)
 	if (!tmp)
 		pr_err("Error: busy bit is not cleared\n");
 }
+
+/**
+ * pch_gbe_wait_clr_bit_irq - Wait to clear a bit for interrupt context
+ * @reg:	Pointer of register
+ * @busy:	Busy bit
+ */
+static int pch_gbe_wait_clr_bit_irq(void *reg, u32 bit)
+{
+	u32 tmp;
+	int ret = -1;
+	/* wait busy */
+	tmp = 20;
+	while ((ioread32(reg) & bit) && --tmp)
+		udelay(5);
+	if (!tmp)
+		pr_err("Error: busy bit is not cleared\n");
+	else
+		ret = 0;
+	return ret;
+}
+
 /**
  * pch_gbe_mac_mar_set - Set MAC address register
  * @hw:	    Pointer to the HW structure
@@ -189,6 +216,17 @@ static void pch_gbe_mac_reset_hw(struct pch_gbe_hw *hw)
 	return;
 }
 
+static void pch_gbe_mac_reset_rx(struct pch_gbe_hw *hw)
+{
+	/* Read the MAC address. and store to the private data */
+	pch_gbe_mac_read_mac_addr(hw);
+	iowrite32(PCH_GBE_RX_RST, &hw->reg->RESET);
+	pch_gbe_wait_clr_bit_irq(&hw->reg->RESET, PCH_GBE_RX_RST);
+	/* Setup the MAC address */
+	pch_gbe_mac_mar_set(hw, hw->mac.addr, 0);
+	return;
+}
+
 /**
  * pch_gbe_mac_init_rx_addrs - Initialize receive address's
  * @hw:	Pointer to the HW structure
@@ -671,13 +709,8 @@ static void pch_gbe_setup_rctl(struct pch_gbe_adapter *adapter)
 
 	tcpip = ioread32(&hw->reg->TCPIP_ACC);
 
-	if (netdev->features & NETIF_F_RXCSUM) {
-		tcpip &= ~PCH_GBE_RX_TCPIPACC_OFF;
-		tcpip |= PCH_GBE_RX_TCPIPACC_EN;
-	} else {
-		tcpip |= PCH_GBE_RX_TCPIPACC_OFF;
-		tcpip &= ~PCH_GBE_RX_TCPIPACC_EN;
-	}
+	tcpip |= PCH_GBE_RX_TCPIPACC_OFF;
+	tcpip &= ~PCH_GBE_RX_TCPIPACC_EN;
 	iowrite32(tcpip, &hw->reg->TCPIP_ACC);
 	return;
 }
@@ -717,13 +750,6 @@ static void pch_gbe_configure_rx(struct pch_gbe_adapter *adapter)
 	iowrite32(rdba, &hw->reg->RX_DSC_BASE);
 	iowrite32(rdlen, &hw->reg->RX_DSC_SIZE);
 	iowrite32((rdba + rdlen), &hw->reg->RX_DSC_SW_P);
-
-	/* Enables Receive DMA */
-	rxdma = ioread32(&hw->reg->DMA_CTRL);
-	rxdma |= PCH_GBE_RX_DMA_EN;
-	iowrite32(rxdma, &hw->reg->DMA_CTRL);
-	/* Enables Receive */
-	iowrite32(PCH_GBE_MRE_MAC_RX_EN, &hw->reg->MAC_RX_EN);
 }
 
 /**
@@ -1097,6 +1123,48 @@ void pch_gbe_update_stats(struct pch_gbe_adapter *adapter)
 	spin_unlock_irqrestore(&adapter->stats_lock, flags);
 }
 
+static void pch_gbe_stop_receive(struct pch_gbe_adapter *adapter)
+{
+	struct pch_gbe_hw *hw = &adapter->hw;
+	u32 rxdma;
+	u16 value;
+	int ret;
+
+	/* Disable Receive DMA */
+	rxdma = ioread32(&hw->reg->DMA_CTRL);
+	rxdma &= ~PCH_GBE_RX_DMA_EN;
+	iowrite32(rxdma, &hw->reg->DMA_CTRL);
+	/* Wait Rx DMA BUS is IDLE */
+	ret = pch_gbe_wait_clr_bit_irq(&hw->reg->RX_DMA_ST, PCH_GBE_IDLE_CHECK);
+	if (ret) {
+		/* Disable Bus master */
+		pci_read_config_word(adapter->pdev, PCI_COMMAND, &value);
+		value &= ~PCI_COMMAND_MASTER;
+		pci_write_config_word(adapter->pdev, PCI_COMMAND, value);
+		/* Stop Receive */
+		pch_gbe_mac_reset_rx(hw);
+		/* Enable Bus master */
+		value |= PCI_COMMAND_MASTER;
+		pci_write_config_word(adapter->pdev, PCI_COMMAND, value);
+	} else {
+		/* Stop Receive */
+		pch_gbe_mac_reset_rx(hw);
+	}
+}
+
+static void pch_gbe_start_receive(struct pch_gbe_hw *hw)
+{
+	u32 rxdma;
+
+	/* Enables Receive DMA */
+	rxdma = ioread32(&hw->reg->DMA_CTRL);
+	rxdma |= PCH_GBE_RX_DMA_EN;
+	iowrite32(rxdma, &hw->reg->DMA_CTRL);
+	/* Enables Receive */
+	iowrite32(PCH_GBE_MRE_MAC_RX_EN, &hw->reg->MAC_RX_EN);
+	return;
+}
+
 /**
  * pch_gbe_intr - Interrupt Handler
  * @irq:   Interrupt number
@@ -1123,7 +1191,17 @@ static irqreturn_t pch_gbe_intr(int irq, void *data)
 	if (int_st & PCH_GBE_INT_RX_FRAME_ERR)
 		adapter->stats.intr_rx_frame_err_count++;
 	if (int_st & PCH_GBE_INT_RX_FIFO_ERR)
-		adapter->stats.intr_rx_fifo_err_count++;
+		if (!adapter->rx_stop_flag) {
+			adapter->stats.intr_rx_fifo_err_count++;
+			pr_debug("Rx fifo over run\n");
+			adapter->rx_stop_flag = true;
+			int_en = ioread32(&hw->reg->INT_EN);
+			iowrite32((int_en & ~PCH_GBE_INT_RX_FIFO_ERR),
+				  &hw->reg->INT_EN);
+			pch_gbe_stop_receive(adapter);
+			int_st |= ioread32(&hw->reg->INT_ST);
+			int_st = int_st & ioread32(&hw->reg->INT_EN);
+		}
 	if (int_st & PCH_GBE_INT_RX_DMA_ERR)
 		adapter->stats.intr_rx_dma_err_count++;
 	if (int_st & PCH_GBE_INT_TX_FIFO_ERR)
@@ -1135,21 +1213,18 @@ static irqreturn_t pch_gbe_intr(int irq, void *data)
 	/* When Rx descriptor is empty  */
 	if ((int_st & PCH_GBE_INT_RX_DSC_EMP)) {
 		adapter->stats.intr_rx_dsc_empty_count++;
-		pr_err("Rx descriptor is empty\n");
+		pr_debug("Rx descriptor is empty\n");
 		int_en = ioread32(&hw->reg->INT_EN);
 		iowrite32((int_en & ~PCH_GBE_INT_RX_DSC_EMP), &hw->reg->INT_EN);
 		if (hw->mac.tx_fc_enable) {
 			/* Set Pause packet */
 			pch_gbe_mac_set_pause_packet(hw);
 		}
-		if ((int_en & (PCH_GBE_INT_RX_DMA_CMPLT | PCH_GBE_INT_TX_CMPLT))
-		    == 0) {
-			return IRQ_HANDLED;
-		}
 	}
 
 	/* When request status is Receive interruption */
-	if ((int_st & (PCH_GBE_INT_RX_DMA_CMPLT | PCH_GBE_INT_TX_CMPLT))) {
+	if ((int_st & (PCH_GBE_INT_RX_DMA_CMPLT | PCH_GBE_INT_TX_CMPLT)) ||
+	    (adapter->rx_stop_flag == true)) {
 		if (likely(napi_schedule_prep(&adapter->napi))) {
 			/* Enable only Rx Descriptor empty */
 			atomic_inc(&adapter->irq_sem);
@@ -1185,29 +1260,23 @@ pch_gbe_alloc_rx_buffers(struct pch_gbe_adapter *adapter,
 	unsigned int i;
 	unsigned int bufsz;
 
-	bufsz = adapter->rx_buffer_len + PCH_GBE_DMA_ALIGN;
+	bufsz = adapter->rx_buffer_len + NET_IP_ALIGN;
 	i = rx_ring->next_to_use;
 
 	while ((cleaned_count--)) {
 		buffer_info = &rx_ring->buffer_info[i];
-		skb = buffer_info->skb;
-		if (skb) {
-			skb_trim(skb, 0);
-		} else {
-			skb = netdev_alloc_skb(netdev, bufsz);
-			if (unlikely(!skb)) {
-				/* Better luck next round */
-				adapter->stats.rx_alloc_buff_failed++;
-				break;
-			}
-			/* 64byte align */
-			skb_reserve(skb, PCH_GBE_DMA_ALIGN);
-
-			buffer_info->skb = skb;
-			buffer_info->length = adapter->rx_buffer_len;
+		skb = netdev_alloc_skb(netdev, bufsz);
+		if (unlikely(!skb)) {
+			/* Better luck next round */
+			adapter->stats.rx_alloc_buff_failed++;
+			break;
 		}
+		/* align */
+		skb_reserve(skb, NET_IP_ALIGN);
+		buffer_info->skb = skb;
+
 		buffer_info->dma = dma_map_single(&pdev->dev,
-						  skb->data,
+						  buffer_info->rx_buffer,
 						  buffer_info->length,
 						  DMA_FROM_DEVICE);
 		if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
@@ -1240,6 +1309,36 @@ pch_gbe_alloc_rx_buffers(struct pch_gbe_adapter *adapter,
 	return;
 }
 
+static int
+pch_gbe_alloc_rx_buffers_pool(struct pch_gbe_adapter *adapter,
+			 struct pch_gbe_rx_ring *rx_ring, int cleaned_count)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct pch_gbe_buffer *buffer_info;
+	unsigned int i;
+	unsigned int bufsz;
+	unsigned int size;
+
+	bufsz = adapter->rx_buffer_len;
+
+	size = rx_ring->count * bufsz + PCH_GBE_RESERVE_MEMORY;
+	rx_ring->rx_buff_pool = dma_alloc_coherent(&pdev->dev, size,
+						&rx_ring->rx_buff_pool_logic,
+						GFP_KERNEL);
+	if (!rx_ring->rx_buff_pool) {
+		pr_err("Unable to allocate memory for the receive poll buffer\n");
+		return -ENOMEM;
+	}
+	memset(rx_ring->rx_buff_pool, 0, size);
+	rx_ring->rx_buff_pool_size = size;
+	for (i = 0; i < rx_ring->count; i++) {
+		buffer_info = &rx_ring->buffer_info[i];
+		buffer_info->rx_buffer = rx_ring->rx_buff_pool + bufsz * i;
+		buffer_info->length = bufsz;
+	}
+	return 0;
+}
+
 /**
  * pch_gbe_alloc_tx_buffers - Allocate transmit buffers
  * @adapter:   Board private structure
@@ -1285,7 +1384,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter,
 	struct sk_buff *skb;
 	unsigned int i;
 	unsigned int cleaned_count = 0;
-	bool cleaned = false;
+	bool cleaned = true;
 
 	pr_debug("next_to_clean : %d\n", tx_ring->next_to_clean);
 
@@ -1296,7 +1395,6 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter,
 
 	while ((tx_desc->gbec_status & DSC_INIT16) == 0x0000) {
 		pr_debug("gbec_status:0x%04x\n", tx_desc->gbec_status);
-		cleaned = true;
 		buffer_info = &tx_ring->buffer_info[i];
 		skb = buffer_info->skb;
 
@@ -1339,8 +1437,10 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter,
 		tx_desc = PCH_GBE_TX_DESC(*tx_ring, i);
 
 		/* weight of a sort for tx, to avoid endless transmit cleanup */
-		if (cleaned_count++ == PCH_GBE_TX_WEIGHT)
+		if (cleaned_count++ == PCH_GBE_TX_WEIGHT) {
+			cleaned = false;
 			break;
+		}
 	}
 	pr_debug("called pch_gbe_unmap_and_free_tx_resource() %d count\n",
 		 cleaned_count);
@@ -1380,7 +1480,7 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
 	unsigned int i;
 	unsigned int cleaned_count = 0;
 	bool cleaned = false;
-	struct sk_buff *skb, *new_skb;
+	struct sk_buff *skb;
 	u8 dma_status;
 	u16 gbec_status;
 	u32 tcp_ip_status;
@@ -1401,13 +1501,12 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
 		rx_desc->gbec_status = DSC_INIT16;
 		buffer_info = &rx_ring->buffer_info[i];
 		skb = buffer_info->skb;
+		buffer_info->skb = NULL;
 
 		/* unmap dma */
 		dma_unmap_single(&pdev->dev, buffer_info->dma,
 				   buffer_info->length, DMA_FROM_DEVICE);
 		buffer_info->mapped = false;
-		/* Prefetch the packet */
-		prefetch(skb->data);
 
 		pr_debug("RxDecNo = 0x%04x  Status[DMA:0x%02x GBE:0x%04x "
 			 "TCP:0x%08x]  BufInf = 0x%p\n",
@@ -1427,70 +1526,16 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
 			pr_err("Receive CRC Error\n");
 		} else {
 			/* get receive length */
-			/* length convert[-3] */
-			length = (rx_desc->rx_words_eob) - 3;
-
-			/* Decide the data conversion method */
-			if (!(netdev->features & NETIF_F_RXCSUM)) {
-				/* [Header:14][payload] */
-				if (NET_IP_ALIGN) {
-					/* Because alignment differs,
-					 * the new_skb is newly allocated,
-					 * and data is copied to new_skb.*/
-					new_skb = netdev_alloc_skb(netdev,
-							 length + NET_IP_ALIGN);
-					if (!new_skb) {
-						/* dorrop error */
-						pr_err("New skb allocation "
-							"Error\n");
-						goto dorrop;
-					}
-					skb_reserve(new_skb, NET_IP_ALIGN);
-					memcpy(new_skb->data, skb->data,
-					       length);
-					skb = new_skb;
-				} else {
-					/* DMA buffer is used as SKB as it is.*/
-					buffer_info->skb = NULL;
-				}
-			} else {
-				/* [Header:14][padding:2][payload] */
-				/* The length includes padding length */
-				length = length - PCH_GBE_DMA_PADDING;
-				if ((length < copybreak) ||
-				    (NET_IP_ALIGN != PCH_GBE_DMA_PADDING)) {
-					/* Because alignment differs,
-					 * the new_skb is newly allocated,
-					 * and data is copied to new_skb.
-					 * Padding data is deleted
-					 * at the time of a copy.*/
-					new_skb = netdev_alloc_skb(netdev,
-							 length + NET_IP_ALIGN);
-					if (!new_skb) {
-						/* dorrop error */
-						pr_err("New skb allocation "
-							"Error\n");
-						goto dorrop;
-					}
-					skb_reserve(new_skb, NET_IP_ALIGN);
-					memcpy(new_skb->data, skb->data,
-					       ETH_HLEN);
-					memcpy(&new_skb->data[ETH_HLEN],
-					       &skb->data[ETH_HLEN +
-					       PCH_GBE_DMA_PADDING],
-					       length - ETH_HLEN);
-					skb = new_skb;
-				} else {
-					/* Padding data is deleted
-					 * by moving header data.*/
-					memmove(&skb->data[PCH_GBE_DMA_PADDING],
-						&skb->data[0], ETH_HLEN);
-					skb_reserve(skb, NET_IP_ALIGN);
-					buffer_info->skb = NULL;
-				}
-			}
-			/* The length includes FCS length */
-			length = length - ETH_FCS_LEN;
+			/* length convert[-3], length includes FCS length */
+			length = (rx_desc->rx_words_eob) - 3 - ETH_FCS_LEN;
+			if (rx_desc->rx_words_eob & 0x02)
+				length = length - 4;
+			/*
+			 * buffer_info->rx_buffer: [Header:14][payload]
+			 * skb->data: [Reserve:2][Header:14][payload]
+			 */
+			memcpy(skb->data, buffer_info->rx_buffer, length);
+
 			/* update status of driver */
 			adapter->stats.rx_bytes += length;
 			adapter->stats.rx_packets++;
@@ -1509,7 +1554,6 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
 			pr_debug("Receive skb->ip_summed: %d length: %d\n",
 				 skb->ip_summed, length);
 		}
-dorrop:
 		/* return some buffers to hardware, one at a time is too slow */
 		if (unlikely(cleaned_count >= PCH_GBE_RX_BUFFER_WRITE)) {
 			pch_gbe_alloc_rx_buffers(adapter, rx_ring,
@@ -1714,9 +1758,15 @@ int pch_gbe_up(struct pch_gbe_adapter *adapter)
 		pr_err("Error: can't bring device up\n");
 		return err;
 	}
+	err = pch_gbe_alloc_rx_buffers_pool(adapter, rx_ring, rx_ring->count);
+	if (err) {
+		pr_err("Error: can't bring device up\n");
+		return err;
+	}
 	pch_gbe_alloc_tx_buffers(adapter, tx_ring);
 	pch_gbe_alloc_rx_buffers(adapter, rx_ring, rx_ring->count);
 	adapter->tx_queue_len = netdev->tx_queue_len;
+	pch_gbe_start_receive(&adapter->hw);
 
 	mod_timer(&adapter->watchdog_timer, jiffies);
 
@@ -1734,6 +1784,7 @@ int pch_gbe_up(struct pch_gbe_adapter *adapter)
 void pch_gbe_down(struct pch_gbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
+	struct pch_gbe_rx_ring *rx_ring = adapter->rx_ring;
 
 	/* signal that we're down so the interrupt handler does not
 	 * reschedule our watchdog timer */
@@ -1752,6 +1803,12 @@ void pch_gbe_down(struct pch_gbe_adapter *adapter)
 	pch_gbe_reset(adapter);
 	pch_gbe_clean_tx_ring(adapter, adapter->tx_ring);
 	pch_gbe_clean_rx_ring(adapter, adapter->rx_ring);
+
+	pci_free_consistent(adapter->pdev, rx_ring->rx_buff_pool_size,
+			    rx_ring->rx_buff_pool, rx_ring->rx_buff_pool_logic);
+	rx_ring->rx_buff_pool_logic = 0;
+	rx_ring->rx_buff_pool_size = 0;
+	rx_ring->rx_buff_pool = NULL;
 }
 
 /**
@@ -2004,6 +2061,8 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct pch_gbe_adapter *adapter = netdev_priv(netdev);
 	int max_frame;
+	unsigned long old_rx_buffer_len = adapter->rx_buffer_len;
+	int err;
 
 	max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
 	if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
@@ -2018,14 +2077,24 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu)
 	else if (max_frame <= PCH_GBE_FRAME_SIZE_8192)
 		adapter->rx_buffer_len = PCH_GBE_FRAME_SIZE_8192;
 	else
-		adapter->rx_buffer_len = PCH_GBE_MAX_JUMBO_FRAME_SIZE;
-	netdev->mtu = new_mtu;
-	adapter->hw.mac.max_frame_size = max_frame;
+		adapter->rx_buffer_len = PCH_GBE_MAX_RX_BUFFER_SIZE;
 
-	if (netif_running(netdev))
-		pch_gbe_reinit_locked(adapter);
-	else
+	if (netif_running(netdev)) {
+		pch_gbe_down(adapter);
+		err = pch_gbe_up(adapter);
+		if (err) {
+			adapter->rx_buffer_len = old_rx_buffer_len;
+			pch_gbe_up(adapter);
+			return -ENOMEM;
+		} else {
+			netdev->mtu = new_mtu;
+			adapter->hw.mac.max_frame_size = max_frame;
+		}
+	} else {
 		pch_gbe_reset(adapter);
+		netdev->mtu = new_mtu;
+		adapter->hw.mac.max_frame_size = max_frame;
+	}
 
 	pr_debug("max_frame : %d  rx_buffer_len : %d  mtu : %d  max_frame_size : %d\n",
 		 max_frame, (u32) adapter->rx_buffer_len, netdev->mtu,
@@ -2099,33 +2168,39 @@ static int pch_gbe_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct pch_gbe_adapter *adapter =
 	    container_of(napi, struct pch_gbe_adapter, napi);
-	struct net_device *netdev = adapter->netdev;
 	int work_done = 0;
 	bool poll_end_flag = false;
 	bool cleaned = false;
+	u32 int_en;
 
 	pr_debug("budget : %d\n", budget);
 
-	/* Keep link state information with original netdev */
-	if (!netif_carrier_ok(netdev)) {
-		poll_end_flag = true;
-	} else {
-		cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring);
-		pch_gbe_clean_rx(adapter, adapter->rx_ring, &work_done, budget);
+	pch_gbe_clean_rx(adapter, adapter->rx_ring, &work_done, budget);
+	cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring);
 
-		if (cleaned)
-			work_done = budget;
-		/* If no Tx and not enough Rx work done,
-		 * exit the polling mode
-		 */
-		if ((work_done < budget) || !netif_running(netdev))
-			poll_end_flag = true;
-	}
+	if (!cleaned)
+		work_done = budget;
+	/* If no Tx and not enough Rx work done,
+	 * exit the polling mode
+	 */
+	if (work_done < budget)
+		poll_end_flag = true;
 
 	if (poll_end_flag) {
 		napi_complete(napi);
+		if (adapter->rx_stop_flag) {
+			adapter->rx_stop_flag = false;
+			pch_gbe_start_receive(&adapter->hw);
+		}
 		pch_gbe_irq_enable(adapter);
-	}
+	} else
+		if (adapter->rx_stop_flag) {
+			adapter->rx_stop_flag = false;
+			pch_gbe_start_receive(&adapter->hw);
+			int_en = ioread32(&adapter->hw.reg->INT_EN);
+			iowrite32((int_en | PCH_GBE_INT_RX_FIFO_ERR),
+				&adapter->hw.reg->INT_EN);
+		}
 
 	pr_debug("poll_end_flag : %d  work_done : %d  budget : %d\n",
 		 poll_end_flag, work_done, budget);
@@ -2452,6 +2527,13 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gbe_pcidev_id) = {
 	 .class = (PCI_CLASS_NETWORK_ETHERNET << 8),
 	 .class_mask = (0xFFFF00)
 	 },
+	{.vendor = PCI_VENDOR_ID_ROHM,
+	 .device = PCI_DEVICE_ID_ROHM_ML7831_GBE,
+	 .subvendor = PCI_ANY_ID,
+	 .subdevice = PCI_ANY_ID,
+	 .class = (PCI_CLASS_NETWORK_ETHERNET << 8),
+	 .class_mask = (0xFFFF00)
+	 },
 	/* required last entry */
 	{0}
 };
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index cb6e0b486b1e..edd7304773eb 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -589,7 +589,7 @@ static void decode_rxts(struct dp83640_private *dp83640,
 	prune_rx_ts(dp83640);
 
 	if (list_empty(&dp83640->rxpool)) {
-		pr_warning("dp83640: rx timestamp pool is empty\n");
+		pr_debug("dp83640: rx timestamp pool is empty\n");
 		goto out;
 	}
 	rxts = list_first_entry(&dp83640->rxpool, struct rxts, list);
@@ -612,7 +612,7 @@ static void decode_txts(struct dp83640_private *dp83640,
 	skb = skb_dequeue(&dp83640->tx_queue);
 
 	if (!skb) {
-		pr_warning("dp83640: have timestamp but tx_queue empty\n");
+		pr_debug("dp83640: have timestamp but tx_queue empty\n");
 		return;
 	}
 	ns = phy2txts(phy_txts);
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 10e5d985afa3..edfa15d2e795 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1465,7 +1465,12 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 			continue;
 		}
 
-		mtu = pch->chan->mtu - hdrlen;
+		/*
+		 * hdrlen includes the 2-byte PPP protocol field, but the
+		 * MTU counts only the payload excluding the protocol field.
+		 * (RFC1661 Section 2)
+		 */
+		mtu = pch->chan->mtu - (hdrlen - 2);
 		if (mtu < 4)
 			mtu = 4;
 		if (flen > mtu)
diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c
index 1a3033d8e7ed..d17d0624c5e6 100644
--- a/drivers/net/pxa168_eth.c
+++ b/drivers/net/pxa168_eth.c
@@ -40,6 +40,7 @@
 #include <linux/clk.h>
 #include <linux/phy.h>
 #include <linux/io.h>
+#include <linux/interrupt.h>
 #include <linux/types.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 02339b3352e7..c23667017922 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -407,6 +407,7 @@ enum rtl_register_content {
 	RxOK		= 0x0001,
 
 	/* RxStatusDesc */
+	RxBOVF	= (1 << 24),
 	RxFOVF	= (1 << 23),
 	RxRWT	= (1 << 22),
 	RxRES	= (1 << 21),
@@ -682,6 +683,7 @@ struct rtl8169_private {
 	struct mii_if_info mii;
 	struct rtl8169_counters counters;
 	u32 saved_wolopts;
+	u32 opts1_mask;
 
 	struct rtl_fw {
 		const struct firmware *fw;
@@ -710,6 +712,7 @@ MODULE_FIRMWARE(FIRMWARE_8168D_1);
 MODULE_FIRMWARE(FIRMWARE_8168D_2);
 MODULE_FIRMWARE(FIRMWARE_8168E_1);
 MODULE_FIRMWARE(FIRMWARE_8168E_2);
+MODULE_FIRMWARE(FIRMWARE_8168E_3);
 MODULE_FIRMWARE(FIRMWARE_8105E_1);
 
 static int rtl8169_open(struct net_device *dev);
@@ -3077,6 +3080,14 @@ static void rtl8169_phy_reset(struct net_device *dev,
 	netif_err(tp, link, dev, "PHY reset failed\n");
 }
 
+static bool rtl_tbi_enabled(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
+	    (RTL_R8(PHYstatus) & TBI_Enable);
+}
+
 static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -3109,7 +3120,7 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 			   ADVERTISED_1000baseT_Half |
 			   ADVERTISED_1000baseT_Full : 0));
 
-	if (RTL_R8(PHYstatus) & TBI_Enable)
+	if (rtl_tbi_enabled(tp))
 		netif_info(tp, link, dev, "TBI auto-negotiating\n");
 }
 
@@ -3319,9 +3330,16 @@ static void r810x_phy_power_up(struct rtl8169_private *tp)
 
 static void r810x_pll_power_down(struct rtl8169_private *tp)
 {
+	void __iomem *ioaddr = tp->mmio_addr;
+
 	if (__rtl8169_get_wol(tp) & WAKE_ANY) {
 		rtl_writephy(tp, 0x1f, 0x0000);
 		rtl_writephy(tp, MII_BMCR, 0x0000);
+
+		if (tp->mac_version == RTL_GIGA_MAC_VER_29 ||
+		    tp->mac_version == RTL_GIGA_MAC_VER_30)
+			RTL_W32(RxConfig, RTL_R32(RxConfig) | AcceptBroadcast |
+				AcceptMulticast | AcceptMyPhys);
 		return;
 	}
 
@@ -3417,7 +3435,8 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
 		rtl_writephy(tp, MII_BMCR, 0x0000);
 
 		if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
-		    tp->mac_version == RTL_GIGA_MAC_VER_33)
+		    tp->mac_version == RTL_GIGA_MAC_VER_33 ||
+		    tp->mac_version == RTL_GIGA_MAC_VER_34)
 			RTL_W32(RxConfig, RTL_R32(RxConfig) | AcceptBroadcast |
 				AcceptMulticast | AcceptMyPhys);
 		return;
@@ -3727,8 +3746,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->features |= rtl_try_msi(pdev, ioaddr, cfg);
 	RTL_W8(Cfg9346, Cfg9346_Lock);
 
-	if ((tp->mac_version <= RTL_GIGA_MAC_VER_06) &&
-	    (RTL_R8(PHYstatus) & TBI_Enable)) {
+	if (rtl_tbi_enabled(tp)) {
 		tp->set_speed = rtl8169_set_speed_tbi;
 		tp->get_settings = rtl8169_gset_tbi;
 		tp->phy_reset_enable = rtl8169_tbi_reset_enable;
@@ -3777,6 +3795,9 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->intr_event = cfg->intr_event;
 	tp->napi_event = cfg->napi_event;
 
+	tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ?
+		~(RxBOVF | RxFOVF) : ~0;
+
 	init_timer(&tp->timer);
 	tp->timer.data = (unsigned long) dev;
 	tp->timer.function = rtl8169_phy_timer;
@@ -3988,6 +4009,7 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
 		while (RTL_R8(TxPoll) & NPQ)
 			udelay(20);
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_34) {
+		RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
 		while (!(RTL_R32(TxConfig) & TXCFG_EMPTY))
 			udelay(100);
 	} else {
@@ -5314,7 +5336,7 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 		u32 status;
 
 		rmb();
-		status = le32_to_cpu(desc->opts1);
+		status = le32_to_cpu(desc->opts1) & tp->opts1_mask;
 
 		if (status & DescOwn)
 			break;
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index faca764aa21b..b59abc706d93 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -1050,7 +1050,6 @@ static int efx_init_io(struct efx_nic *efx)
 {
 	struct pci_dev *pci_dev = efx->pci_dev;
 	dma_addr_t dma_mask = efx->type->max_dma_mask;
-	bool use_wc;
 	int rc;
 
 	netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
@@ -1101,21 +1100,8 @@ static int efx_init_io(struct efx_nic *efx)
 		rc = -EIO;
 		goto fail3;
 	}
-
-	/* bug22643: If SR-IOV is enabled then tx push over a write combined
-	 * mapping is unsafe. We need to disable write combining in this case.
-	 * MSI is unsupported when SR-IOV is enabled, and the firmware will
-	 * have removed the MSI capability. So write combining is safe if
-	 * there is an MSI capability.
-	 */
-	use_wc = (!EFX_WORKAROUND_22643(efx) ||
-		  pci_find_capability(pci_dev, PCI_CAP_ID_MSI));
-	if (use_wc)
-		efx->membase = ioremap_wc(efx->membase_phys,
-					  efx->type->mem_map_size);
-	else
-		efx->membase = ioremap_nocache(efx->membase_phys,
-					       efx->type->mem_map_size);
+	efx->membase = ioremap_nocache(efx->membase_phys,
+				       efx->type->mem_map_size);
 	if (!efx->membase) {
 		netif_err(efx, probe, efx->net_dev,
 			  "could not map memory BAR at %llx+%x\n",
diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h
index cc978803d484..751d1ec112cc 100644
--- a/drivers/net/sfc/io.h
+++ b/drivers/net/sfc/io.h
@@ -103,7 +103,6 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value,
 	_efx_writed(efx, value->u32[2], reg + 8);
 	_efx_writed(efx, value->u32[3], reg + 12);
 #endif
-	wmb();
 	mmiowb();
 	spin_unlock_irqrestore(&efx->biu_lock, flags);
 }
@@ -126,7 +125,6 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase,
 	__raw_writel((__force u32)value->u32[0], membase + addr);
 	__raw_writel((__force u32)value->u32[1], membase + addr + 4);
 #endif
-	wmb();
 	mmiowb();
 	spin_unlock_irqrestore(&efx->biu_lock, flags);
 }
@@ -141,7 +139,6 @@ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value,
 
 	/* No lock required */
 	_efx_writed(efx, value->u32[0], reg);
-	wmb();
 }
 
 /* Read a 128-bit CSR, locking as appropriate. */
@@ -152,7 +149,6 @@ static inline void efx_reado(struct efx_nic *efx, efx_oword_t *value,
 
 	spin_lock_irqsave(&efx->biu_lock, flags);
 	value->u32[0] = _efx_readd(efx, reg + 0);
-	rmb();
 	value->u32[1] = _efx_readd(efx, reg + 4);
 	value->u32[2] = _efx_readd(efx, reg + 8);
 	value->u32[3] = _efx_readd(efx, reg + 12);
@@ -175,7 +171,6 @@ static inline void efx_sram_readq(struct efx_nic *efx, void __iomem *membase,
 	value->u64[0] = (__force __le64)__raw_readq(membase + addr);
 #else
 	value->u32[0] = (__force __le32)__raw_readl(membase + addr);
-	rmb();
 	value->u32[1] = (__force __le32)__raw_readl(membase + addr + 4);
 #endif
 	spin_unlock_irqrestore(&efx->biu_lock, flags);
@@ -249,7 +244,6 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value,
 	_efx_writed(efx, value->u32[2], reg + 8);
 	_efx_writed(efx, value->u32[3], reg + 12);
 #endif
-	wmb();
 }
 #define efx_writeo_page(efx, value, reg, page)				\
 	_efx_writeo_page(efx, value,					\
diff --git a/drivers/net/sfc/mcdi.c b/drivers/net/sfc/mcdi.c
index 3dd45ed61f0a..81a425397468 100644
--- a/drivers/net/sfc/mcdi.c
+++ b/drivers/net/sfc/mcdi.c
@@ -50,20 +50,6 @@ static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx)
 	return &nic_data->mcdi;
 }
 
-static inline void
-efx_mcdi_readd(struct efx_nic *efx, efx_dword_t *value, unsigned reg)
-{
-	struct siena_nic_data *nic_data = efx->nic_data;
-	value->u32[0] = (__force __le32)__raw_readl(nic_data->mcdi_smem + reg);
-}
-
-static inline void
-efx_mcdi_writed(struct efx_nic *efx, const efx_dword_t *value, unsigned reg)
-{
-	struct siena_nic_data *nic_data = efx->nic_data;
-	__raw_writel((__force u32)value->u32[0], nic_data->mcdi_smem + reg);
-}
-
 void efx_mcdi_init(struct efx_nic *efx)
 {
 	struct efx_mcdi_iface *mcdi;
@@ -84,8 +70,8 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd,
 			    const u8 *inbuf, size_t inlen)
 {
 	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
-	unsigned pdu = MCDI_PDU(efx);
-	unsigned doorbell = MCDI_DOORBELL(efx);
+	unsigned pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
+	unsigned doorbell = FR_CZ_MC_TREG_SMEM + MCDI_DOORBELL(efx);
 	unsigned int i;
 	efx_dword_t hdr;
 	u32 xflags, seqno;
@@ -106,28 +92,29 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd,
 			     MCDI_HEADER_SEQ, seqno,
 			     MCDI_HEADER_XFLAGS, xflags);
 
-	efx_mcdi_writed(efx, &hdr, pdu);
+	efx_writed(efx, &hdr, pdu);
 
 	for (i = 0; i < inlen; i += 4)
-		efx_mcdi_writed(efx, (const efx_dword_t *)(inbuf + i),
-				pdu + 4 + i);
+		_efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i);
+
+	/* Ensure the payload is written out before the header */
+	wmb();
 
 	/* ring the doorbell with a distinctive value */
-	EFX_POPULATE_DWORD_1(hdr, EFX_DWORD_0, 0x45789abc);
-	efx_mcdi_writed(efx, &hdr, doorbell);
+	_efx_writed(efx, (__force __le32) 0x45789abc, doorbell);
 }
 
 static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen)
 {
 	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
-	unsigned int pdu = MCDI_PDU(efx);
+	unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
 	int i;
 
 	BUG_ON(atomic_read(&mcdi->state) == MCDI_STATE_QUIESCENT);
 	BUG_ON(outlen & 3 || outlen >= 0x100);
 
 	for (i = 0; i < outlen; i += 4)
-		efx_mcdi_readd(efx, (efx_dword_t *)(outbuf + i), pdu + 4 + i);
+		*((__le32 *)(outbuf + i)) = _efx_readd(efx, pdu + 4 + i);
 }
 
 static int efx_mcdi_poll(struct efx_nic *efx)
@@ -135,7 +122,7 @@ static int efx_mcdi_poll(struct efx_nic *efx)
 	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
 	unsigned int time, finish;
 	unsigned int respseq, respcmd, error;
-	unsigned int pdu = MCDI_PDU(efx);
+	unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
 	unsigned int rc, spins;
 	efx_dword_t reg;
 
@@ -161,7 +148,8 @@ static int efx_mcdi_poll(struct efx_nic *efx)
 
 		time = get_seconds();
 
-		efx_mcdi_readd(efx, &reg, pdu);
+		rmb();
+		efx_readd(efx, &reg, pdu);
 
 		/* All 1's indicates that shared memory is in reset (and is
 		 * not a valid header). Wait for it to come out reset before
@@ -188,7 +176,7 @@ static int efx_mcdi_poll(struct efx_nic *efx)
 			  respseq, mcdi->seqno);
 		rc = EIO;
 	} else if (error) {
-		efx_mcdi_readd(efx, &reg, pdu + 4);
+		efx_readd(efx, &reg, pdu + 4);
 		switch (EFX_DWORD_FIELD(reg, EFX_DWORD_0)) {
 #define TRANSLATE_ERROR(name)					\
 		case MC_CMD_ERR_ ## name:			\
@@ -222,21 +210,21 @@ out:
 /* Test and clear MC-rebooted flag for this port/function */
 int efx_mcdi_poll_reboot(struct efx_nic *efx)
 {
-	unsigned int addr = MCDI_REBOOT_FLAG(efx);
+	unsigned int addr = FR_CZ_MC_TREG_SMEM + MCDI_REBOOT_FLAG(efx);
 	efx_dword_t reg;
 	uint32_t value;
 
 	if (efx_nic_rev(efx) < EFX_REV_SIENA_A0)
 		return false;
 
-	efx_mcdi_readd(efx, &reg, addr);
+	efx_readd(efx, &reg, addr);
 	value = EFX_DWORD_FIELD(reg, EFX_DWORD_0);
 
 	if (value == 0)
 		return 0;
 
 	EFX_ZERO_DWORD(reg);
-	efx_mcdi_writed(efx, &reg, addr);
+	efx_writed(efx, &reg, addr);
 
 	if (value == MC_STATUS_DWORD_ASSERT)
 		return -EINTR;
diff --git a/drivers/net/sfc/nic.c b/drivers/net/sfc/nic.c
index bafa23a6874c..3edfbaf5f022 100644
--- a/drivers/net/sfc/nic.c
+++ b/drivers/net/sfc/nic.c
@@ -1936,13 +1936,6 @@ void efx_nic_get_regs(struct efx_nic *efx, void *buf)
 
 		size = min_t(size_t, table->step, 16);
 
-		if (table->offset >= efx->type->mem_map_size) {
-			/* No longer mapped; return dummy data */
-			memcpy(buf, "\xde\xc0\xad\xde", 4);
-			buf += table->rows * size;
-			continue;
-		}
-
 		for (i = 0; i < table->rows; i++) {
 			switch (table->step) {
 			case 4: /* 32-bit register or SRAM */
diff --git a/drivers/net/sfc/nic.h b/drivers/net/sfc/nic.h
index 4bd1f2839dfe..7443f99c977f 100644
--- a/drivers/net/sfc/nic.h
+++ b/drivers/net/sfc/nic.h
@@ -143,12 +143,10 @@ static inline struct falcon_board *falcon_board(struct efx_nic *efx)
 /**
  * struct siena_nic_data - Siena NIC state
  * @mcdi: Management-Controller-to-Driver Interface
- * @mcdi_smem: MCDI shared memory mapping. The mapping is always uncacheable.
  * @wol_filter_id: Wake-on-LAN packet filter id
  */
 struct siena_nic_data {
 	struct efx_mcdi_iface mcdi;
-	void __iomem *mcdi_smem;
 	int wol_filter_id;
 };
 
diff --git a/drivers/net/sfc/siena.c b/drivers/net/sfc/siena.c
index 5735e84c69de..2c3bd93fab54 100644
--- a/drivers/net/sfc/siena.c
+++ b/drivers/net/sfc/siena.c
@@ -250,26 +250,12 @@ static int siena_probe_nic(struct efx_nic *efx)
 	efx_reado(efx, &reg, FR_AZ_CS_DEBUG);
 	efx->net_dev->dev_id = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1;
 
-	/* Initialise MCDI */
-	nic_data->mcdi_smem = ioremap_nocache(efx->membase_phys +
-					      FR_CZ_MC_TREG_SMEM,
-					      FR_CZ_MC_TREG_SMEM_STEP *
-					      FR_CZ_MC_TREG_SMEM_ROWS);
-	if (!nic_data->mcdi_smem) {
-		netif_err(efx, probe, efx->net_dev,
-			  "could not map MCDI at %llx+%x\n",
-			  (unsigned long long)efx->membase_phys +
-			  FR_CZ_MC_TREG_SMEM,
-			  FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS);
-		rc = -ENOMEM;
-		goto fail1;
-	}
 	efx_mcdi_init(efx);
 
 	/* Recover from a failed assertion before probing */
 	rc = efx_mcdi_handle_assertion(efx);
 	if (rc)
-		goto fail2;
+		goto fail1;
 
 	/* Let the BMC know that the driver is now in charge of link and
 	 * filter settings. We must do this before we reset the NIC */
@@ -324,7 +310,6 @@ fail4:
 fail3:
 	efx_mcdi_drv_attach(efx, false, NULL);
 fail2:
-	iounmap(nic_data->mcdi_smem);
 fail1:
 	kfree(efx->nic_data);
 	return rc;
@@ -404,8 +389,6 @@ static int siena_init_nic(struct efx_nic *efx)
 
 static void siena_remove_nic(struct efx_nic *efx)
 {
-	struct siena_nic_data *nic_data = efx->nic_data;
-
 	efx_nic_free_buffer(efx, &efx->irq_status);
 
 	siena_reset_hw(efx, RESET_TYPE_ALL);
@@ -415,8 +398,7 @@ static void siena_remove_nic(struct efx_nic *efx)
 		efx_mcdi_drv_attach(efx, false, NULL);
 
 	/* Tear down the private nic state */
-	iounmap(nic_data->mcdi_smem);
-	kfree(nic_data);
+	kfree(efx->nic_data);
 	efx->nic_data = NULL;
 }
 
@@ -656,7 +638,8 @@ const struct efx_nic_type siena_a0_nic_type = {
 	.default_mac_ops = &efx_mcdi_mac_operations,
 
 	.revision = EFX_REV_SIENA_A0,
-	.mem_map_size = FR_CZ_MC_TREG_SMEM, /* MC_TREG_SMEM mapped separately */
+	.mem_map_size = (FR_CZ_MC_TREG_SMEM +
+			 FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS),
 	.txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL,
 	.rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL,
 	.buf_tbl_base = FR_BZ_BUF_FULL_TBL,
diff --git a/drivers/net/sfc/workarounds.h b/drivers/net/sfc/workarounds.h
index 99ff11400cef..e4dd3a7f304b 100644
--- a/drivers/net/sfc/workarounds.h
+++ b/drivers/net/sfc/workarounds.h
@@ -38,8 +38,6 @@
 #define EFX_WORKAROUND_15783 EFX_WORKAROUND_ALWAYS
 /* Legacy interrupt storm when interrupt fifo fills */
 #define EFX_WORKAROUND_17213 EFX_WORKAROUND_SIENA
-/* Write combining and sriov=enabled are incompatible */
-#define EFX_WORKAROUND_22643 EFX_WORKAROUND_SIENA
 
 /* Spurious parity errors in TSORT buffers */
 #define EFX_WORKAROUND_5129 EFX_WORKAROUND_FALCON_A
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index dc3fbf61910b..4a1374df6084 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -6234,12 +6234,10 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-#ifdef BCM_KERNEL_SUPPORTS_8021Q
 	if (vlan_tx_tag_present(skb)) {
 		base_flags |= TXD_FLAG_VLAN;
 		vlan = vlan_tx_tag_get(skb);
 	}
-#endif
 
 	if (tg3_flag(tp, USE_JUMBO_BDFLAG) &&
 	    !mss && skb->len > VLAN_ETH_FRAME_LEN)
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 15772b1b6a91..13c1f044b40d 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -59,6 +59,7 @@
 #define USB_PRODUCT_IPHONE_3G   0x1292
 #define USB_PRODUCT_IPHONE_3GS  0x1294
 #define USB_PRODUCT_IPHONE_4	0x1297
+#define USB_PRODUCT_IPHONE_4_VZW 0x129c
 
 #define IPHETH_USBINTF_CLASS    255
 #define IPHETH_USBINTF_SUBCLASS 253
@@ -98,6 +99,10 @@ static struct usb_device_id ipheth_table[] = {
 		USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4,
 		IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
 		IPHETH_USBINTF_PROTO) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(
+		USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4_VZW,
+		IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
+		IPHETH_USBINTF_PROTO) },
 	{ }
 };
 MODULE_DEVICE_TABLE(usb, ipheth_table);
diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
index 2d4c0910295b..2d394af82171 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
@@ -41,7 +41,8 @@ static bool ar9002_hw_is_cal_supported(struct ath_hw *ah,
 	case ADC_DC_CAL:
 		/* Run ADC Gain Cal for non-CCK & non 2GHz-HT20 only */
 		if (!IS_CHAN_B(chan) &&
-		    !(IS_CHAN_2GHZ(chan) && IS_CHAN_HT20(chan)))
+		    !((IS_CHAN_2GHZ(chan) || IS_CHAN_A_FAST_CLOCK(ah, chan)) &&
+		      IS_CHAN_HT20(chan)))
 			supported = true;
 		break;
 	}
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h b/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h
index 2339728a7306..3e69c631ebb4 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h
+++ b/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h
@@ -1514,7 +1514,7 @@ static const u32 ar9300_2p2_mac_core[][2] = {
 	{0x00008258, 0x00000000},
 	{0x0000825c, 0x40000000},
 	{0x00008260, 0x00080922},
-	{0x00008264, 0x9bc00010},
+	{0x00008264, 0x9d400010},
 	{0x00008268, 0xffffffff},
 	{0x0000826c, 0x0000ffff},
 	{0x00008270, 0x00000000},
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c
index 1baca8e4715d..fcafec0605f4 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c
@@ -671,7 +671,7 @@ static int ar9003_hw_process_ini(struct ath_hw *ah,
 		REG_WRITE_ARRAY(&ah->iniModesAdditional,
 				modesIndex, regWrites);
 
-	if (AR_SREV_9300(ah))
+	if (AR_SREV_9330(ah))
 		REG_WRITE_ARRAY(&ah->iniModesAdditional, 1, regWrites);
 
 	if (AR_SREV_9340(ah) && !ah->is_clk_25mhz)
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 6530694a59ae..722967b86cf1 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -2303,6 +2303,12 @@ static void ath9k_flush(struct ieee80211_hw *hw, bool drop)
 	mutex_lock(&sc->mutex);
 	cancel_delayed_work_sync(&sc->tx_complete_work);
 
+	if (ah->ah_flags & AH_UNPLUGGED) {
+		ath_dbg(common, ATH_DBG_ANY, "Device has been unplugged!\n");
+		mutex_unlock(&sc->mutex);
+		return;
+	}
+
 	if (sc->sc_flags & SC_OP_INVALID) {
 		ath_dbg(common, ATH_DBG_ANY, "Device not present\n");
 		mutex_unlock(&sc->mutex);
diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
index 9a4850154fb2..4c21f8cbdeb5 100644
--- a/drivers/net/wireless/ath/ath9k/recv.c
+++ b/drivers/net/wireless/ath/ath9k/recv.c
@@ -205,14 +205,22 @@ static void ath_rx_remove_buffer(struct ath_softc *sc,
 
 static void ath_rx_edma_cleanup(struct ath_softc *sc)
 {
+	struct ath_hw *ah = sc->sc_ah;
+	struct ath_common *common = ath9k_hw_common(ah);
 	struct ath_buf *bf;
 
 	ath_rx_remove_buffer(sc, ATH9K_RX_QUEUE_LP);
 	ath_rx_remove_buffer(sc, ATH9K_RX_QUEUE_HP);
 
 	list_for_each_entry(bf, &sc->rx.rxbuf, list) {
-		if (bf->bf_mpdu)
+		if (bf->bf_mpdu) {
+			dma_unmap_single(sc->dev, bf->bf_buf_addr,
+					common->rx_bufsize,
+					DMA_BIDIRECTIONAL);
 			dev_kfree_skb_any(bf->bf_mpdu);
+			bf->bf_buf_addr = 0;
+			bf->bf_mpdu = NULL;
+		}
 	}
 
 	INIT_LIST_HEAD(&sc->rx.rxbuf);
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 26f1ab840cc7..e293a7921bf0 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -1632,7 +1632,8 @@ static void handle_irq_beacon(struct b43_wldev *dev)
 	u32 cmd, beacon0_valid, beacon1_valid;
 
 	if (!b43_is_mode(wl, NL80211_IFTYPE_AP) &&
-	    !b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT))
+	    !b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT) &&
+	    !b43_is_mode(wl, NL80211_IFTYPE_ADHOC))
 		return;
 
 	/* This is the bottom half of the asynchronous beacon update. */
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 3774dd034746..ef9ad79d1bfd 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -1903,15 +1903,17 @@ static void ipw2100_down(struct ipw2100_priv *priv)
 static int ipw2100_net_init(struct net_device *dev)
 {
 	struct ipw2100_priv *priv = libipw_priv(dev);
+
+	return ipw2100_up(priv, 1);
+}
+
+static int ipw2100_wdev_init(struct net_device *dev)
+{
+	struct ipw2100_priv *priv = libipw_priv(dev);
 	const struct libipw_geo *geo = libipw_get_geo(priv->ieee);
 	struct wireless_dev *wdev = &priv->ieee->wdev;
-	int ret;
 	int i;
 
-	ret = ipw2100_up(priv, 1);
-	if (ret)
-		return ret;
-
 	memcpy(wdev->wiphy->perm_addr, priv->mac_addr, ETH_ALEN);
 
 	/* fill-out priv->ieee->bg_band */
@@ -6350,9 +6352,13 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev,
 		       "Error calling register_netdev.\n");
 		goto fail;
 	}
+	registered = 1;
+
+	err = ipw2100_wdev_init(dev);
+	if (err)
+		goto fail;
 
 	mutex_lock(&priv->action_mutex);
-	registered = 1;
 
 	IPW_DEBUG_INFO("%s: Bound to %s\n", dev->name, pci_name(pci_dev));
 
@@ -6389,7 +6395,8 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev,
 
       fail_unlock:
 	mutex_unlock(&priv->action_mutex);
-
+	wiphy_unregister(priv->ieee->wdev.wiphy);
+	kfree(priv->ieee->bg_band.channels);
       fail:
 	if (dev) {
 		if (registered)
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index 87813c33bdc2..4ffebede5e03 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -11425,16 +11425,23 @@ static void ipw_bg_down(struct work_struct *work)
 /* Called by register_netdev() */
 static int ipw_net_init(struct net_device *dev)
 {
+	int rc = 0;
+	struct ipw_priv *priv = libipw_priv(dev);
+
+	mutex_lock(&priv->mutex);
+	if (ipw_up(priv))
+		rc = -EIO;
+	mutex_unlock(&priv->mutex);
+
+	return rc;
+}
+
+static int ipw_wdev_init(struct net_device *dev)
+{
 	int i, rc = 0;
 	struct ipw_priv *priv = libipw_priv(dev);
 	const struct libipw_geo *geo = libipw_get_geo(priv->ieee);
 	struct wireless_dev *wdev = &priv->ieee->wdev;
-	mutex_lock(&priv->mutex);
-
-	if (ipw_up(priv)) {
-		rc = -EIO;
-		goto out;
-	}
 
 	memcpy(wdev->wiphy->perm_addr, priv->mac_addr, ETH_ALEN);
 
@@ -11519,13 +11526,9 @@ static int ipw_net_init(struct net_device *dev)
 	set_wiphy_dev(wdev->wiphy, &priv->pci_dev->dev);
 
 	/* With that information in place, we can now register the wiphy... */
-	if (wiphy_register(wdev->wiphy)) {
+	if (wiphy_register(wdev->wiphy))
 		rc = -EIO;
-		goto out;
-	}
-
 out:
-	mutex_unlock(&priv->mutex);
 	return rc;
 }
 
@@ -11832,14 +11835,22 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev,
 		goto out_remove_sysfs;
 	}
 
+	err = ipw_wdev_init(net_dev);
+	if (err) {
+		IPW_ERROR("failed to register wireless device\n");
+		goto out_unregister_netdev;
+	}
+
 #ifdef CONFIG_IPW2200_PROMISCUOUS
 	if (rtap_iface) {
 	        err = ipw_prom_alloc(priv);
 		if (err) {
 			IPW_ERROR("Failed to register promiscuous network "
 				  "device (error %d).\n", err);
-			unregister_netdev(priv->net_dev);
-			goto out_remove_sysfs;
+			wiphy_unregister(priv->ieee->wdev.wiphy);
+			kfree(priv->ieee->a_band.channels);
+			kfree(priv->ieee->bg_band.channels);
+			goto out_unregister_netdev;
 		}
 	}
 #endif
@@ -11851,6 +11862,8 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev,
 
 	return 0;
 
+      out_unregister_netdev:
+	unregister_netdev(priv->net_dev);
       out_remove_sysfs:
 	sysfs_remove_group(&pdev->dev.kobj, &ipw_attribute_group);
       out_release_irq:
diff --git a/drivers/net/wireless/iwlegacy/iwl-3945-rs.c b/drivers/net/wireless/iwlegacy/iwl-3945-rs.c
index 977bd2477c6a..164bcae821f8 100644
--- a/drivers/net/wireless/iwlegacy/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlegacy/iwl-3945-rs.c
@@ -822,12 +822,15 @@ static void iwl3945_rs_get_rate(void *priv_r, struct ieee80211_sta *sta,
 
  out:
 
-	rs_sta->last_txrate_idx = index;
-	if (sband->band == IEEE80211_BAND_5GHZ)
-		info->control.rates[0].idx = rs_sta->last_txrate_idx -
-				IWL_FIRST_OFDM_RATE;
-	else
+	if (sband->band == IEEE80211_BAND_5GHZ) {
+		if (WARN_ON_ONCE(index < IWL_FIRST_OFDM_RATE))
+			index = IWL_FIRST_OFDM_RATE;
+		rs_sta->last_txrate_idx = index;
+		info->control.rates[0].idx = index - IWL_FIRST_OFDM_RATE;
+	} else {
+		rs_sta->last_txrate_idx = index;
 		info->control.rates[0].idx = rs_sta->last_txrate_idx;
+	}
 
 	IWL_DEBUG_RATE(priv, "leave: %d\n", index);
 }
diff --git a/drivers/net/wireless/iwlegacy/iwl-core.c b/drivers/net/wireless/iwlegacy/iwl-core.c
index 35cd2537e7fd..e5971fe9d169 100644
--- a/drivers/net/wireless/iwlegacy/iwl-core.c
+++ b/drivers/net/wireless/iwlegacy/iwl-core.c
@@ -937,7 +937,7 @@ void iwl_legacy_irq_handle_error(struct iwl_priv *priv)
 					&priv->contexts[IWL_RXON_CTX_BSS]);
 #endif
 
-	wake_up_interruptible(&priv->wait_command_queue);
+	wake_up(&priv->wait_command_queue);
 
 	/* Keep the restart process from trying to send host
 	 * commands by clearing the INIT status bit */
@@ -1746,7 +1746,7 @@ int iwl_legacy_force_reset(struct iwl_priv *priv, bool external)
 
 	/* Set the FW error flag -- cleared on iwl_down */
 	set_bit(STATUS_FW_ERROR, &priv->status);
-	wake_up_interruptible(&priv->wait_command_queue);
+	wake_up(&priv->wait_command_queue);
 	/*
 	 * Keep the restart process from trying to send host
 	 * commands by clearing the INIT status bit
diff --git a/drivers/net/wireless/iwlegacy/iwl-hcmd.c b/drivers/net/wireless/iwlegacy/iwl-hcmd.c
index 62b4b09122cb..ce1fc9feb61f 100644
--- a/drivers/net/wireless/iwlegacy/iwl-hcmd.c
+++ b/drivers/net/wireless/iwlegacy/iwl-hcmd.c
@@ -167,7 +167,7 @@ int iwl_legacy_send_cmd_sync(struct iwl_priv *priv, struct iwl_host_cmd *cmd)
 		goto out;
 	}
 
-	ret = wait_event_interruptible_timeout(priv->wait_command_queue,
+	ret = wait_event_timeout(priv->wait_command_queue,
 			!test_bit(STATUS_HCMD_ACTIVE, &priv->status),
 			HOST_COMPLETE_TIMEOUT);
 	if (!ret) {
diff --git a/drivers/net/wireless/iwlegacy/iwl-tx.c b/drivers/net/wireless/iwlegacy/iwl-tx.c
index 4fff995c6f3e..ef9e268bf8a0 100644
--- a/drivers/net/wireless/iwlegacy/iwl-tx.c
+++ b/drivers/net/wireless/iwlegacy/iwl-tx.c
@@ -625,6 +625,8 @@ iwl_legacy_tx_cmd_complete(struct iwl_priv *priv, struct iwl_rx_mem_buffer *rxb)
 	cmd = txq->cmd[cmd_index];
 	meta = &txq->meta[cmd_index];
 
+	txq->time_stamp = jiffies;
+
 	pci_unmap_single(priv->pci_dev,
 			 dma_unmap_addr(meta, mapping),
 			 dma_unmap_len(meta, len),
@@ -645,7 +647,7 @@ iwl_legacy_tx_cmd_complete(struct iwl_priv *priv, struct iwl_rx_mem_buffer *rxb)
 		clear_bit(STATUS_HCMD_ACTIVE, &priv->status);
 		IWL_DEBUG_INFO(priv, "Clearing HCMD_ACTIVE for command %s\n",
 			       iwl_legacy_get_cmd_string(cmd->hdr.cmd));
-		wake_up_interruptible(&priv->wait_command_queue);
+		wake_up(&priv->wait_command_queue);
 	}
 
 	/* Mark as unmapped */
diff --git a/drivers/net/wireless/iwlegacy/iwl3945-base.c b/drivers/net/wireless/iwlegacy/iwl3945-base.c
index 795826a014ed..66ee15629a76 100644
--- a/drivers/net/wireless/iwlegacy/iwl3945-base.c
+++ b/drivers/net/wireless/iwlegacy/iwl3945-base.c
@@ -841,7 +841,7 @@ static void iwl3945_rx_card_state_notif(struct iwl_priv *priv,
 		wiphy_rfkill_set_hw_state(priv->hw->wiphy,
 				test_bit(STATUS_RF_KILL_HW, &priv->status));
 	else
-		wake_up_interruptible(&priv->wait_command_queue);
+		wake_up(&priv->wait_command_queue);
 }
 
 /**
@@ -2269,7 +2269,7 @@ static void iwl3945_alive_start(struct iwl_priv *priv)
 	iwl3945_reg_txpower_periodic(priv);
 
 	IWL_DEBUG_INFO(priv, "ALIVE processing complete.\n");
-	wake_up_interruptible(&priv->wait_command_queue);
+	wake_up(&priv->wait_command_queue);
 
 	return;
 
@@ -2300,7 +2300,7 @@ static void __iwl3945_down(struct iwl_priv *priv)
 	iwl_legacy_clear_driver_stations(priv);
 
 	/* Unblock any waiting calls */
-	wake_up_interruptible_all(&priv->wait_command_queue);
+	wake_up_all(&priv->wait_command_queue);
 
 	/* Wipe out the EXIT_PENDING status bit if we are not actually
 	 * exiting the module */
@@ -2853,7 +2853,7 @@ static int iwl3945_mac_start(struct ieee80211_hw *hw)
 
 	/* Wait for START_ALIVE from ucode. Otherwise callbacks from
 	 * mac80211 will not be run successfully. */
-	ret = wait_event_interruptible_timeout(priv->wait_command_queue,
+	ret = wait_event_timeout(priv->wait_command_queue,
 			test_bit(STATUS_READY, &priv->status),
 			UCODE_READY_TIMEOUT);
 	if (!ret) {
diff --git a/drivers/net/wireless/iwlegacy/iwl4965-base.c b/drivers/net/wireless/iwlegacy/iwl4965-base.c
index 14334668034e..aa0c2539761e 100644
--- a/drivers/net/wireless/iwlegacy/iwl4965-base.c
+++ b/drivers/net/wireless/iwlegacy/iwl4965-base.c
@@ -576,7 +576,7 @@ static void iwl4965_rx_card_state_notif(struct iwl_priv *priv,
 		wiphy_rfkill_set_hw_state(priv->hw->wiphy,
 			test_bit(STATUS_RF_KILL_HW, &priv->status));
 	else
-		wake_up_interruptible(&priv->wait_command_queue);
+		wake_up(&priv->wait_command_queue);
 }
 
 /**
@@ -926,7 +926,7 @@ static void iwl4965_irq_tasklet(struct iwl_priv *priv)
 		handled |= CSR_INT_BIT_FH_TX;
 		/* Wake up uCode load routine, now that load is complete */
 		priv->ucode_write_complete = 1;
-		wake_up_interruptible(&priv->wait_command_queue);
+		wake_up(&priv->wait_command_queue);
 	}
 
 	if (inta & ~handled) {
@@ -1795,7 +1795,7 @@ static void iwl4965_alive_start(struct iwl_priv *priv)
 	iwl4965_rf_kill_ct_config(priv);
 
 	IWL_DEBUG_INFO(priv, "ALIVE processing complete.\n");
-	wake_up_interruptible(&priv->wait_command_queue);
+	wake_up(&priv->wait_command_queue);
 
 	iwl_legacy_power_update_mode(priv, true);
 	IWL_DEBUG_INFO(priv, "Updated power mode\n");
@@ -1828,7 +1828,7 @@ static void __iwl4965_down(struct iwl_priv *priv)
 	iwl_legacy_clear_driver_stations(priv);
 
 	/* Unblock any waiting calls */
-	wake_up_interruptible_all(&priv->wait_command_queue);
+	wake_up_all(&priv->wait_command_queue);
 
 	/* Wipe out the EXIT_PENDING status bit if we are not actually
 	 * exiting the module */
@@ -2266,7 +2266,7 @@ int iwl4965_mac_start(struct ieee80211_hw *hw)
 
 	/* Wait for START_ALIVE from Run Time ucode. Otherwise callbacks from
 	 * mac80211 will not be run successfully. */
-	ret = wait_event_interruptible_timeout(priv->wait_command_queue,
+	ret = wait_event_timeout(priv->wait_command_queue,
 			test_bit(STATUS_READY, &priv->status),
 			UCODE_READY_TIMEOUT);
 	if (!ret) {
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c b/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c
index a895a099d086..56211006a182 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c
@@ -167,7 +167,7 @@ static int iwlagn_set_temperature_offset_calib(struct iwl_priv *priv)
 
 	memset(&cmd, 0, sizeof(cmd));
 	iwl_set_calib_hdr(&cmd.hdr, IWL_PHY_CALIBRATE_TEMP_OFFSET_CMD);
-	memcpy(&cmd.radio_sensor_offset, offset_calib, sizeof(offset_calib));
+	memcpy(&cmd.radio_sensor_offset, offset_calib, sizeof(*offset_calib));
 	if (!(cmd.radio_sensor_offset))
 		cmd.radio_sensor_offset = DEFAULT_RADIO_SENSOR_OFFSET;
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index b0ae4de7f083..f9c3cd95d614 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -2140,7 +2140,12 @@ static int iwl_mac_setup_register(struct iwl_priv *priv,
 		    IEEE80211_HW_SPECTRUM_MGMT |
 		    IEEE80211_HW_REPORTS_TX_ACK_STATUS;
 
+	/*
+	 * Including the following line will crash some AP's.  This
+	 * workaround removes the stimulus which causes the crash until
+	 * the AP software can be fixed.
 	hw->max_tx_aggregation_subframes = LINK_QUAL_AGG_FRAME_LIMIT_DEF;
+	 */
 
 	hw->flags |= IEEE80211_HW_SUPPORTS_PS |
 		     IEEE80211_HW_SUPPORTS_DYNAMIC_PS;
diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c
index dd6937e97055..77e528f5db88 100644
--- a/drivers/net/wireless/iwlwifi/iwl-scan.c
+++ b/drivers/net/wireless/iwlwifi/iwl-scan.c
@@ -405,31 +405,33 @@ int iwl_mac_hw_scan(struct ieee80211_hw *hw,
 
 	mutex_lock(&priv->mutex);
 
-	if (test_bit(STATUS_SCANNING, &priv->status) &&
-	    priv->scan_type != IWL_SCAN_NORMAL) {
-		IWL_DEBUG_SCAN(priv, "Scan already in progress.\n");
-		ret = -EAGAIN;
-		goto out_unlock;
-	}
-
-	/* mac80211 will only ask for one band at a time */
-	priv->scan_request = req;
-	priv->scan_vif = vif;
-
 	/*
 	 * If an internal scan is in progress, just set
 	 * up the scan_request as per above.
 	 */
 	if (priv->scan_type != IWL_SCAN_NORMAL) {
-		IWL_DEBUG_SCAN(priv, "SCAN request during internal scan\n");
+		IWL_DEBUG_SCAN(priv,
+			       "SCAN request during internal scan - defer\n");
+		priv->scan_request = req;
+		priv->scan_vif = vif;
 		ret = 0;
-	} else
+	} else {
+		priv->scan_request = req;
+		priv->scan_vif = vif;
+		/*
+		 * mac80211 will only ask for one band at a time
+		 * so using channels[0] here is ok
+		 */
 		ret = iwl_scan_initiate(priv, vif, IWL_SCAN_NORMAL,
 					req->channels[0]->band);
+		if (ret) {
+			priv->scan_request = NULL;
+			priv->scan_vif = NULL;
+		}
+	}
 
 	IWL_DEBUG_MAC80211(priv, "leave\n");
 
-out_unlock:
 	mutex_unlock(&priv->mutex);
 
 	return ret;
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c
index a6b2b1db0b1d..222d410c586e 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c
+++ b/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c
@@ -771,6 +771,8 @@ void iwl_tx_cmd_complete(struct iwl_priv *priv, struct iwl_rx_mem_buffer *rxb)
 	cmd = txq->cmd[cmd_index];
 	meta = &txq->meta[cmd_index];
 
+	txq->time_stamp = jiffies;
+
 	iwlagn_unmap_tfd(priv, meta, &txq->tfds[index], DMA_BIDIRECTIONAL);
 
 	/* Input error checking is done when commands are added to queue. */
diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c
index ef67f6786a84..0019dfd8fb01 100644
--- a/drivers/net/wireless/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/rt2x00/rt2800lib.c
@@ -3697,14 +3697,15 @@ static void rt2800_efuse_read(struct rt2x00_dev *rt2x00dev, unsigned int i)
 	rt2800_regbusy_read(rt2x00dev, EFUSE_CTRL, EFUSE_CTRL_KICK, &reg);
 
 	/* Apparently the data is read from end to start */
-	rt2800_register_read_lock(rt2x00dev, EFUSE_DATA3,
-					(u32 *)&rt2x00dev->eeprom[i]);
-	rt2800_register_read_lock(rt2x00dev, EFUSE_DATA2,
-					(u32 *)&rt2x00dev->eeprom[i + 2]);
-	rt2800_register_read_lock(rt2x00dev, EFUSE_DATA1,
-					(u32 *)&rt2x00dev->eeprom[i + 4]);
-	rt2800_register_read_lock(rt2x00dev, EFUSE_DATA0,
-					(u32 *)&rt2x00dev->eeprom[i + 6]);
+	rt2800_register_read_lock(rt2x00dev, EFUSE_DATA3, &reg);
+	/* The returned value is in CPU order, but eeprom is le */
+	rt2x00dev->eeprom[i] = cpu_to_le32(reg);
+	rt2800_register_read_lock(rt2x00dev, EFUSE_DATA2, &reg);
+	*(u32 *)&rt2x00dev->eeprom[i + 2] = cpu_to_le32(reg);
+	rt2800_register_read_lock(rt2x00dev, EFUSE_DATA1, &reg);
+	*(u32 *)&rt2x00dev->eeprom[i + 4] = cpu_to_le32(reg);
+	rt2800_register_read_lock(rt2x00dev, EFUSE_DATA0, &reg);
+	*(u32 *)&rt2x00dev->eeprom[i + 6] = cpu_to_le32(reg);
 
 	mutex_unlock(&rt2x00dev->csr_mutex);
 }
@@ -3870,19 +3871,23 @@ int rt2800_init_eeprom(struct rt2x00_dev *rt2x00dev)
 		return -ENODEV;
 	}
 
-	if (!rt2x00_rf(rt2x00dev, RF2820) &&
-	    !rt2x00_rf(rt2x00dev, RF2850) &&
-	    !rt2x00_rf(rt2x00dev, RF2720) &&
-	    !rt2x00_rf(rt2x00dev, RF2750) &&
-	    !rt2x00_rf(rt2x00dev, RF3020) &&
-	    !rt2x00_rf(rt2x00dev, RF2020) &&
-	    !rt2x00_rf(rt2x00dev, RF3021) &&
-	    !rt2x00_rf(rt2x00dev, RF3022) &&
-	    !rt2x00_rf(rt2x00dev, RF3052) &&
-	    !rt2x00_rf(rt2x00dev, RF3320) &&
-	    !rt2x00_rf(rt2x00dev, RF5370) &&
-	    !rt2x00_rf(rt2x00dev, RF5390)) {
-		ERROR(rt2x00dev, "Invalid RF chipset detected.\n");
+	switch (rt2x00dev->chip.rf) {
+	case RF2820:
+	case RF2850:
+	case RF2720:
+	case RF2750:
+	case RF3020:
+	case RF2020:
+	case RF3021:
+	case RF3022:
+	case RF3052:
+	case RF3320:
+	case RF5370:
+	case RF5390:
+		break;
+	default:
+		ERROR(rt2x00dev, "Invalid RF chipset 0x%x detected.\n",
+		      rt2x00dev->chip.rf);
 		return -ENODEV;
 	}
 
diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c
index 1bdc1aa305c0..04c4e9eb6ee6 100644
--- a/drivers/net/wireless/rtlwifi/core.c
+++ b/drivers/net/wireless/rtlwifi/core.c
@@ -610,6 +610,11 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw,
 
 			mac->link_state = MAC80211_NOLINK;
 			memset(mac->bssid, 0, 6);
+
+			/* reset sec info */
+			rtl_cam_reset_sec_info(hw);
+
+			rtl_cam_reset_all_entry(hw);
 			mac->vendor = PEER_UNKNOWN;
 
 			RT_TRACE(rtlpriv, COMP_MAC80211, DBG_DMESG,
@@ -1063,6 +1068,9 @@ static int rtl_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		 *or clear all entry here.
 		 */
 		rtl_cam_delete_one_entry(hw, mac_addr, key_idx);
+
+		rtl_cam_reset_sec_info(hw);
+
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c
index 906e7aa55bc3..3e52a5496224 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c
@@ -549,15 +549,16 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
 			       (tcb_desc->rts_use_shortpreamble ? 1 : 0)
 			       : (tcb_desc->rts_use_shortgi ? 1 : 0)));
 	if (mac->bw_40) {
-		if (tcb_desc->packet_bw) {
+		if (rate_flag & IEEE80211_TX_RC_DUP_DATA) {
 			SET_TX_DESC_DATA_BW(txdesc, 1);
 			SET_TX_DESC_DATA_SC(txdesc, 3);
+		} else if(rate_flag & IEEE80211_TX_RC_40_MHZ_WIDTH){
+			SET_TX_DESC_DATA_BW(txdesc, 1);
+			SET_TX_DESC_DATA_SC(txdesc, mac->cur_40_prime_sc);
 		} else {
 			SET_TX_DESC_DATA_BW(txdesc, 0);
-				if (rate_flag & IEEE80211_TX_RC_DUP_DATA)
-					SET_TX_DESC_DATA_SC(txdesc,
-							  mac->cur_40_prime_sc);
-			}
+			SET_TX_DESC_DATA_SC(txdesc, 0);
+		}
 	} else {
 		SET_TX_DESC_DATA_BW(txdesc, 0);
 		SET_TX_DESC_DATA_SC(txdesc, 0);
diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c
index 8b1cef0ffde6..4bf3cf457ef0 100644
--- a/drivers/net/wireless/rtlwifi/usb.c
+++ b/drivers/net/wireless/rtlwifi/usb.c
@@ -863,6 +863,7 @@ static void _rtl_usb_tx_preprocess(struct ieee80211_hw *hw, struct sk_buff *skb,
 	u8 tid = 0;
 	u16 seq_number = 0;
 
+	memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc));
 	if (ieee80211_is_auth(fc)) {
 		RT_TRACE(rtlpriv, COMP_SEND, DBG_DMESG, ("MAC80211_LINKING\n"));
 		rtl_ips_nic_on(hw);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 0ca86f9ec4ed..182562952c79 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -327,12 +327,12 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 	xenvif_get(vif);
 
 	rtnl_lock();
-	if (netif_running(vif->dev))
-		xenvif_up(vif);
 	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
 		dev_set_mtu(vif->dev, ETH_DATA_LEN);
 	netdev_update_features(vif->dev);
 	netif_carrier_on(vif->dev);
+	if (netif_running(vif->dev))
+		xenvif_up(vif);
 	rtnl_unlock();
 
 	return 0;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 4e84fd4a4312..e9651f0a8817 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -77,7 +77,7 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
 unsigned long pci_hotplug_io_size  = DEFAULT_HOTPLUG_IO_SIZE;
 unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
 
-enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE;
+enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_TUNE_OFF;
 
 /*
  * The default CLS is used if arch didn't set CLS explicitly and not
@@ -3568,10 +3568,14 @@ static int __init pci_setup(char *str)
 				pci_hotplug_io_size = memparse(str + 9, &str);
 			} else if (!strncmp(str, "hpmemsize=", 10)) {
 				pci_hotplug_mem_size = memparse(str + 10, &str);
+			} else if (!strncmp(str, "pcie_bus_tune_off", 17)) {
+				pcie_bus_config = PCIE_BUS_TUNE_OFF;
 			} else if (!strncmp(str, "pcie_bus_safe", 13)) {
 				pcie_bus_config = PCIE_BUS_SAFE;
 			} else if (!strncmp(str, "pcie_bus_perf", 13)) {
 				pcie_bus_config = PCIE_BUS_PERFORMANCE;
+			} else if (!strncmp(str, "pcie_bus_peer2peer", 18)) {
+				pcie_bus_config = PCIE_BUS_PEER2PEER;
 			} else {
 				printk(KERN_ERR "PCI: Unknown option `%s'\n",
 						str);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index b1187ff31d89..6ab6bd3df4b2 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1351,7 +1351,8 @@ static int pcie_find_smpss(struct pci_dev *dev, void *data)
 	 * will occur as normal.
 	 */
 	if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) ||
-	    dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT))
+	     (dev->bus->self &&
+	      dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)))
 		*smpss = 0;
 
 	if (*smpss > dev->pcie_mpss)
@@ -1457,12 +1458,24 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data)
  */
 void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss)
 {
-	u8 smpss = mpss;
+	u8 smpss;
 
 	if (!pci_is_pcie(bus->self))
 		return;
 
+	if (pcie_bus_config == PCIE_BUS_TUNE_OFF)
+		return;
+
+	/* FIXME - Peer to peer DMA is possible, though the endpoint would need
+	 * to be aware to the MPS of the destination.  To work around this,
+	 * simply force the MPS of the entire system to the smallest possible.
+	 */
+	if (pcie_bus_config == PCIE_BUS_PEER2PEER)
+		smpss = 0;
+
 	if (pcie_bus_config == PCIE_BUS_SAFE) {
+		smpss = mpss;
+
 		pcie_find_smpss(bus->self, &smpss);
 		pci_walk_bus(bus, pcie_find_smpss, &smpss);
 	}
diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c
index 2dd3c0163272..d93a9608b1f0 100644
--- a/drivers/rtc/rtc-imxdi.c
+++ b/drivers/rtc/rtc-imxdi.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
+#include <linux/sched.h>
 #include <linux/workqueue.h>
 
 /* DryIce Register Definitions */
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 4e7c04e773e0..7639ab906f02 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -51,6 +51,27 @@ static enum s3c_cpu_type s3c_rtc_cpu_type;
 
 static DEFINE_SPINLOCK(s3c_rtc_pie_lock);
 
+static void s3c_rtc_alarm_clk_enable(bool enable)
+{
+	static DEFINE_SPINLOCK(s3c_rtc_alarm_clk_lock);
+	static bool alarm_clk_enabled;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&s3c_rtc_alarm_clk_lock, irq_flags);
+	if (enable) {
+		if (!alarm_clk_enabled) {
+			clk_enable(rtc_clk);
+			alarm_clk_enabled = true;
+		}
+	} else {
+		if (alarm_clk_enabled) {
+			clk_disable(rtc_clk);
+			alarm_clk_enabled = false;
+		}
+	}
+	spin_unlock_irqrestore(&s3c_rtc_alarm_clk_lock, irq_flags);
+}
+
 /* IRQ Handlers */
 
 static irqreturn_t s3c_rtc_alarmirq(int irq, void *id)
@@ -64,6 +85,9 @@ static irqreturn_t s3c_rtc_alarmirq(int irq, void *id)
 		writeb(S3C2410_INTP_ALM, s3c_rtc_base + S3C2410_INTP);
 
 	clk_disable(rtc_clk);
+
+	s3c_rtc_alarm_clk_enable(false);
+
 	return IRQ_HANDLED;
 }
 
@@ -97,6 +121,8 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
 	writeb(tmp, s3c_rtc_base + S3C2410_RTCALM);
 	clk_disable(rtc_clk);
 
+	s3c_rtc_alarm_clk_enable(enabled);
+
 	return 0;
 }
 
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index cbde448f9947..eb3140ee821e 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -654,8 +654,8 @@ static struct io_subchannel_private console_priv;
 static int console_subchannel_in_use;
 
 /*
- * Use tpi to get a pending interrupt, call the interrupt handler and
- * return a pointer to the subchannel structure.
+ * Use cio_tpi to get a pending interrupt and call the interrupt handler.
+ * Return non-zero if an interrupt was processed, zero otherwise.
  */
 static int cio_tpi(void)
 {
@@ -667,6 +667,10 @@ static int cio_tpi(void)
 	tpi_info = (struct tpi_info *)&S390_lowcore.subchannel_id;
 	if (tpi(NULL) != 1)
 		return 0;
+	if (tpi_info->adapter_IO) {
+		do_adapter_IO(tpi_info->isc);
+		return 1;
+	}
 	irb = (struct irb *)&S390_lowcore.irb;
 	/* Store interrupt response block to lowcore. */
 	if (tsch(tpi_info->schid, irb) != 0)
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index b7bd5b0cc7aa..3868ab2397c6 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -1800,10 +1800,12 @@ static int twa_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_
 	switch (retval) {
 	case SCSI_MLQUEUE_HOST_BUSY:
 		twa_free_request_id(tw_dev, request_id);
+		twa_unmap_scsi_data(tw_dev, request_id);
 		break;
 	case 1:
 		tw_dev->state[request_id] = TW_S_COMPLETED;
 		twa_free_request_id(tw_dev, request_id);
+		twa_unmap_scsi_data(tw_dev, request_id);
 		SCpnt->result = (DID_ERROR << 16);
 		done(SCpnt);
 		retval = 0;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 8d9dae89f065..3878b7395081 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -837,6 +837,7 @@ config SCSI_ISCI
 	# (temporary): known alpha quality driver
 	depends on EXPERIMENTAL
 	select SCSI_SAS_LIBSAS
+	select SCSI_SAS_HOST_SMP
 	---help---
 	  This driver supports the 6Gb/s SAS capabilities of the storage
 	  control unit found in the Intel(R) C600 series chipset.
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 3c08f5352b2d..6153a66a8a31 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -88,7 +88,7 @@ obj-$(CONFIG_SCSI_QLOGIC_FAS)	+= qlogicfas408.o	qlogicfas.o
 obj-$(CONFIG_PCMCIA_QLOGIC)	+= qlogicfas408.o
 obj-$(CONFIG_SCSI_QLOGIC_1280)	+= qla1280.o 
 obj-$(CONFIG_SCSI_QLA_FC)	+= qla2xxx/
-obj-$(CONFIG_SCSI_QLA_ISCSI)	+= qla4xxx/
+obj-$(CONFIG_SCSI_QLA_ISCSI)	+= libiscsi.o qla4xxx/
 obj-$(CONFIG_SCSI_LPFC)		+= lpfc/
 obj-$(CONFIG_SCSI_BFA_FC)	+= bfa/
 obj-$(CONFIG_SCSI_PAS16)	+= pas16.o
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index e7d0d47b9185..e5f2d7d9002e 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -1283,6 +1283,8 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced)
 	kfree(aac->queues);
 	aac->queues = NULL;
 	free_irq(aac->pdev->irq, aac);
+	if (aac->msi)
+		pci_disable_msi(aac->pdev);
 	kfree(aac->fsa_dev);
 	aac->fsa_dev = NULL;
 	quirks = aac_get_driver_ident(index)->quirks;
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index 9ae80cd5953b..dba72a4e6a1c 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -563,7 +563,7 @@ int bnx2i_send_iscsi_nopout(struct bnx2i_conn *bnx2i_conn,
 	nopout_wqe->itt = ((u16)task->itt |
 			   (ISCSI_TASK_TYPE_MPATH <<
 			    ISCSI_TMF_REQUEST_TYPE_SHIFT));
-	nopout_wqe->ttt = nopout_hdr->ttt;
+	nopout_wqe->ttt = be32_to_cpu(nopout_hdr->ttt);
 	nopout_wqe->flags = 0;
 	if (!unsol)
 		nopout_wqe->flags = ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION;
diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
index bd22041e2789..f58644850333 100644
--- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
+++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
@@ -913,7 +913,7 @@ static void l2t_put(struct cxgbi_sock *csk)
 	struct t3cdev *t3dev = (struct t3cdev *)csk->cdev->lldev;
 
 	if (csk->l2t) {
-		l2t_release(L2DATA(t3dev), csk->l2t);
+		l2t_release(t3dev, csk->l2t);
 		csk->l2t = NULL;
 		cxgbi_sock_put(csk);
 	}
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index ba710e350ac5..5d0e9a24ae94 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -432,6 +432,8 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
 	u8 flogi_maddr[ETH_ALEN];
 	const struct net_device_ops *ops;
 
+	rtnl_lock();
+
 	/*
 	 * Don't listen for Ethernet packets anymore.
 	 * synchronize_net() ensures that the packet handlers are not running
@@ -461,6 +463,8 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
 					" specific feature for LLD.\n");
 	}
 
+	rtnl_unlock();
+
 	/* Release the self-reference taken during fcoe_interface_create() */
 	fcoe_interface_put(fcoe);
 }
@@ -1951,11 +1955,8 @@ static void fcoe_destroy_work(struct work_struct *work)
 	fcoe_if_destroy(port->lport);
 
 	/* Do not tear down the fcoe interface for NPIV port */
-	if (!npiv) {
-		rtnl_lock();
+	if (!npiv)
 		fcoe_interface_cleanup(fcoe);
-		rtnl_unlock();
-	}
 
 	mutex_unlock(&fcoe_config_mutex);
 }
@@ -2009,8 +2010,9 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode)
 		printk(KERN_ERR "fcoe: Failed to create interface (%s)\n",
 		       netdev->name);
 		rc = -EIO;
+		rtnl_unlock();
 		fcoe_interface_cleanup(fcoe);
-		goto out_nodev;
+		goto out_nortnl;
 	}
 
 	/* Make this the "master" N_Port */
@@ -2027,6 +2029,7 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode)
 
 out_nodev:
 	rtnl_unlock();
+out_nortnl:
 	mutex_unlock(&fcoe_config_mutex);
 	return rc;
 }
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index ec61bdb833ac..b200b736b000 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -676,6 +676,16 @@ static void hpsa_scsi_replace_entry(struct ctlr_info *h, int hostno,
 	BUG_ON(entry < 0 || entry >= HPSA_MAX_SCSI_DEVS_PER_HBA);
 	removed[*nremoved] = h->dev[entry];
 	(*nremoved)++;
+
+	/*
+	 * New physical devices won't have target/lun assigned yet
+	 * so we need to preserve the values in the slot we are replacing.
+	 */
+	if (new_entry->target == -1) {
+		new_entry->target = h->dev[entry]->target;
+		new_entry->lun = h->dev[entry]->lun;
+	}
+
 	h->dev[entry] = new_entry;
 	added[*nadded] = new_entry;
 	(*nadded)++;
@@ -1548,10 +1558,17 @@ static inline void hpsa_set_bus_target_lun(struct hpsa_scsi_dev_t *device,
 }
 
 static int hpsa_update_device_info(struct ctlr_info *h,
-	unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device)
+	unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device,
+	unsigned char *is_OBDR_device)
 {
-#define OBDR_TAPE_INQ_SIZE 49
+
+#define OBDR_SIG_OFFSET 43
+#define OBDR_TAPE_SIG "$DR-10"
+#define OBDR_SIG_LEN (sizeof(OBDR_TAPE_SIG) - 1)
+#define OBDR_TAPE_INQ_SIZE (OBDR_SIG_OFFSET + OBDR_SIG_LEN)
+
 	unsigned char *inq_buff;
+	unsigned char *obdr_sig;
 
 	inq_buff = kzalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
 	if (!inq_buff)
@@ -1583,6 +1600,16 @@ static int hpsa_update_device_info(struct ctlr_info *h,
 	else
 		this_device->raid_level = RAID_UNKNOWN;
 
+	if (is_OBDR_device) {
+		/* See if this is a One-Button-Disaster-Recovery device
+		 * by looking for "$DR-10" at offset 43 in inquiry data.
+		 */
+		obdr_sig = &inq_buff[OBDR_SIG_OFFSET];
+		*is_OBDR_device = (this_device->devtype == TYPE_ROM &&
+					strncmp(obdr_sig, OBDR_TAPE_SIG,
+						OBDR_SIG_LEN) == 0);
+	}
+
 	kfree(inq_buff);
 	return 0;
 
@@ -1716,7 +1743,7 @@ static int add_msa2xxx_enclosure_device(struct ctlr_info *h,
 		return 0;
 	}
 
-	if (hpsa_update_device_info(h, scsi3addr, this_device))
+	if (hpsa_update_device_info(h, scsi3addr, this_device, NULL))
 		return 0;
 	(*nmsa2xxx_enclosures)++;
 	hpsa_set_bus_target_lun(this_device, bus, target, 0);
@@ -1808,7 +1835,6 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 	 */
 	struct ReportLUNdata *physdev_list = NULL;
 	struct ReportLUNdata *logdev_list = NULL;
-	unsigned char *inq_buff = NULL;
 	u32 nphysicals = 0;
 	u32 nlogicals = 0;
 	u32 ndev_allocated = 0;
@@ -1824,11 +1850,9 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 		GFP_KERNEL);
 	physdev_list = kzalloc(reportlunsize, GFP_KERNEL);
 	logdev_list = kzalloc(reportlunsize, GFP_KERNEL);
-	inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
 	tmpdevice = kzalloc(sizeof(*tmpdevice), GFP_KERNEL);
 
-	if (!currentsd || !physdev_list || !logdev_list ||
-		!inq_buff || !tmpdevice) {
+	if (!currentsd || !physdev_list || !logdev_list || !tmpdevice) {
 		dev_err(&h->pdev->dev, "out of memory\n");
 		goto out;
 	}
@@ -1863,7 +1887,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 	/* adjust our table of devices */
 	nmsa2xxx_enclosures = 0;
 	for (i = 0; i < nphysicals + nlogicals + 1; i++) {
-		u8 *lunaddrbytes;
+		u8 *lunaddrbytes, is_OBDR = 0;
 
 		/* Figure out where the LUN ID info is coming from */
 		lunaddrbytes = figure_lunaddrbytes(h, raid_ctlr_position,
@@ -1874,7 +1898,8 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 			continue;
 
 		/* Get device type, vendor, model, device id */
-		if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice))
+		if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice,
+							&is_OBDR))
 			continue; /* skip it if we can't talk to it. */
 		figure_bus_target_lun(h, lunaddrbytes, &bus, &target, &lun,
 			tmpdevice);
@@ -1898,7 +1923,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 		hpsa_set_bus_target_lun(this_device, bus, target, lun);
 
 		switch (this_device->devtype) {
-		case TYPE_ROM: {
+		case TYPE_ROM:
 			/* We don't *really* support actual CD-ROM devices,
 			 * just "One Button Disaster Recovery" tape drive
 			 * which temporarily pretends to be a CD-ROM drive.
@@ -1906,15 +1931,8 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 			 * device by checking for "$DR-10" in bytes 43-48 of
 			 * the inquiry data.
 			 */
-				char obdr_sig[7];
-#define OBDR_TAPE_SIG "$DR-10"
-				strncpy(obdr_sig, &inq_buff[43], 6);
-				obdr_sig[6] = '\0';
-				if (strncmp(obdr_sig, OBDR_TAPE_SIG, 6) != 0)
-					/* Not OBDR device, ignore it. */
-					break;
-			}
-			ncurrent++;
+			if (is_OBDR)
+				ncurrent++;
 			break;
 		case TYPE_DISK:
 			if (i < nphysicals)
@@ -1947,7 +1965,6 @@ out:
 	for (i = 0; i < ndev_allocated; i++)
 		kfree(currentsd[i]);
 	kfree(currentsd);
-	kfree(inq_buff);
 	kfree(physdev_list);
 	kfree(logdev_list);
 }
diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c
index 26072f1e9852..6981b773a88d 100644
--- a/drivers/scsi/isci/host.c
+++ b/drivers/scsi/isci/host.c
@@ -531,6 +531,9 @@ static void sci_controller_process_completions(struct isci_host *ihost)
 			break;
 
 		case SCU_COMPLETION_TYPE_EVENT:
+			sci_controller_event_completion(ihost, ent);
+			break;
+
 		case SCU_COMPLETION_TYPE_NOTIFY: {
 			event_cycle ^= ((event_get+1) & SCU_MAX_EVENTS) <<
 				       (SMU_COMPLETION_QUEUE_GET_EVENT_CYCLE_BIT_SHIFT - SCU_MAX_EVENTS_SHIFT);
@@ -1091,6 +1094,7 @@ static void isci_host_completion_routine(unsigned long data)
 	struct isci_request *request;
 	struct isci_request *next_request;
 	struct sas_task     *task;
+	u16 active;
 
 	INIT_LIST_HEAD(&completed_request_list);
 	INIT_LIST_HEAD(&errored_request_list);
@@ -1181,6 +1185,13 @@ static void isci_host_completion_routine(unsigned long data)
 		}
 	}
 
+	/* the coalesence timeout doubles at each encoding step, so
+	 * update it based on the ilog2 value of the outstanding requests
+	 */
+	active = isci_tci_active(ihost);
+	writel(SMU_ICC_GEN_VAL(NUMBER, active) |
+	       SMU_ICC_GEN_VAL(TIMER, ISCI_COALESCE_BASE + ilog2(active)),
+	       &ihost->smu_registers->interrupt_coalesce_control);
 }
 
 /**
@@ -1471,7 +1482,7 @@ static void sci_controller_ready_state_enter(struct sci_base_state_machine *sm)
 	struct isci_host *ihost = container_of(sm, typeof(*ihost), sm);
 
 	/* set the default interrupt coalescence number and timeout value. */
-	sci_controller_set_interrupt_coalescence(ihost, 0x10, 250);
+	sci_controller_set_interrupt_coalescence(ihost, 0, 0);
 }
 
 static void sci_controller_ready_state_exit(struct sci_base_state_machine *sm)
diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h
index 062101a39f79..9f33831a2f04 100644
--- a/drivers/scsi/isci/host.h
+++ b/drivers/scsi/isci/host.h
@@ -369,6 +369,9 @@ static inline struct isci_host *dev_to_ihost(struct domain_device *dev)
 #define ISCI_TAG_SEQ(tag) (((tag) >> 12) & (SCI_MAX_SEQ-1))
 #define ISCI_TAG_TCI(tag) ((tag) & (SCI_MAX_IO_REQUESTS-1))
 
+/* interrupt coalescing baseline: 9 == 3 to 5us interrupt delay per command */
+#define ISCI_COALESCE_BASE 9
+
 /* expander attached sata devices require 3 rnc slots */
 static inline int sci_remote_device_node_count(struct isci_remote_device *idev)
 {
diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c
index 61e0d09e2b57..29aa34efb0f5 100644
--- a/drivers/scsi/isci/init.c
+++ b/drivers/scsi/isci/init.c
@@ -59,10 +59,19 @@
 #include <linux/firmware.h>
 #include <linux/efi.h>
 #include <asm/string.h>
+#include <scsi/scsi_host.h>
 #include "isci.h"
 #include "task.h"
 #include "probe_roms.h"
 
+#define MAJ 1
+#define MIN 0
+#define BUILD 0
+#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
+	__stringify(BUILD)
+
+MODULE_VERSION(DRV_VERSION);
+
 static struct scsi_transport_template *isci_transport_template;
 
 static DEFINE_PCI_DEVICE_TABLE(isci_id_table) = {
@@ -113,6 +122,22 @@ unsigned char max_concurr_spinup = 1;
 module_param(max_concurr_spinup, byte, 0);
 MODULE_PARM_DESC(max_concurr_spinup, "Max concurrent device spinup");
 
+static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = container_of(dev, typeof(*shost), shost_dev);
+	struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
+	struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id);
+}
+
+static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL);
+
+struct device_attribute *isci_host_attrs[] = {
+	&dev_attr_isci_id,
+	NULL
+};
+
 static struct scsi_host_template isci_sht = {
 
 	.module				= THIS_MODULE,
@@ -138,6 +163,7 @@ static struct scsi_host_template isci_sht = {
 	.slave_alloc			= sas_slave_alloc,
 	.target_destroy			= sas_target_destroy,
 	.ioctl				= sas_ioctl,
+	.shost_attrs			= isci_host_attrs,
 };
 
 static struct sas_domain_function_template isci_transport_ops  = {
@@ -232,17 +258,6 @@ static int isci_register_sas_ha(struct isci_host *isci_host)
 	return 0;
 }
 
-static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct Scsi_Host *shost = container_of(dev, typeof(*shost), shost_dev);
-	struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
-	struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id);
-}
-
-static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL);
-
 static void isci_unregister(struct isci_host *isci_host)
 {
 	struct Scsi_Host *shost;
@@ -251,7 +266,6 @@ static void isci_unregister(struct isci_host *isci_host)
 		return;
 
 	shost = isci_host->shost;
-	device_remove_file(&shost->shost_dev, &dev_attr_isci_id);
 
 	sas_unregister_ha(&isci_host->sas_ha);
 
@@ -415,14 +429,8 @@ static struct isci_host *isci_host_alloc(struct pci_dev *pdev, int id)
 	if (err)
 		goto err_shost_remove;
 
-	err = device_create_file(&shost->shost_dev, &dev_attr_isci_id);
-	if (err)
-		goto err_unregister_ha;
-
 	return isci_host;
 
- err_unregister_ha:
-	sas_unregister_ha(&(isci_host->sas_ha));
  err_shost_remove:
 	scsi_remove_host(shost);
  err_shost:
@@ -540,7 +548,8 @@ static __init int isci_init(void)
 {
 	int err;
 
-	pr_info("%s: Intel(R) C600 SAS Controller Driver\n", DRV_NAME);
+	pr_info("%s: Intel(R) C600 SAS Controller Driver - version %s\n",
+		DRV_NAME, DRV_VERSION);
 
 	isci_transport_template = sas_domain_attach_transport(&isci_transport_ops);
 	if (!isci_transport_template)
diff --git a/drivers/scsi/isci/phy.c b/drivers/scsi/isci/phy.c
index 79313a7a2356..430fc8ff014a 100644
--- a/drivers/scsi/isci/phy.c
+++ b/drivers/scsi/isci/phy.c
@@ -104,6 +104,7 @@ sci_phy_link_layer_initialization(struct isci_phy *iphy,
 	u32 parity_count = 0;
 	u32 llctl, link_rate;
 	u32 clksm_value = 0;
+	u32 sp_timeouts = 0;
 
 	iphy->link_layer_registers = reg;
 
@@ -211,6 +212,18 @@ sci_phy_link_layer_initialization(struct isci_phy *iphy,
 	llctl |= SCU_SAS_LLCTL_GEN_VAL(MAX_LINK_RATE, link_rate);
 	writel(llctl, &iphy->link_layer_registers->link_layer_control);
 
+	sp_timeouts = readl(&iphy->link_layer_registers->sas_phy_timeouts);
+
+	/* Clear the default 0x36 (54us) RATE_CHANGE timeout value. */
+	sp_timeouts &= ~SCU_SAS_PHYTOV_GEN_VAL(RATE_CHANGE, 0xFF);
+
+	/* Set RATE_CHANGE timeout value to 0x3B (59us).  This ensures SCU can
+	 * lock with 3Gb drive when SCU max rate is set to 1.5Gb.
+	 */
+	sp_timeouts |= SCU_SAS_PHYTOV_GEN_VAL(RATE_CHANGE, 0x3B);
+
+	writel(sp_timeouts, &iphy->link_layer_registers->sas_phy_timeouts);
+
 	if (is_a2(ihost->pdev)) {
 		/* Program the max ARB time for the PHY to 700us so we inter-operate with
 		 * the PMC expander which shuts down PHYs if the expander PHY generates too
diff --git a/drivers/scsi/isci/registers.h b/drivers/scsi/isci/registers.h
index 9b266c7428e8..00afc738bbed 100644
--- a/drivers/scsi/isci/registers.h
+++ b/drivers/scsi/isci/registers.h
@@ -1299,6 +1299,18 @@ struct scu_transport_layer_registers {
 #define SCU_AFE_XCVRCR_OFFSET       0x00DC
 #define SCU_AFE_LUTCR_OFFSET        0x00E0
 
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_ALIGN_DETECTION_SHIFT          (0UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_ALIGN_DETECTION_MASK           (0x000000FFUL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_HOT_PLUG_SHIFT                 (8UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_HOT_PLUG_MASK                  (0x0000FF00UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_COMSAS_DETECTION_SHIFT         (16UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_COMSAS_DETECTION_MASK          (0x00FF0000UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_RATE_CHANGE_SHIFT              (24UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_RATE_CHANGE_MASK               (0xFF000000UL)
+
+#define SCU_SAS_PHYTOV_GEN_VAL(name, value) \
+	SCU_GEN_VALUE(SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_##name, value)
+
 #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_SHIFT                  (0)
 #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_MASK                   (0x00000003)
 #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_GEN1                   (0)
diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index a46e07ac789f..b5d3a8c4d329 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -732,12 +732,20 @@ sci_io_request_terminate(struct isci_request *ireq)
 		sci_change_state(&ireq->sm, SCI_REQ_ABORTING);
 		return SCI_SUCCESS;
 	case SCI_REQ_TASK_WAIT_TC_RESP:
+		/* The task frame was already confirmed to have been
+		 * sent by the SCU HW.  Since the state machine is
+		 * now only waiting for the task response itself,
+		 * abort the request and complete it immediately
+		 * and don't wait for the task response.
+		 */
 		sci_change_state(&ireq->sm, SCI_REQ_ABORTING);
 		sci_change_state(&ireq->sm, SCI_REQ_COMPLETED);
 		return SCI_SUCCESS;
 	case SCI_REQ_ABORTING:
-		sci_change_state(&ireq->sm, SCI_REQ_COMPLETED);
-		return SCI_SUCCESS;
+		/* If a request has a termination requested twice, return
+		 * a failure indication, since HW confirmation of the first
+		 * abort is still outstanding.
+		 */
 	case SCI_REQ_COMPLETED:
 	default:
 		dev_warn(&ireq->owning_controller->pdev->dev,
@@ -2399,22 +2407,19 @@ static void isci_task_save_for_upper_layer_completion(
 	}
 }
 
-static void isci_request_process_stp_response(struct sas_task *task,
-					      void *response_buffer)
+static void isci_process_stp_response(struct sas_task *task, struct dev_to_host_fis *fis)
 {
-	struct dev_to_host_fis *d2h_reg_fis = response_buffer;
 	struct task_status_struct *ts = &task->task_status;
 	struct ata_task_resp *resp = (void *)&ts->buf[0];
 
-	resp->frame_len = le16_to_cpu(*(__le16 *)(response_buffer + 6));
-	memcpy(&resp->ending_fis[0], response_buffer + 16, 24);
+	resp->frame_len = sizeof(*fis);
+	memcpy(resp->ending_fis, fis, sizeof(*fis));
 	ts->buf_valid_size = sizeof(*resp);
 
-	/**
-	 * If the device fault bit is set in the status register, then
+	/* If the device fault bit is set in the status register, then
 	 * set the sense data and return.
 	 */
-	if (d2h_reg_fis->status & ATA_DF)
+	if (fis->status & ATA_DF)
 		ts->stat = SAS_PROTO_RESPONSE;
 	else
 		ts->stat = SAM_STAT_GOOD;
@@ -2428,7 +2433,6 @@ static void isci_request_io_request_complete(struct isci_host *ihost,
 {
 	struct sas_task *task = isci_request_access_task(request);
 	struct ssp_response_iu *resp_iu;
-	void *resp_buf;
 	unsigned long task_flags;
 	struct isci_remote_device *idev = isci_lookup_device(task->dev);
 	enum service_response response       = SAS_TASK_UNDELIVERED;
@@ -2565,9 +2569,7 @@ static void isci_request_io_request_complete(struct isci_host *ihost,
 				task);
 
 			if (sas_protocol_ata(task->task_proto)) {
-				resp_buf = &request->stp.rsp;
-				isci_request_process_stp_response(task,
-								  resp_buf);
+				isci_process_stp_response(task, &request->stp.rsp);
 			} else if (SAS_PROTOCOL_SSP == task->task_proto) {
 
 				/* crack the iu response buffer. */
diff --git a/drivers/scsi/isci/unsolicited_frame_control.c b/drivers/scsi/isci/unsolicited_frame_control.c
index e9e1e2abacb9..16f88ab939c8 100644
--- a/drivers/scsi/isci/unsolicited_frame_control.c
+++ b/drivers/scsi/isci/unsolicited_frame_control.c
@@ -72,7 +72,7 @@ int sci_unsolicited_frame_control_construct(struct isci_host *ihost)
 	 */
 	buf_len = SCU_MAX_UNSOLICITED_FRAMES * SCU_UNSOLICITED_FRAME_BUFFER_SIZE;
 	header_len = SCU_MAX_UNSOLICITED_FRAMES * sizeof(struct scu_unsolicited_frame_header);
-	size = buf_len + header_len + SCU_MAX_UNSOLICITED_FRAMES * sizeof(dma_addr_t);
+	size = buf_len + header_len + SCU_MAX_UNSOLICITED_FRAMES * sizeof(uf_control->address_table.array[0]);
 
 	/*
 	 * The Unsolicited Frame buffers are set at the start of the UF
diff --git a/drivers/scsi/isci/unsolicited_frame_control.h b/drivers/scsi/isci/unsolicited_frame_control.h
index 31cb9506f52d..75d896686f5a 100644
--- a/drivers/scsi/isci/unsolicited_frame_control.h
+++ b/drivers/scsi/isci/unsolicited_frame_control.h
@@ -214,7 +214,7 @@ struct sci_uf_address_table_array {
 	 * starting address of the UF address table.
 	 * 64-bit pointers are required by the hardware.
 	 */
-	dma_addr_t *array;
+	u64 *array;
 
 	/**
 	 * This field specifies the physical address location for the UF
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 01ff082dc34c..d261e982a2fa 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -494,6 +494,9 @@ static int fc_seq_send(struct fc_lport *lport, struct fc_seq *sp,
 	 */
 	error = lport->tt.frame_send(lport, fp);
 
+	if (fh->fh_type == FC_TYPE_BLS)
+		return error;
+
 	/*
 	 * Update the exchange and sequence flags,
 	 * assuming all frames for the sequence have been sent.
@@ -575,42 +578,35 @@ static void fc_seq_set_resp(struct fc_seq *sp,
 }
 
 /**
- * fc_seq_exch_abort() - Abort an exchange and sequence
- * @req_sp:	The sequence to be aborted
+ * fc_exch_abort_locked() - Abort an exchange
+ * @ep:	The exchange to be aborted
  * @timer_msec: The period of time to wait before aborting
  *
- * Generally called because of a timeout or an abort from the upper layer.
+ * Locking notes:  Called with exch lock held
+ *
+ * Return value: 0 on success else error code
  */
-static int fc_seq_exch_abort(const struct fc_seq *req_sp,
-			     unsigned int timer_msec)
+static int fc_exch_abort_locked(struct fc_exch *ep,
+				unsigned int timer_msec)
 {
 	struct fc_seq *sp;
-	struct fc_exch *ep;
 	struct fc_frame *fp;
 	int error;
 
-	ep = fc_seq_exch(req_sp);
-
-	spin_lock_bh(&ep->ex_lock);
 	if (ep->esb_stat & (ESB_ST_COMPLETE | ESB_ST_ABNORMAL) ||
-	    ep->state & (FC_EX_DONE | FC_EX_RST_CLEANUP)) {
-		spin_unlock_bh(&ep->ex_lock);
+	    ep->state & (FC_EX_DONE | FC_EX_RST_CLEANUP))
 		return -ENXIO;
-	}
 
 	/*
 	 * Send the abort on a new sequence if possible.
 	 */
 	sp = fc_seq_start_next_locked(&ep->seq);
-	if (!sp) {
-		spin_unlock_bh(&ep->ex_lock);
+	if (!sp)
 		return -ENOMEM;
-	}
 
 	ep->esb_stat |= ESB_ST_SEQ_INIT | ESB_ST_ABNORMAL;
 	if (timer_msec)
 		fc_exch_timer_set_locked(ep, timer_msec);
-	spin_unlock_bh(&ep->ex_lock);
 
 	/*
 	 * If not logged into the fabric, don't send ABTS but leave
@@ -633,6 +629,28 @@ static int fc_seq_exch_abort(const struct fc_seq *req_sp,
 }
 
 /**
+ * fc_seq_exch_abort() - Abort an exchange and sequence
+ * @req_sp:	The sequence to be aborted
+ * @timer_msec: The period of time to wait before aborting
+ *
+ * Generally called because of a timeout or an abort from the upper layer.
+ *
+ * Return value: 0 on success else error code
+ */
+static int fc_seq_exch_abort(const struct fc_seq *req_sp,
+			     unsigned int timer_msec)
+{
+	struct fc_exch *ep;
+	int error;
+
+	ep = fc_seq_exch(req_sp);
+	spin_lock_bh(&ep->ex_lock);
+	error = fc_exch_abort_locked(ep, timer_msec);
+	spin_unlock_bh(&ep->ex_lock);
+	return error;
+}
+
+/**
  * fc_exch_timeout() - Handle exchange timer expiration
  * @work: The work_struct identifying the exchange that timed out
  */
@@ -1715,6 +1733,7 @@ static void fc_exch_reset(struct fc_exch *ep)
 	int rc = 1;
 
 	spin_lock_bh(&ep->ex_lock);
+	fc_exch_abort_locked(ep, 0);
 	ep->state |= FC_EX_RST_CLEANUP;
 	if (cancel_delayed_work(&ep->timeout_work))
 		atomic_dec(&ep->ex_refcnt);	/* drop hold for timer */
@@ -1962,6 +1981,7 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport,
 	struct fc_exch *ep;
 	struct fc_seq *sp = NULL;
 	struct fc_frame_header *fh;
+	struct fc_fcp_pkt *fsp = NULL;
 	int rc = 1;
 
 	ep = fc_exch_alloc(lport, fp);
@@ -1984,8 +2004,10 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport,
 	fc_exch_setup_hdr(ep, fp, ep->f_ctl);
 	sp->cnt++;
 
-	if (ep->xid <= lport->lro_xid && fh->fh_r_ctl == FC_RCTL_DD_UNSOL_CMD)
+	if (ep->xid <= lport->lro_xid && fh->fh_r_ctl == FC_RCTL_DD_UNSOL_CMD) {
+		fsp = fr_fsp(fp);
 		fc_fcp_ddp_setup(fr_fsp(fp), ep->xid);
+	}
 
 	if (unlikely(lport->tt.frame_send(lport, fp)))
 		goto err;
@@ -1999,7 +2021,8 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport,
 	spin_unlock_bh(&ep->ex_lock);
 	return sp;
 err:
-	fc_fcp_ddp_done(fr_fsp(fp));
+	if (fsp)
+		fc_fcp_ddp_done(fsp);
 	rc = fc_exch_done_locked(ep);
 	spin_unlock_bh(&ep->ex_lock);
 	if (!rc)
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index afb63c843144..4c41ee816f0b 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -2019,6 +2019,11 @@ int fc_eh_abort(struct scsi_cmnd *sc_cmd)
 	struct fc_fcp_internal *si;
 	int rc = FAILED;
 	unsigned long flags;
+	int rval;
+
+	rval = fc_block_scsi_eh(sc_cmd);
+	if (rval)
+		return rval;
 
 	lport = shost_priv(sc_cmd->device->host);
 	if (lport->state != LPORT_ST_READY)
@@ -2068,9 +2073,9 @@ int fc_eh_device_reset(struct scsi_cmnd *sc_cmd)
 	int rc = FAILED;
 	int rval;
 
-	rval = fc_remote_port_chkready(rport);
+	rval = fc_block_scsi_eh(sc_cmd);
 	if (rval)
-		goto out;
+		return rval;
 
 	lport = shost_priv(sc_cmd->device->host);
 
@@ -2116,6 +2121,8 @@ int fc_eh_host_reset(struct scsi_cmnd *sc_cmd)
 
 	FC_SCSI_DBG(lport, "Resetting host\n");
 
+	fc_block_scsi_eh(sc_cmd);
+
 	lport->tt.lport_reset(lport);
 	wait_tmo = jiffies + FC_HOST_RESET_TIMEOUT;
 	while (!fc_fcp_lport_queue_ready(lport) && time_before(jiffies,
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index e55ed9cf23fb..628f347404f9 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -88,6 +88,7 @@
  */
 
 #include <linux/timer.h>
+#include <linux/delay.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
 
@@ -1029,8 +1030,16 @@ static void fc_lport_enter_reset(struct fc_lport *lport)
 			   FCH_EVT_LIPRESET, 0);
 	fc_vports_linkchange(lport);
 	fc_lport_reset_locked(lport);
-	if (lport->link_up)
+	if (lport->link_up) {
+		/*
+		 * Wait upto resource allocation time out before
+		 * doing re-login since incomplete FIP exchanged
+		 * from last session may collide with exchanges
+		 * in new session.
+		 */
+		msleep(lport->r_a_tov);
 		fc_lport_enter_flogi(lport);
+	}
 }
 
 /**
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index f84084bba2f0..16ad97df5ba6 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1721,7 +1721,7 @@ static int sas_find_bcast_dev(struct domain_device *dev,
 	list_for_each_entry(ch, &ex->children, siblings) {
 		if (ch->dev_type == EDGE_DEV || ch->dev_type == FANOUT_DEV) {
 			res = sas_find_bcast_dev(ch, src_dev);
-			if (src_dev)
+			if (*src_dev)
 				return res;
 		}
 	}
@@ -1769,10 +1769,12 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
 		sas_disable_routing(parent, phy->attached_sas_addr);
 	}
 	memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
-	sas_port_delete_phy(phy->port, phy->phy);
-	if (phy->port->num_phys == 0)
-		sas_port_delete(phy->port);
-	phy->port = NULL;
+	if (phy->port) {
+		sas_port_delete_phy(phy->port, phy->phy);
+		if (phy->port->num_phys == 0)
+			sas_port_delete(phy->port);
+		phy->port = NULL;
+	}
 }
 
 static int sas_discover_bfs_by_root_level(struct domain_device *root,
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 7836eb01c7fc..a31e05f3bfd4 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1786,13 +1786,16 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
 			fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
 	}
 
-	if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif) {
+	if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) {
 		if (ha->fw_attributes & BIT_4) {
+			int prot = 0;
 			vha->flags.difdix_supported = 1;
 			ql_dbg(ql_dbg_user, vha, 0x7082,
 			    "Registered for DIF/DIX type 1 and 3 protection.\n");
+			if (ql2xenabledif == 1)
+				prot = SHOST_DIX_TYPE0_PROTECTION;
 			scsi_host_set_prot(vha->host,
-			    SHOST_DIF_TYPE1_PROTECTION
+			    prot | SHOST_DIF_TYPE1_PROTECTION
 			    | SHOST_DIF_TYPE2_PROTECTION
 			    | SHOST_DIF_TYPE3_PROTECTION
 			    | SHOST_DIX_TYPE1_PROTECTION
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 2155071f3100..d79cd8a5f831 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -8,24 +8,24 @@
 /*
  * Table for showing the current message id in use for particular level
  * Change this table for addition of log/debug messages.
- * -----------------------------------------------------
- * |             Level            |   Last Value Used  |
- * -----------------------------------------------------
- * | Module Init and Probe        |       0x0116       |
- * | Mailbox commands             |       0x111e       |
- * | Device Discovery             |       0x2083       |
- * | Queue Command and IO tracing |       0x302e       |
- * | DPC Thread                   |       0x401c       |
- * | Async Events                 |       0x5059       |
- * | Timer Routines               |       0x600d       |
- * | User Space Interactions      |       0x709c       |
- * | Task Management              |       0x8043       |
- * | AER/EEH                      |       0x900f       |
- * | Virtual Port                 |       0xa007       |
- * | ISP82XX Specific             |       0xb027       |
- * | MultiQ                       |       0xc00b       |
- * | Misc                         |       0xd00b       |
- * -----------------------------------------------------
+ * ----------------------------------------------------------------------
+ * |             Level            |   Last Value Used  |     Holes	|
+ * ----------------------------------------------------------------------
+ * | Module Init and Probe        |       0x0116       |  		|
+ * | Mailbox commands             |       0x1126       |		|
+ * | Device Discovery             |       0x2083       |		|
+ * | Queue Command and IO tracing |       0x302e       |     0x3008     |
+ * | DPC Thread                   |       0x401c       |		|
+ * | Async Events                 |       0x5059       |		|
+ * | Timer Routines               |       0x600d       |		|
+ * | User Space Interactions      |       0x709d       |		|
+ * | Task Management              |       0x8041       |    		|
+ * | AER/EEH                      |       0x900f       |		|
+ * | Virtual Port                 |       0xa007       |		|
+ * | ISP82XX Specific             |       0xb04f       |    		|
+ * | MultiQ                       |       0xc00b       |		|
+ * | Misc                         |       0xd00b       |		|
+ * ----------------------------------------------------------------------
  */
 
 #include "qla_def.h"
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index cc5a79259d33..a03eaf40f377 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2529,6 +2529,7 @@ struct qla_hw_data {
 #define DT_ISP8021			BIT_14
 #define DT_ISP_LAST			(DT_ISP8021 << 1)
 
+#define DT_T10_PI                       BIT_25
 #define DT_IIDMA                        BIT_26
 #define DT_FWI2                         BIT_27
 #define DT_ZIO_SUPPORTED                BIT_28
@@ -2572,6 +2573,7 @@ struct qla_hw_data {
 #define IS_NOCACHE_VPD_TYPE(ha)	(IS_QLA81XX(ha))
 #define IS_ALOGIO_CAPABLE(ha)	(IS_QLA23XX(ha) || IS_FWI2_CAPABLE(ha))
 
+#define IS_T10_PI_CAPABLE(ha)   ((ha)->device_type & DT_T10_PI)
 #define IS_IIDMA_CAPABLE(ha)    ((ha)->device_type & DT_IIDMA)
 #define IS_FWI2_CAPABLE(ha)     ((ha)->device_type & DT_FWI2)
 #define IS_ZIO_SUPPORTED(ha)    ((ha)->device_type & DT_ZIO_SUPPORTED)
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index 691783abfb69..aa69486dc064 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -537,6 +537,11 @@ struct sts_entry_24xx {
 	/*
 	 * If DIF Error is set in comp_status, these additional fields are
 	 * defined:
+	 *
+	 * !!! NOTE: Firmware sends expected/actual DIF data in big endian
+	 * format; but all of the "data" field gets swab32-d in the beginning
+	 * of qla2x00_status_entry().
+	 *
 	 * &data[10] : uint8_t report_runt_bg[2];	- computed guard
 	 * &data[12] : uint8_t actual_dif[8];		- DIF Data received
 	 * &data[20] : uint8_t expected_dif[8];		- DIF Data computed
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index def694271bf7..37da04d3db26 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3838,15 +3838,12 @@ qla2x00_loop_resync(scsi_qla_host_t *vha)
 		req = vha->req;
 	rsp = req->rsp;
 
-	atomic_set(&vha->loop_state, LOOP_UPDATE);
 	clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags);
 	if (vha->flags.online) {
 		if (!(rval = qla2x00_fw_ready(vha))) {
 			/* Wait at most MAX_TARGET RSCNs for a stable link. */
 			wait_time = 256;
 			do {
-				atomic_set(&vha->loop_state, LOOP_UPDATE);
-
 				/* Issue a marker after FW becomes ready. */
 				qla2x00_marker(vha, req, rsp, 0, 0,
 					MK_SYNC_ALL);
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index d2e904bc21c0..9902834e0b74 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -102,3 +102,32 @@ qla2x00_set_fcport_state(fc_port_t *fcport, int state)
 		    fcport->d_id.b.al_pa);
 	}
 }
+
+static inline int
+qla2x00_hba_err_chk_enabled(srb_t *sp)
+{
+	/*
+	 * Uncomment when corresponding SCSI changes are done.
+	 *
+	if (!sp->cmd->prot_chk)
+		return 0;
+	 *
+	 */
+
+	switch (scsi_get_prot_op(sp->cmd)) {
+	case SCSI_PROT_READ_STRIP:
+	case SCSI_PROT_WRITE_INSERT:
+		if (ql2xenablehba_err_chk >= 1)
+			return 1;
+		break;
+	case SCSI_PROT_READ_PASS:
+	case SCSI_PROT_WRITE_PASS:
+		if (ql2xenablehba_err_chk >= 2)
+			return 1;
+		break;
+	case SCSI_PROT_READ_INSERT:
+	case SCSI_PROT_WRITE_STRIP:
+		return 1;
+	}
+	return 0;
+}
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 49d6906af886..dbec89622a0f 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -709,20 +709,28 @@ struct fw_dif_context {
  *
  */
 static inline void
-qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt,
+qla24xx_set_t10dif_tags(srb_t *sp, struct fw_dif_context *pkt,
     unsigned int protcnt)
 {
-	struct sd_dif_tuple *spt;
+	struct scsi_cmnd *cmd = sp->cmd;
 	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
-	unsigned char op = scsi_get_prot_op(cmd);
 
 	switch (scsi_get_prot_type(cmd)) {
-	/* For TYPE 0 protection: no checking */
 	case SCSI_PROT_DIF_TYPE0:
-		pkt->ref_tag_mask[0] = 0x00;
-		pkt->ref_tag_mask[1] = 0x00;
-		pkt->ref_tag_mask[2] = 0x00;
-		pkt->ref_tag_mask[3] = 0x00;
+		/*
+		 * No check for ql2xenablehba_err_chk, as it would be an
+		 * I/O error if hba tag generation is not done.
+		 */
+		pkt->ref_tag = cpu_to_le32((uint32_t)
+		    (0xffffffff & scsi_get_lba(cmd)));
+
+		if (!qla2x00_hba_err_chk_enabled(sp))
+			break;
+
+		pkt->ref_tag_mask[0] = 0xff;
+		pkt->ref_tag_mask[1] = 0xff;
+		pkt->ref_tag_mask[2] = 0xff;
+		pkt->ref_tag_mask[3] = 0xff;
 		break;
 
 	/*
@@ -730,20 +738,16 @@ qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt,
 	 * match LBA in CDB + N
 	 */
 	case SCSI_PROT_DIF_TYPE2:
-		if (!ql2xenablehba_err_chk)
-			break;
-
-		if (scsi_prot_sg_count(cmd)) {
-			spt = page_address(sg_page(scsi_prot_sglist(cmd))) +
-			    scsi_prot_sglist(cmd)[0].offset;
-			pkt->app_tag = swab32(spt->app_tag);
-			pkt->app_tag_mask[0] =  0xff;
-			pkt->app_tag_mask[1] =  0xff;
-		}
+		pkt->app_tag = __constant_cpu_to_le16(0);
+		pkt->app_tag_mask[0] = 0x0;
+		pkt->app_tag_mask[1] = 0x0;
 
 		pkt->ref_tag = cpu_to_le32((uint32_t)
 		    (0xffffffff & scsi_get_lba(cmd)));
 
+		if (!qla2x00_hba_err_chk_enabled(sp))
+			break;
+
 		/* enable ALL bytes of the ref tag */
 		pkt->ref_tag_mask[0] = 0xff;
 		pkt->ref_tag_mask[1] = 0xff;
@@ -763,26 +767,15 @@ qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt,
 	 * 16 bit app tag.
 	 */
 	case SCSI_PROT_DIF_TYPE1:
-		if (!ql2xenablehba_err_chk)
+		pkt->ref_tag = cpu_to_le32((uint32_t)
+		    (0xffffffff & scsi_get_lba(cmd)));
+		pkt->app_tag = __constant_cpu_to_le16(0);
+		pkt->app_tag_mask[0] = 0x0;
+		pkt->app_tag_mask[1] = 0x0;
+
+		if (!qla2x00_hba_err_chk_enabled(sp))
 			break;
 
-		if (protcnt && (op == SCSI_PROT_WRITE_STRIP ||
-		    op == SCSI_PROT_WRITE_PASS)) {
-			spt = page_address(sg_page(scsi_prot_sglist(cmd))) +
-			    scsi_prot_sglist(cmd)[0].offset;
-			ql_dbg(ql_dbg_io, vha, 0x3008,
-			    "LBA from user %p, lba = 0x%x for cmd=%p.\n",
-			    spt, (int)spt->ref_tag, cmd);
-			pkt->ref_tag = swab32(spt->ref_tag);
-			pkt->app_tag_mask[0] = 0x0;
-			pkt->app_tag_mask[1] = 0x0;
-		} else {
-			pkt->ref_tag = cpu_to_le32((uint32_t)
-			    (0xffffffff & scsi_get_lba(cmd)));
-			pkt->app_tag = __constant_cpu_to_le16(0);
-			pkt->app_tag_mask[0] = 0x0;
-			pkt->app_tag_mask[1] = 0x0;
-		}
 		/* enable ALL bytes of the ref tag */
 		pkt->ref_tag_mask[0] = 0xff;
 		pkt->ref_tag_mask[1] = 0xff;
@@ -798,8 +791,162 @@ qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt,
 	    scsi_get_prot_type(cmd), cmd);
 }
 
+struct qla2_sgx {
+	dma_addr_t		dma_addr;	/* OUT */
+	uint32_t		dma_len;	/* OUT */
+
+	uint32_t		tot_bytes;	/* IN */
+	struct scatterlist	*cur_sg;	/* IN */
+
+	/* for book keeping, bzero on initial invocation */
+	uint32_t		bytes_consumed;
+	uint32_t		num_bytes;
+	uint32_t		tot_partial;
+
+	/* for debugging */
+	uint32_t		num_sg;
+	srb_t			*sp;
+};
 
 static int
+qla24xx_get_one_block_sg(uint32_t blk_sz, struct qla2_sgx *sgx,
+	uint32_t *partial)
+{
+	struct scatterlist *sg;
+	uint32_t cumulative_partial, sg_len;
+	dma_addr_t sg_dma_addr;
+
+	if (sgx->num_bytes == sgx->tot_bytes)
+		return 0;
+
+	sg = sgx->cur_sg;
+	cumulative_partial = sgx->tot_partial;
+
+	sg_dma_addr = sg_dma_address(sg);
+	sg_len = sg_dma_len(sg);
+
+	sgx->dma_addr = sg_dma_addr + sgx->bytes_consumed;
+
+	if ((cumulative_partial + (sg_len - sgx->bytes_consumed)) >= blk_sz) {
+		sgx->dma_len = (blk_sz - cumulative_partial);
+		sgx->tot_partial = 0;
+		sgx->num_bytes += blk_sz;
+		*partial = 0;
+	} else {
+		sgx->dma_len = sg_len - sgx->bytes_consumed;
+		sgx->tot_partial += sgx->dma_len;
+		*partial = 1;
+	}
+
+	sgx->bytes_consumed += sgx->dma_len;
+
+	if (sg_len == sgx->bytes_consumed) {
+		sg = sg_next(sg);
+		sgx->num_sg++;
+		sgx->cur_sg = sg;
+		sgx->bytes_consumed = 0;
+	}
+
+	return 1;
+}
+
+static int
+qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
+	uint32_t *dsd, uint16_t tot_dsds)
+{
+	void *next_dsd;
+	uint8_t avail_dsds = 0;
+	uint32_t dsd_list_len;
+	struct dsd_dma *dsd_ptr;
+	struct scatterlist *sg_prot;
+	uint32_t *cur_dsd = dsd;
+	uint16_t	used_dsds = tot_dsds;
+
+	uint32_t	prot_int;
+	uint32_t	partial;
+	struct qla2_sgx sgx;
+	dma_addr_t	sle_dma;
+	uint32_t	sle_dma_len, tot_prot_dma_len = 0;
+	struct scsi_cmnd *cmd = sp->cmd;
+
+	prot_int = cmd->device->sector_size;
+
+	memset(&sgx, 0, sizeof(struct qla2_sgx));
+	sgx.tot_bytes = scsi_bufflen(sp->cmd);
+	sgx.cur_sg = scsi_sglist(sp->cmd);
+	sgx.sp = sp;
+
+	sg_prot = scsi_prot_sglist(sp->cmd);
+
+	while (qla24xx_get_one_block_sg(prot_int, &sgx, &partial)) {
+
+		sle_dma = sgx.dma_addr;
+		sle_dma_len = sgx.dma_len;
+alloc_and_fill:
+		/* Allocate additional continuation packets? */
+		if (avail_dsds == 0) {
+			avail_dsds = (used_dsds > QLA_DSDS_PER_IOCB) ?
+					QLA_DSDS_PER_IOCB : used_dsds;
+			dsd_list_len = (avail_dsds + 1) * 12;
+			used_dsds -= avail_dsds;
+
+			/* allocate tracking DS */
+			dsd_ptr = kzalloc(sizeof(struct dsd_dma), GFP_ATOMIC);
+			if (!dsd_ptr)
+				return 1;
+
+			/* allocate new list */
+			dsd_ptr->dsd_addr = next_dsd =
+			    dma_pool_alloc(ha->dl_dma_pool, GFP_ATOMIC,
+				&dsd_ptr->dsd_list_dma);
+
+			if (!next_dsd) {
+				/*
+				 * Need to cleanup only this dsd_ptr, rest
+				 * will be done by sp_free_dma()
+				 */
+				kfree(dsd_ptr);
+				return 1;
+			}
+
+			list_add_tail(&dsd_ptr->list,
+			    &((struct crc_context *)sp->ctx)->dsd_list);
+
+			sp->flags |= SRB_CRC_CTX_DSD_VALID;
+
+			/* add new list to cmd iocb or last list */
+			*cur_dsd++ = cpu_to_le32(LSD(dsd_ptr->dsd_list_dma));
+			*cur_dsd++ = cpu_to_le32(MSD(dsd_ptr->dsd_list_dma));
+			*cur_dsd++ = dsd_list_len;
+			cur_dsd = (uint32_t *)next_dsd;
+		}
+		*cur_dsd++ = cpu_to_le32(LSD(sle_dma));
+		*cur_dsd++ = cpu_to_le32(MSD(sle_dma));
+		*cur_dsd++ = cpu_to_le32(sle_dma_len);
+		avail_dsds--;
+
+		if (partial == 0) {
+			/* Got a full protection interval */
+			sle_dma = sg_dma_address(sg_prot) + tot_prot_dma_len;
+			sle_dma_len = 8;
+
+			tot_prot_dma_len += sle_dma_len;
+			if (tot_prot_dma_len == sg_dma_len(sg_prot)) {
+				tot_prot_dma_len = 0;
+				sg_prot = sg_next(sg_prot);
+			}
+
+			partial = 1; /* So as to not re-enter this block */
+			goto alloc_and_fill;
+		}
+	}
+	/* Null termination */
+	*cur_dsd++ = 0;
+	*cur_dsd++ = 0;
+	*cur_dsd++ = 0;
+	return 0;
+}
+static int
 qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
 	uint16_t tot_dsds)
 {
@@ -981,7 +1128,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
 	struct scsi_cmnd	*cmd;
 	struct scatterlist	*cur_seg;
 	int			sgc;
-	uint32_t		total_bytes;
+	uint32_t		total_bytes = 0;
 	uint32_t		data_bytes;
 	uint32_t		dif_bytes;
 	uint8_t			bundling = 1;
@@ -1023,8 +1170,10 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
 		    __constant_cpu_to_le16(CF_READ_DATA);
 	}
 
-	tot_prot_dsds = scsi_prot_sg_count(cmd);
-	if (!tot_prot_dsds)
+	if ((scsi_get_prot_op(sp->cmd) == SCSI_PROT_READ_INSERT) ||
+	    (scsi_get_prot_op(sp->cmd) == SCSI_PROT_WRITE_STRIP) ||
+	    (scsi_get_prot_op(sp->cmd) == SCSI_PROT_READ_STRIP) ||
+	    (scsi_get_prot_op(sp->cmd) == SCSI_PROT_WRITE_INSERT))
 		bundling = 0;
 
 	/* Allocate CRC context from global pool */
@@ -1047,7 +1196,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
 
 	INIT_LIST_HEAD(&crc_ctx_pkt->dsd_list);
 
-	qla24xx_set_t10dif_tags(cmd, (struct fw_dif_context *)
+	qla24xx_set_t10dif_tags(sp, (struct fw_dif_context *)
 	    &crc_ctx_pkt->ref_tag, tot_prot_dsds);
 
 	cmd_pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma));
@@ -1076,7 +1225,6 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
 		fcp_cmnd->additional_cdb_len |= 2;
 
 	int_to_scsilun(sp->cmd->device->lun, &fcp_cmnd->lun);
-	host_to_fcp_swap((uint8_t *)&fcp_cmnd->lun, sizeof(fcp_cmnd->lun));
 	memcpy(fcp_cmnd->cdb, cmd->cmnd, cmd->cmd_len);
 	cmd_pkt->fcp_cmnd_dseg_len = cpu_to_le16(fcp_cmnd_len);
 	cmd_pkt->fcp_cmnd_dseg_address[0] = cpu_to_le32(
@@ -1107,15 +1255,28 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
 	cmd_pkt->fcp_rsp_dseg_len = 0; /* Let response come in status iocb */
 
 	/* Compute dif len and adjust data len to incude protection */
-	total_bytes = data_bytes;
 	dif_bytes = 0;
 	blk_size = cmd->device->sector_size;
-	if (scsi_get_prot_op(cmd) != SCSI_PROT_NORMAL) {
-		dif_bytes = (data_bytes / blk_size) * 8;
-		total_bytes += dif_bytes;
+	dif_bytes = (data_bytes / blk_size) * 8;
+
+	switch (scsi_get_prot_op(sp->cmd)) {
+	case SCSI_PROT_READ_INSERT:
+	case SCSI_PROT_WRITE_STRIP:
+	    total_bytes = data_bytes;
+	    data_bytes += dif_bytes;
+	    break;
+
+	case SCSI_PROT_READ_STRIP:
+	case SCSI_PROT_WRITE_INSERT:
+	case SCSI_PROT_READ_PASS:
+	case SCSI_PROT_WRITE_PASS:
+	    total_bytes = data_bytes + dif_bytes;
+	    break;
+	default:
+	    BUG();
 	}
 
-	if (!ql2xenablehba_err_chk)
+	if (!qla2x00_hba_err_chk_enabled(sp))
 		fw_prot_opts |= 0x10; /* Disable Guard tag checking */
 
 	if (!bundling) {
@@ -1151,7 +1312,12 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
 
 	cmd_pkt->control_flags |=
 	    __constant_cpu_to_le16(CF_DATA_SEG_DESCR_ENABLE);
-	if (qla24xx_walk_and_build_sglist(ha, sp, cur_dsd,
+
+	if (!bundling && tot_prot_dsds) {
+		if (qla24xx_walk_and_build_sglist_no_difb(ha, sp,
+		    cur_dsd, tot_dsds))
+			goto crc_queuing_error;
+	} else if (qla24xx_walk_and_build_sglist(ha, sp, cur_dsd,
 	    (tot_dsds - tot_prot_dsds)))
 		goto crc_queuing_error;
 
@@ -1414,6 +1580,22 @@ qla24xx_dif_start_scsi(srb_t *sp)
 			goto queuing_error;
 		else
 			sp->flags |= SRB_DMA_VALID;
+
+		if ((scsi_get_prot_op(cmd) == SCSI_PROT_READ_INSERT) ||
+		    (scsi_get_prot_op(cmd) == SCSI_PROT_WRITE_STRIP)) {
+			struct qla2_sgx sgx;
+			uint32_t	partial;
+
+			memset(&sgx, 0, sizeof(struct qla2_sgx));
+			sgx.tot_bytes = scsi_bufflen(cmd);
+			sgx.cur_sg = scsi_sglist(cmd);
+			sgx.sp = sp;
+
+			nseg = 0;
+			while (qla24xx_get_one_block_sg(
+			    cmd->device->sector_size, &sgx, &partial))
+				nseg++;
+		}
 	} else
 		nseg = 0;
 
@@ -1428,6 +1610,11 @@ qla24xx_dif_start_scsi(srb_t *sp)
 			goto queuing_error;
 		else
 			sp->flags |= SRB_CRC_PROT_DMA_VALID;
+
+		if ((scsi_get_prot_op(cmd) == SCSI_PROT_READ_INSERT) ||
+		    (scsi_get_prot_op(cmd) == SCSI_PROT_WRITE_STRIP)) {
+			nseg = scsi_bufflen(cmd) / cmd->device->sector_size;
+		}
 	} else {
 		nseg = 0;
 	}
@@ -1454,6 +1641,7 @@ qla24xx_dif_start_scsi(srb_t *sp)
 	/* Build header part of command packet (excluding the OPCODE). */
 	req->current_outstanding_cmd = handle;
 	req->outstanding_cmds[handle] = sp;
+	sp->handle = handle;
 	sp->cmd->host_scribble = (unsigned char *)(unsigned long)handle;
 	req->cnt -= req_cnt;
 
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index b16b7725dee0..8a7591f035e6 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -719,7 +719,6 @@ skip_rio:
 			vha->flags.rscn_queue_overflow = 1;
 		}
 
-		atomic_set(&vha->loop_state, LOOP_UPDATE);
 		atomic_set(&vha->loop_down_timer, 0);
 		vha->flags.management_server_logged_in = 0;
 
@@ -1435,25 +1434,27 @@ struct scsi_dif_tuple {
  * ASC/ASCQ fields in the sense buffer with ILLEGAL_REQUEST
  * to indicate to the kernel that the HBA detected error.
  */
-static inline void
+static inline int
 qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 {
 	struct scsi_qla_host *vha = sp->fcport->vha;
 	struct scsi_cmnd *cmd = sp->cmd;
-	struct scsi_dif_tuple	*ep =
-			(struct scsi_dif_tuple *)&sts24->data[20];
-	struct scsi_dif_tuple	*ap =
-			(struct scsi_dif_tuple *)&sts24->data[12];
+	uint8_t		*ap = &sts24->data[12];
+	uint8_t		*ep = &sts24->data[20];
 	uint32_t	e_ref_tag, a_ref_tag;
 	uint16_t	e_app_tag, a_app_tag;
 	uint16_t	e_guard, a_guard;
 
-	e_ref_tag = be32_to_cpu(ep->ref_tag);
-	a_ref_tag = be32_to_cpu(ap->ref_tag);
-	e_app_tag = be16_to_cpu(ep->app_tag);
-	a_app_tag = be16_to_cpu(ap->app_tag);
-	e_guard = be16_to_cpu(ep->guard);
-	a_guard = be16_to_cpu(ap->guard);
+	/*
+	 * swab32 of the "data" field in the beginning of qla2x00_status_entry()
+	 * would make guard field appear at offset 2
+	 */
+	a_guard   = le16_to_cpu(*(uint16_t *)(ap + 2));
+	a_app_tag = le16_to_cpu(*(uint16_t *)(ap + 0));
+	a_ref_tag = le32_to_cpu(*(uint32_t *)(ap + 4));
+	e_guard   = le16_to_cpu(*(uint16_t *)(ep + 2));
+	e_app_tag = le16_to_cpu(*(uint16_t *)(ep + 0));
+	e_ref_tag = le32_to_cpu(*(uint32_t *)(ep + 4));
 
 	ql_dbg(ql_dbg_io, vha, 0x3023,
 	    "iocb(s) %p Returned STATUS.\n", sts24);
@@ -1465,6 +1466,63 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 	    cmd->cmnd[0], (u64)scsi_get_lba(cmd), a_ref_tag, e_ref_tag,
 	    a_app_tag, e_app_tag, a_guard, e_guard);
 
+	/*
+	 * Ignore sector if:
+	 * For type     3: ref & app tag is all 'f's
+	 * For type 0,1,2: app tag is all 'f's
+	 */
+	if ((a_app_tag == 0xffff) &&
+	    ((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) ||
+	     (a_ref_tag == 0xffffffff))) {
+		uint32_t blocks_done, resid;
+		sector_t lba_s = scsi_get_lba(cmd);
+
+		/* 2TB boundary case covered automatically with this */
+		blocks_done = e_ref_tag - (uint32_t)lba_s + 1;
+
+		resid = scsi_bufflen(cmd) - (blocks_done *
+		    cmd->device->sector_size);
+
+		scsi_set_resid(cmd, resid);
+		cmd->result = DID_OK << 16;
+
+		/* Update protection tag */
+		if (scsi_prot_sg_count(cmd)) {
+			uint32_t i, j = 0, k = 0, num_ent;
+			struct scatterlist *sg;
+			struct sd_dif_tuple *spt;
+
+			/* Patch the corresponding protection tags */
+			scsi_for_each_prot_sg(cmd, sg,
+			    scsi_prot_sg_count(cmd), i) {
+				num_ent = sg_dma_len(sg) / 8;
+				if (k + num_ent < blocks_done) {
+					k += num_ent;
+					continue;
+				}
+				j = blocks_done - k - 1;
+				k = blocks_done;
+				break;
+			}
+
+			if (k != blocks_done) {
+				qla_printk(KERN_WARNING, sp->fcport->vha->hw,
+				    "unexpected tag values tag:lba=%x:%llx)\n",
+				    e_ref_tag, (unsigned long long)lba_s);
+				return 1;
+			}
+
+			spt = page_address(sg_page(sg)) + sg->offset;
+			spt += j;
+
+			spt->app_tag = 0xffff;
+			if (scsi_get_prot_type(cmd) == SCSI_PROT_DIF_TYPE3)
+				spt->ref_tag = 0xffffffff;
+		}
+
+		return 0;
+	}
+
 	/* check guard */
 	if (e_guard != a_guard) {
 		scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
@@ -1472,28 +1530,30 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 		set_driver_byte(cmd, DRIVER_SENSE);
 		set_host_byte(cmd, DID_ABORT);
 		cmd->result |= SAM_STAT_CHECK_CONDITION << 1;
-		return;
+		return 1;
 	}
 
-	/* check appl tag */
-	if (e_app_tag != a_app_tag) {
+	/* check ref tag */
+	if (e_ref_tag != a_ref_tag) {
 		scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
-		    0x10, 0x2);
+		    0x10, 0x3);
 		set_driver_byte(cmd, DRIVER_SENSE);
 		set_host_byte(cmd, DID_ABORT);
 		cmd->result |= SAM_STAT_CHECK_CONDITION << 1;
-		return;
+		return 1;
 	}
 
-	/* check ref tag */
-	if (e_ref_tag != a_ref_tag) {
+	/* check appl tag */
+	if (e_app_tag != a_app_tag) {
 		scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
-		    0x10, 0x3);
+		    0x10, 0x2);
 		set_driver_byte(cmd, DRIVER_SENSE);
 		set_host_byte(cmd, DID_ABORT);
 		cmd->result |= SAM_STAT_CHECK_CONDITION << 1;
-		return;
+		return 1;
 	}
+
+	return 1;
 }
 
 /**
@@ -1767,7 +1827,7 @@ check_scsi_status:
 		break;
 
 	case CS_DIF_ERROR:
-		qla2x00_handle_dif_error(sp, sts24);
+		logit = qla2x00_handle_dif_error(sp, sts24);
 		break;
 	default:
 		cp->result = DID_ERROR << 16;
@@ -2468,11 +2528,10 @@ qla2x00_request_irqs(struct qla_hw_data *ha, struct rsp_que *rsp)
 		goto skip_msi;
 	}
 
-	if (IS_QLA2432(ha) && (ha->pdev->revision < QLA_MSIX_CHIP_REV_24XX ||
-		!QLA_MSIX_FW_MODE_1(ha->fw_attributes))) {
+	if (IS_QLA2432(ha) && (ha->pdev->revision < QLA_MSIX_CHIP_REV_24XX)) {
 		ql_log(ql_log_warn, vha, 0x0035,
 		    "MSI-X; Unsupported ISP2432 (0x%X, 0x%X).\n",
-		    ha->pdev->revision, ha->fw_attributes);
+		    ha->pdev->revision, QLA_MSIX_CHIP_REV_24XX);
 		goto skip_msix;
 	}
 
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index c706ed370000..f488cc69fc79 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -472,7 +472,7 @@ qla24xx_create_vhost(struct fc_vport *fc_vport)
 	host->can_queue = base_vha->req->length + 128;
 	host->this_id = 255;
 	host->cmd_per_lun = 3;
-	if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif)
+	if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif)
 		host->max_cmd_len = 32;
 	else
 		host->max_cmd_len = MAX_CMDSZ;
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 5cbf33a50b14..049807cda419 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -2208,6 +2208,7 @@ qla82xx_msix_rsp_q(int irq, void *dev_id)
 	struct qla_hw_data *ha;
 	struct rsp_que *rsp;
 	struct device_reg_82xx __iomem *reg;
+	unsigned long flags;
 
 	rsp = (struct rsp_que *) dev_id;
 	if (!rsp) {
@@ -2218,11 +2219,11 @@ qla82xx_msix_rsp_q(int irq, void *dev_id)
 
 	ha = rsp->hw;
 	reg = &ha->iobase->isp82;
-	spin_lock_irq(&ha->hardware_lock);
+	spin_lock_irqsave(&ha->hardware_lock, flags);
 	vha = pci_get_drvdata(ha->pdev);
 	qla24xx_process_response_queue(vha, rsp);
 	WRT_REG_DWORD(&reg->host_int, 0);
-	spin_unlock_irq(&ha->hardware_lock);
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	return IRQ_HANDLED;
 }
 
@@ -2838,6 +2839,16 @@ sufficient_dsds:
 		int_to_scsilun(sp->cmd->device->lun, &cmd_pkt->lun);
 		host_to_fcp_swap((uint8_t *)&cmd_pkt->lun, sizeof(cmd_pkt->lun));
 
+		/* build FCP_CMND IU */
+		memset(ctx->fcp_cmnd, 0, sizeof(struct fcp_cmnd));
+		int_to_scsilun(sp->cmd->device->lun, &ctx->fcp_cmnd->lun);
+		ctx->fcp_cmnd->additional_cdb_len = additional_cdb_len;
+
+		if (cmd->sc_data_direction == DMA_TO_DEVICE)
+			ctx->fcp_cmnd->additional_cdb_len |= 1;
+		else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
+			ctx->fcp_cmnd->additional_cdb_len |= 2;
+
 		/*
 		 * Update tagged queuing modifier -- default is TSK_SIMPLE (0).
 		 */
@@ -2854,16 +2865,6 @@ sufficient_dsds:
 			}
 		}
 
-		/* build FCP_CMND IU */
-		memset(ctx->fcp_cmnd, 0, sizeof(struct fcp_cmnd));
-		int_to_scsilun(sp->cmd->device->lun, &ctx->fcp_cmnd->lun);
-		ctx->fcp_cmnd->additional_cdb_len = additional_cdb_len;
-
-		if (cmd->sc_data_direction == DMA_TO_DEVICE)
-			ctx->fcp_cmnd->additional_cdb_len |= 1;
-		else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
-			ctx->fcp_cmnd->additional_cdb_len |= 2;
-
 		memcpy(ctx->fcp_cmnd->cdb, cmd->cmnd, cmd->cmd_len);
 
 		fcp_dl = (uint32_t *)(ctx->fcp_cmnd->cdb + 16 +
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index e02df276804e..1e69527f1e4e 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -106,17 +106,21 @@ MODULE_PARM_DESC(ql2xmaxqdepth,
 		"Maximum queue depth to report for target devices.");
 
 /* Do not change the value of this after module load */
-int ql2xenabledif = 1;
+int ql2xenabledif = 0;
 module_param(ql2xenabledif, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(ql2xenabledif,
 		" Enable T10-CRC-DIF "
-		" Default is 0 - No DIF Support. 1 - Enable it");
+		" Default is 0 - No DIF Support. 1 - Enable it"
+		", 2 - Enable DIF for all types, except Type 0.");
 
-int ql2xenablehba_err_chk;
+int ql2xenablehba_err_chk = 2;
 module_param(ql2xenablehba_err_chk, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(ql2xenablehba_err_chk,
-		" Enable T10-CRC-DIF Error isolation by HBA"
-		" Default is 0 - Error isolation disabled, 1 - Enable it");
+		" Enable T10-CRC-DIF Error isolation by HBA:\n"
+		" Default is 1.\n"
+		"  0 -- Error isolation disabled\n"
+		"  1 -- Error isolation enabled only for DIX Type 0\n"
+		"  2 -- Error isolation enabled for all Types\n");
 
 int ql2xiidmaenable=1;
 module_param(ql2xiidmaenable, int, S_IRUGO);
@@ -909,7 +913,14 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 		    "Abort command mbx success.\n");
 		wait = 1;
 	}
+
+	spin_lock_irqsave(&ha->hardware_lock, flags);
 	qla2x00_sp_compl(ha, sp);
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+	/* Did the command return during mailbox execution? */
+	if (ret == FAILED && !CMD_SP(cmd))
+		ret = SUCCESS;
 
 	/* Wait for the command to be returned. */
 	if (wait) {
@@ -1317,10 +1328,9 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
 					qla2x00_sp_compl(ha, sp);
 				} else {
 					ctx = sp->ctx;
-					if (ctx->type == SRB_LOGIN_CMD ||
-					    ctx->type == SRB_LOGOUT_CMD) {
-						ctx->u.iocb_cmd->free(sp);
-					} else {
+					if (ctx->type == SRB_ELS_CMD_RPT ||
+					    ctx->type == SRB_ELS_CMD_HST ||
+					    ctx->type == SRB_CT_CMD) {
 						struct fc_bsg_job *bsg_job =
 						    ctx->u.bsg_job;
 						if (bsg_job->request->msgcode
@@ -1332,6 +1342,8 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
 						kfree(sp->ctx);
 						mempool_free(sp,
 							ha->srb_mempool);
+					} else {
+						ctx->u.iocb_cmd->free(sp);
 					}
 				}
 			}
@@ -2251,7 +2263,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	host->this_id = 255;
 	host->cmd_per_lun = 3;
 	host->unique_id = host->host_no;
-	if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif)
+	if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif)
 		host->max_cmd_len = 32;
 	else
 		host->max_cmd_len = MAX_CMDSZ;
@@ -2378,13 +2390,16 @@ skip_dpc:
 	    "Detected hba at address=%p.\n",
 	    ha);
 
-	if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif) {
+	if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) {
 		if (ha->fw_attributes & BIT_4) {
+			int prot = 0;
 			base_vha->flags.difdix_supported = 1;
 			ql_dbg(ql_dbg_init, base_vha, 0x00f1,
 			    "Registering for DIF/DIX type 1 and 3 protection.\n");
+			if (ql2xenabledif == 1)
+				prot = SHOST_DIX_TYPE0_PROTECTION;
 			scsi_host_set_prot(host,
-			    SHOST_DIF_TYPE1_PROTECTION
+			    prot | SHOST_DIF_TYPE1_PROTECTION
 			    | SHOST_DIF_TYPE2_PROTECTION
 			    | SHOST_DIF_TYPE3_PROTECTION
 			    | SHOST_DIX_TYPE1_PROTECTION
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 062c97bf62f5..13b6357c1fa2 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,7 +7,7 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.03.07.03-k"
+#define QLA2XXX_VERSION      "8.03.07.07-k"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	3
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index d2407558773f..24cacff57786 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -825,6 +825,9 @@ static void fsl_spi_cpm_free(struct mpc8xxx_spi *mspi)
 {
 	struct device *dev = mspi->dev;
 
+	if (!(mspi->flags & SPI_CPM_MODE))
+		return;
+
 	dma_unmap_single(dev, mspi->dma_dummy_rx, SPI_MRBLR, DMA_FROM_DEVICE);
 	dma_unmap_single(dev, mspi->dma_dummy_tx, PAGE_SIZE, DMA_TO_DEVICE);
 	cpm_muram_free(cpm_muram_offset(mspi->tx_bd));
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 8ac6542aedcd..fa594d604aca 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -786,9 +786,11 @@ static int __devinit spi_imx_probe(struct platform_device *pdev)
 		int cs_gpio = of_get_named_gpio(np, "cs-gpios", i);
 		if (cs_gpio < 0)
 			cs_gpio = mxc_platform_info->chipselect[i];
+
+		spi_imx->chipselect[i] = cs_gpio;
 		if (cs_gpio < 0)
 			continue;
-		spi_imx->chipselect[i] = cs_gpio;
+
 		ret = gpio_request(spi_imx->chipselect[i], DRIVER_NAME);
 		if (ret) {
 			while (i > 0) {
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 1d23f3831866..6a80749391db 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -50,6 +50,8 @@
 #define PCH_RX_THOLD		7
 #define PCH_RX_THOLD_MAX	15
 
+#define PCH_TX_THOLD		2
+
 #define PCH_MAX_BAUDRATE	5000000
 #define PCH_MAX_FIFO_DEPTH	16
 
@@ -58,6 +60,7 @@
 #define PCH_SLEEP_TIME		10
 
 #define SSN_LOW			0x02U
+#define SSN_HIGH		0x03U
 #define SSN_NO_CONTROL		0x00U
 #define PCH_MAX_CS		0xFF
 #define PCI_DEVICE_ID_GE_SPI	0x8816
@@ -316,16 +319,19 @@ static void pch_spi_handler_sub(struct pch_spi_data *data, u32 reg_spsr_val,
 
 	/* if transfer complete interrupt */
 	if (reg_spsr_val & SPSR_FI_BIT) {
-		if (tx_index < bpw_len)
+		if ((tx_index == bpw_len) && (rx_index == tx_index)) {
+			/* disable interrupts */
+			pch_spi_setclr_reg(data->master, PCH_SPCR, 0, PCH_ALL);
+
+			/* transfer is completed;
+			   inform pch_spi_process_messages */
+			data->transfer_complete = true;
+			data->transfer_active = false;
+			wake_up(&data->wait);
+		} else {
 			dev_err(&data->master->dev,
 				"%s : Transfer is not completed", __func__);
-		/* disable interrupts */
-		pch_spi_setclr_reg(data->master, PCH_SPCR, 0, PCH_ALL);
-
-		/* transfer is completed;inform pch_spi_process_messages */
-		data->transfer_complete = true;
-		data->transfer_active = false;
-		wake_up(&data->wait);
+		}
 	}
 }
 
@@ -348,16 +354,26 @@ static irqreturn_t pch_spi_handler(int irq, void *dev_id)
 			"%s returning due to suspend\n", __func__);
 		return IRQ_NONE;
 	}
-	if (data->use_dma)
-		return IRQ_NONE;
 
 	io_remap_addr = data->io_remap_addr;
 	spsr = io_remap_addr + PCH_SPSR;
 
 	reg_spsr_val = ioread32(spsr);
 
-	if (reg_spsr_val & SPSR_ORF_BIT)
-		dev_err(&board_dat->pdev->dev, "%s Over run error", __func__);
+	if (reg_spsr_val & SPSR_ORF_BIT) {
+		dev_err(&board_dat->pdev->dev, "%s Over run error\n", __func__);
+		if (data->current_msg->complete != 0) {
+			data->transfer_complete = true;
+			data->current_msg->status = -EIO;
+			data->current_msg->complete(data->current_msg->context);
+			data->bcurrent_msg_processing = false;
+			data->current_msg = NULL;
+			data->cur_trans = NULL;
+		}
+	}
+
+	if (data->use_dma)
+		return IRQ_NONE;
 
 	/* Check if the interrupt is for SPI device */
 	if (reg_spsr_val & (SPSR_FI_BIT | SPSR_RFI_BIT)) {
@@ -756,10 +772,6 @@ static void pch_spi_set_ir(struct pch_spi_data *data)
 
 	wait_event_interruptible(data->wait, data->transfer_complete);
 
-	pch_spi_writereg(data->master, PCH_SSNXCR, SSN_NO_CONTROL);
-	dev_dbg(&data->master->dev,
-		"%s:no more control over SSN-writing 0 to SSNXCR.", __func__);
-
 	/* clear all interrupts */
 	pch_spi_writereg(data->master, PCH_SPSR,
 			 pch_spi_readreg(data->master, PCH_SPSR));
@@ -815,10 +827,11 @@ static void pch_spi_copy_rx_data_for_dma(struct pch_spi_data *data, int bpw)
 	}
 }
 
-static void pch_spi_start_transfer(struct pch_spi_data *data)
+static int pch_spi_start_transfer(struct pch_spi_data *data)
 {
 	struct pch_spi_dma_ctrl *dma;
 	unsigned long flags;
+	int rtn;
 
 	dma = &data->dma;
 
@@ -833,19 +846,23 @@ static void pch_spi_start_transfer(struct pch_spi_data *data)
 				 initiating the transfer. */
 	dev_dbg(&data->master->dev,
 		"%s:waiting for transfer to get over\n", __func__);
-	wait_event_interruptible(data->wait, data->transfer_complete);
+	rtn = wait_event_interruptible_timeout(data->wait,
+					       data->transfer_complete,
+					       msecs_to_jiffies(2 * HZ));
 
 	dma_sync_sg_for_cpu(&data->master->dev, dma->sg_rx_p, dma->nent,
 			    DMA_FROM_DEVICE);
+
+	dma_sync_sg_for_cpu(&data->master->dev, dma->sg_tx_p, dma->nent,
+			    DMA_FROM_DEVICE);
+	memset(data->dma.tx_buf_virt, 0, PAGE_SIZE);
+
 	async_tx_ack(dma->desc_rx);
 	async_tx_ack(dma->desc_tx);
 	kfree(dma->sg_tx_p);
 	kfree(dma->sg_rx_p);
 
 	spin_lock_irqsave(&data->lock, flags);
-	pch_spi_writereg(data->master, PCH_SSNXCR, SSN_NO_CONTROL);
-	dev_dbg(&data->master->dev,
-		"%s:no more control over SSN-writing 0 to SSNXCR.", __func__);
 
 	/* clear fifo threshold, disable interrupts, disable SPI transfer */
 	pch_spi_setclr_reg(data->master, PCH_SPCR, 0,
@@ -858,6 +875,8 @@ static void pch_spi_start_transfer(struct pch_spi_data *data)
 	pch_spi_clear_fifo(data->master);
 
 	spin_unlock_irqrestore(&data->lock, flags);
+
+	return rtn;
 }
 
 static void pch_dma_rx_complete(void *arg)
@@ -1023,8 +1042,7 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw)
 	/* set receive fifo threshold and transmit fifo threshold */
 	pch_spi_setclr_reg(data->master, PCH_SPCR,
 			   ((size - 1) << SPCR_RFIC_FIELD) |
-			   ((PCH_MAX_FIFO_DEPTH - PCH_DMA_TRANS_SIZE) <<
-			    SPCR_TFIC_FIELD),
+			   (PCH_TX_THOLD << SPCR_TFIC_FIELD),
 			   MASK_RFIC_SPCR_BITS | MASK_TFIC_SPCR_BITS);
 
 	spin_unlock_irqrestore(&data->lock, flags);
@@ -1035,13 +1053,20 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw)
 	/* offset, length setting */
 	sg = dma->sg_rx_p;
 	for (i = 0; i < num; i++, sg++) {
-		if (i == 0) {
-			sg->offset = 0;
+		if (i == (num - 2)) {
+			sg->offset = size * i;
+			sg->offset = sg->offset * (*bpw / 8);
 			sg_set_page(sg, virt_to_page(dma->rx_buf_virt), rem,
 				    sg->offset);
 			sg_dma_len(sg) = rem;
+		} else if (i == (num - 1)) {
+			sg->offset = size * (i - 1) + rem;
+			sg->offset = sg->offset * (*bpw / 8);
+			sg_set_page(sg, virt_to_page(dma->rx_buf_virt), size,
+				    sg->offset);
+			sg_dma_len(sg) = size;
 		} else {
-			sg->offset = rem + size * (i - 1);
+			sg->offset = size * i;
 			sg->offset = sg->offset * (*bpw / 8);
 			sg_set_page(sg, virt_to_page(dma->rx_buf_virt), size,
 				    sg->offset);
@@ -1065,6 +1090,16 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw)
 	dma->desc_rx = desc_rx;
 
 	/* TX */
+	if (data->bpw_len > PCH_DMA_TRANS_SIZE) {
+		num = data->bpw_len / PCH_DMA_TRANS_SIZE;
+		size = PCH_DMA_TRANS_SIZE;
+		rem = 16;
+	} else {
+		num = 1;
+		size = data->bpw_len;
+		rem = data->bpw_len;
+	}
+
 	dma->sg_tx_p = kzalloc(sizeof(struct scatterlist)*num, GFP_ATOMIC);
 	sg_init_table(dma->sg_tx_p, num); /* Initialize SG table */
 	/* offset, length setting */
@@ -1162,6 +1197,7 @@ static void pch_spi_process_messages(struct work_struct *pwork)
 	if (data->use_dma)
 		pch_spi_request_dma(data,
 				    data->current_msg->spi->bits_per_word);
+	pch_spi_writereg(data->master, PCH_SSNXCR, SSN_NO_CONTROL);
 	do {
 		/* If we are already processing a message get the next
 		transfer structure from the message otherwise retrieve
@@ -1184,7 +1220,8 @@ static void pch_spi_process_messages(struct work_struct *pwork)
 
 		if (data->use_dma) {
 			pch_spi_handle_dma(data, &bpw);
-			pch_spi_start_transfer(data);
+			if (!pch_spi_start_transfer(data))
+				goto out;
 			pch_spi_copy_rx_data_for_dma(data, bpw);
 		} else {
 			pch_spi_set_tx(data, &bpw);
@@ -1222,6 +1259,8 @@ static void pch_spi_process_messages(struct work_struct *pwork)
 
 	} while (data->cur_trans != NULL);
 
+out:
+	pch_spi_writereg(data->master, PCH_SSNXCR, SSN_HIGH);
 	if (data->use_dma)
 		pch_spi_release_dma(data);
 }
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 06c9081d596d..2abfa28a522b 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -72,8 +72,6 @@ source "drivers/staging/phison/Kconfig"
 
 source "drivers/staging/line6/Kconfig"
 
-source "drivers/gpu/drm/vmwgfx/Kconfig"
-
 source "drivers/gpu/drm/nouveau/Kconfig"
 
 source "drivers/staging/octeon/Kconfig"
diff --git a/drivers/staging/comedi/drivers/ni_labpc.c b/drivers/staging/comedi/drivers/ni_labpc.c
index 6859af0778cf..7611def97d06 100644
--- a/drivers/staging/comedi/drivers/ni_labpc.c
+++ b/drivers/staging/comedi/drivers/ni_labpc.c
@@ -241,8 +241,10 @@ static int labpc_eeprom_write_insn(struct comedi_device *dev,
 				   struct comedi_insn *insn,
 				   unsigned int *data);
 static void labpc_adc_timing(struct comedi_device *dev, struct comedi_cmd *cmd);
-#ifdef CONFIG_COMEDI_PCI
+#ifdef CONFIG_ISA_DMA_API
 static unsigned int labpc_suggest_transfer_size(struct comedi_cmd cmd);
+#endif
+#ifdef CONFIG_COMEDI_PCI
 static int labpc_find_device(struct comedi_device *dev, int bus, int slot);
 #endif
 static int labpc_dio_mem_callback(int dir, int port, int data,
diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c
index 1a7c19ae766f..8b307b428791 100644
--- a/drivers/staging/octeon/ethernet-rx.c
+++ b/drivers/staging/octeon/ethernet-rx.c
@@ -411,7 +411,8 @@ static int cvm_oct_napi_poll(struct napi_struct *napi, int budget)
 				skb->protocol = eth_type_trans(skb, dev);
 				skb->dev = dev;
 
-				if (unlikely(work->word2.s.not_IP || work->word2.s.IP_exc || work->word2.s.L4_error))
+				if (unlikely(work->word2.s.not_IP || work->word2.s.IP_exc ||
+					work->word2.s.L4_error || !work->word2.s.tcp_or_udp))
 					skb->ip_summed = CHECKSUM_NONE;
 				else
 					skb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
index a3f5162bfedc..462fbc20561f 100644
--- a/drivers/staging/zcache/zcache-main.c
+++ b/drivers/staging/zcache/zcache-main.c
@@ -1242,7 +1242,7 @@ static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw,
 	int ret = 0;
 
 	BUG_ON(!is_ephemeral(pool));
-	zbud_decompress(virt_to_page(data), pampd);
+	zbud_decompress((struct page *)(data), pampd);
 	zbud_free_and_delist((struct zbud_hdr *)pampd);
 	atomic_dec(&zcache_curr_eph_pampd_count);
 	return ret;
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index 497b2e718a76..5b773160200f 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -1430,7 +1430,7 @@ static int iscsi_enforce_integrity_rules(
 	u8 DataSequenceInOrder = 0;
 	u8 ErrorRecoveryLevel = 0, SessionType = 0;
 	u8 IFMarker = 0, OFMarker = 0;
-	u8 IFMarkInt_Reject = 0, OFMarkInt_Reject = 0;
+	u8 IFMarkInt_Reject = 1, OFMarkInt_Reject = 1;
 	u32 FirstBurstLength = 0, MaxBurstLength = 0;
 	struct iscsi_param *param = NULL;
 
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index a0d23bc0fc98..f00137f377b2 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -875,40 +875,6 @@ void iscsit_inc_session_usage_count(struct iscsi_session *sess)
 }
 
 /*
- *	Used before iscsi_do[rx,tx]_data() to determine iov and [rx,tx]_marker
- *	array counts needed for sync and steering.
- */
-static int iscsit_determine_sync_and_steering_counts(
-	struct iscsi_conn *conn,
-	struct iscsi_data_count *count)
-{
-	u32 length = count->data_length;
-	u32 marker, markint;
-
-	count->sync_and_steering = 1;
-
-	marker = (count->type == ISCSI_RX_DATA) ?
-			conn->of_marker : conn->if_marker;
-	markint = (count->type == ISCSI_RX_DATA) ?
-			(conn->conn_ops->OFMarkInt * 4) :
-			(conn->conn_ops->IFMarkInt * 4);
-	count->ss_iov_count = count->iov_count;
-
-	while (length > 0) {
-		if (length >= marker) {
-			count->ss_iov_count += 3;
-			count->ss_marker_count += 2;
-
-			length -= marker;
-			marker = markint;
-		} else
-			length = 0;
-	}
-
-	return 0;
-}
-
-/*
  *	Setup conn->if_marker and conn->of_marker values based upon
  *	the initial marker-less interval. (see iSCSI v19 A.2)
  */
@@ -1290,7 +1256,7 @@ int iscsit_fe_sendpage_sg(
 	struct kvec iov;
 	u32 tx_hdr_size, data_len;
 	u32 offset = cmd->first_data_sg_off;
-	int tx_sent;
+	int tx_sent, iov_off;
 
 send_hdr:
 	tx_hdr_size = ISCSI_HDR_LEN;
@@ -1310,9 +1276,19 @@ send_hdr:
 	}
 
 	data_len = cmd->tx_size - tx_hdr_size - cmd->padding;
-	if (conn->conn_ops->DataDigest)
+	/*
+	 * Set iov_off used by padding and data digest tx_data() calls below
+	 * in order to determine proper offset into cmd->iov_data[]
+	 */
+	if (conn->conn_ops->DataDigest) {
 		data_len -= ISCSI_CRC_LEN;
-
+		if (cmd->padding)
+			iov_off = (cmd->iov_data_count - 2);
+		else
+			iov_off = (cmd->iov_data_count - 1);
+	} else {
+		iov_off = (cmd->iov_data_count - 1);
+	}
 	/*
 	 * Perform sendpage() for each page in the scatterlist
 	 */
@@ -1341,8 +1317,7 @@ send_pg:
 
 send_padding:
 	if (cmd->padding) {
-		struct kvec *iov_p =
-			&cmd->iov_data[cmd->iov_data_count-1];
+		struct kvec *iov_p = &cmd->iov_data[iov_off++];
 
 		tx_sent = tx_data(conn, iov_p, 1, cmd->padding);
 		if (cmd->padding != tx_sent) {
@@ -1356,8 +1331,7 @@ send_padding:
 
 send_datacrc:
 	if (conn->conn_ops->DataDigest) {
-		struct kvec *iov_d =
-			&cmd->iov_data[cmd->iov_data_count];
+		struct kvec *iov_d = &cmd->iov_data[iov_off];
 
 		tx_sent = tx_data(conn, iov_d, 1, ISCSI_CRC_LEN);
 		if (ISCSI_CRC_LEN != tx_sent) {
@@ -1431,8 +1405,7 @@ static int iscsit_do_rx_data(
 	struct iscsi_data_count *count)
 {
 	int data = count->data_length, rx_loop = 0, total_rx = 0, iov_len;
-	u32 rx_marker_val[count->ss_marker_count], rx_marker_iov = 0;
-	struct kvec iov[count->ss_iov_count], *iov_p;
+	struct kvec *iov_p;
 	struct msghdr msg;
 
 	if (!conn || !conn->sock || !conn->conn_ops)
@@ -1440,93 +1413,8 @@ static int iscsit_do_rx_data(
 
 	memset(&msg, 0, sizeof(struct msghdr));
 
-	if (count->sync_and_steering) {
-		int size = 0;
-		u32 i, orig_iov_count = 0;
-		u32 orig_iov_len = 0, orig_iov_loc = 0;
-		u32 iov_count = 0, per_iov_bytes = 0;
-		u32 *rx_marker, old_rx_marker = 0;
-		struct kvec *iov_record;
-
-		memset(&rx_marker_val, 0,
-				count->ss_marker_count * sizeof(u32));
-		memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
-
-		iov_record = count->iov;
-		orig_iov_count = count->iov_count;
-		rx_marker = &conn->of_marker;
-
-		i = 0;
-		size = data;
-		orig_iov_len = iov_record[orig_iov_loc].iov_len;
-		while (size > 0) {
-			pr_debug("rx_data: #1 orig_iov_len %u,"
-			" orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
-			pr_debug("rx_data: #2 rx_marker %u, size"
-				" %u\n", *rx_marker, size);
-
-			if (orig_iov_len >= *rx_marker) {
-				iov[iov_count].iov_len = *rx_marker;
-				iov[iov_count++].iov_base =
-					(iov_record[orig_iov_loc].iov_base +
-						per_iov_bytes);
-
-				iov[iov_count].iov_len = (MARKER_SIZE / 2);
-				iov[iov_count++].iov_base =
-					&rx_marker_val[rx_marker_iov++];
-				iov[iov_count].iov_len = (MARKER_SIZE / 2);
-				iov[iov_count++].iov_base =
-					&rx_marker_val[rx_marker_iov++];
-				old_rx_marker = *rx_marker;
-
-				/*
-				 * OFMarkInt is in 32-bit words.
-				 */
-				*rx_marker = (conn->conn_ops->OFMarkInt * 4);
-				size -= old_rx_marker;
-				orig_iov_len -= old_rx_marker;
-				per_iov_bytes += old_rx_marker;
-
-				pr_debug("rx_data: #3 new_rx_marker"
-					" %u, size %u\n", *rx_marker, size);
-			} else {
-				iov[iov_count].iov_len = orig_iov_len;
-				iov[iov_count++].iov_base =
-					(iov_record[orig_iov_loc].iov_base +
-						per_iov_bytes);
-
-				per_iov_bytes = 0;
-				*rx_marker -= orig_iov_len;
-				size -= orig_iov_len;
-
-				if (size)
-					orig_iov_len =
-					iov_record[++orig_iov_loc].iov_len;
-
-				pr_debug("rx_data: #4 new_rx_marker"
-					" %u, size %u\n", *rx_marker, size);
-			}
-		}
-		data += (rx_marker_iov * (MARKER_SIZE / 2));
-
-		iov_p	= &iov[0];
-		iov_len	= iov_count;
-
-		if (iov_count > count->ss_iov_count) {
-			pr_err("iov_count: %d, count->ss_iov_count:"
-				" %d\n", iov_count, count->ss_iov_count);
-			return -1;
-		}
-		if (rx_marker_iov > count->ss_marker_count) {
-			pr_err("rx_marker_iov: %d, count->ss_marker"
-				"_count: %d\n", rx_marker_iov,
-				count->ss_marker_count);
-			return -1;
-		}
-	} else {
-		iov_p = count->iov;
-		iov_len	= count->iov_count;
-	}
+	iov_p = count->iov;
+	iov_len	= count->iov_count;
 
 	while (total_rx < data) {
 		rx_loop = kernel_recvmsg(conn->sock, &msg, iov_p, iov_len,
@@ -1541,16 +1429,6 @@ static int iscsit_do_rx_data(
 				rx_loop, total_rx, data);
 	}
 
-	if (count->sync_and_steering) {
-		int j;
-		for (j = 0; j < rx_marker_iov; j++) {
-			pr_debug("rx_data: #5 j: %d, offset: %d\n",
-				j, rx_marker_val[j]);
-			conn->of_marker_offset = rx_marker_val[j];
-		}
-		total_rx -= (rx_marker_iov * (MARKER_SIZE / 2));
-	}
-
 	return total_rx;
 }
 
@@ -1559,8 +1437,7 @@ static int iscsit_do_tx_data(
 	struct iscsi_data_count *count)
 {
 	int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len;
-	u32 tx_marker_val[count->ss_marker_count], tx_marker_iov = 0;
-	struct kvec iov[count->ss_iov_count], *iov_p;
+	struct kvec *iov_p;
 	struct msghdr msg;
 
 	if (!conn || !conn->sock || !conn->conn_ops)
@@ -1573,98 +1450,8 @@ static int iscsit_do_tx_data(
 
 	memset(&msg, 0, sizeof(struct msghdr));
 
-	if (count->sync_and_steering) {
-		int size = 0;
-		u32 i, orig_iov_count = 0;
-		u32 orig_iov_len = 0, orig_iov_loc = 0;
-		u32 iov_count = 0, per_iov_bytes = 0;
-		u32 *tx_marker, old_tx_marker = 0;
-		struct kvec *iov_record;
-
-		memset(&tx_marker_val, 0,
-			count->ss_marker_count * sizeof(u32));
-		memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
-
-		iov_record = count->iov;
-		orig_iov_count = count->iov_count;
-		tx_marker = &conn->if_marker;
-
-		i = 0;
-		size = data;
-		orig_iov_len = iov_record[orig_iov_loc].iov_len;
-		while (size > 0) {
-			pr_debug("tx_data: #1 orig_iov_len %u,"
-			" orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
-			pr_debug("tx_data: #2 tx_marker %u, size"
-				" %u\n", *tx_marker, size);
-
-			if (orig_iov_len >= *tx_marker) {
-				iov[iov_count].iov_len = *tx_marker;
-				iov[iov_count++].iov_base =
-					(iov_record[orig_iov_loc].iov_base +
-						per_iov_bytes);
-
-				tx_marker_val[tx_marker_iov] =
-						(size - *tx_marker);
-				iov[iov_count].iov_len = (MARKER_SIZE / 2);
-				iov[iov_count++].iov_base =
-					&tx_marker_val[tx_marker_iov++];
-				iov[iov_count].iov_len = (MARKER_SIZE / 2);
-				iov[iov_count++].iov_base =
-					&tx_marker_val[tx_marker_iov++];
-				old_tx_marker = *tx_marker;
-
-				/*
-				 * IFMarkInt is in 32-bit words.
-				 */
-				*tx_marker = (conn->conn_ops->IFMarkInt * 4);
-				size -= old_tx_marker;
-				orig_iov_len -= old_tx_marker;
-				per_iov_bytes += old_tx_marker;
-
-				pr_debug("tx_data: #3 new_tx_marker"
-					" %u, size %u\n", *tx_marker, size);
-				pr_debug("tx_data: #4 offset %u\n",
-					tx_marker_val[tx_marker_iov-1]);
-			} else {
-				iov[iov_count].iov_len = orig_iov_len;
-				iov[iov_count++].iov_base
-					= (iov_record[orig_iov_loc].iov_base +
-						per_iov_bytes);
-
-				per_iov_bytes = 0;
-				*tx_marker -= orig_iov_len;
-				size -= orig_iov_len;
-
-				if (size)
-					orig_iov_len =
-					iov_record[++orig_iov_loc].iov_len;
-
-				pr_debug("tx_data: #5 new_tx_marker"
-					" %u, size %u\n", *tx_marker, size);
-			}
-		}
-
-		data += (tx_marker_iov * (MARKER_SIZE / 2));
-
-		iov_p = &iov[0];
-		iov_len = iov_count;
-
-		if (iov_count > count->ss_iov_count) {
-			pr_err("iov_count: %d, count->ss_iov_count:"
-				" %d\n", iov_count, count->ss_iov_count);
-			return -1;
-		}
-		if (tx_marker_iov > count->ss_marker_count) {
-			pr_err("tx_marker_iov: %d, count->ss_marker"
-				"_count: %d\n", tx_marker_iov,
-				count->ss_marker_count);
-			return -1;
-		}
-	} else {
-		iov_p = count->iov;
-		iov_len = count->iov_count;
-	}
+	iov_p = count->iov;
+	iov_len = count->iov_count;
 
 	while (total_tx < data) {
 		tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
@@ -1679,9 +1466,6 @@ static int iscsit_do_tx_data(
 					tx_loop, total_tx, data);
 	}
 
-	if (count->sync_and_steering)
-		total_tx -= (tx_marker_iov * (MARKER_SIZE / 2));
-
 	return total_tx;
 }
 
@@ -1702,12 +1486,6 @@ int rx_data(
 	c.data_length = data;
 	c.type = ISCSI_RX_DATA;
 
-	if (conn->conn_ops->OFMarker &&
-	   (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) {
-		if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0)
-			return -1;
-	}
-
 	return iscsit_do_rx_data(conn, &c);
 }
 
@@ -1728,12 +1506,6 @@ int tx_data(
 	c.data_length = data;
 	c.type = ISCSI_TX_DATA;
 
-	if (conn->conn_ops->IFMarker &&
-	   (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) {
-		if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0)
-			return -1;
-	}
-
 	return iscsit_do_tx_data(conn, &c);
 }
 
diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c
index 89ae923c5da6..f04d4ef99dca 100644
--- a/drivers/target/target_core_cdb.c
+++ b/drivers/target/target_core_cdb.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/ctype.h>
 #include <asm/unaligned.h>
 #include <scsi/scsi.h>
 
@@ -154,6 +155,37 @@ target_emulate_evpd_80(struct se_cmd *cmd, unsigned char *buf)
 	return 0;
 }
 
+static void
+target_parse_naa_6h_vendor_specific(struct se_device *dev, unsigned char *buf_off)
+{
+	unsigned char *p = &dev->se_sub_dev->t10_wwn.unit_serial[0];
+	unsigned char *buf = buf_off;
+	int cnt = 0, next = 1;
+	/*
+	 * Generate up to 36 bits of VENDOR SPECIFIC IDENTIFIER starting on
+	 * byte 3 bit 3-0 for NAA IEEE Registered Extended DESIGNATOR field
+	 * format, followed by 64 bits of VENDOR SPECIFIC IDENTIFIER EXTENSION
+	 * to complete the payload.  These are based from VPD=0x80 PRODUCT SERIAL
+	 * NUMBER set via vpd_unit_serial in target_core_configfs.c to ensure
+	 * per device uniqeness.
+	 */
+	while (*p != '\0') {
+		if (cnt >= 13)
+			break;
+		if (!isxdigit(*p)) {
+			p++;
+			continue;
+		}
+		if (next != 0) {
+			buf[cnt++] |= hex_to_bin(*p++);
+			next = 0;
+		} else {
+			buf[cnt] = hex_to_bin(*p++) << 4;
+			next = 1;
+		}
+	}
+}
+
 /*
  * Device identification VPD, for a complete list of
  * DESIGNATOR TYPEs see spc4r17 Table 459.
@@ -219,8 +251,7 @@ target_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
 	 * VENDOR_SPECIFIC_IDENTIFIER and
 	 * VENDOR_SPECIFIC_IDENTIFIER_EXTENTION
 	 */
-	buf[off++] |= hex_to_bin(dev->se_sub_dev->t10_wwn.unit_serial[0]);
-	hex2bin(&buf[off], &dev->se_sub_dev->t10_wwn.unit_serial[1], 12);
+	target_parse_naa_6h_vendor_specific(dev, &buf[off]);
 
 	len = 20;
 	off = (len + 4);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 8d0c58ea6316..a4b0a8d27f25 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -977,15 +977,17 @@ static void target_qf_do_work(struct work_struct *work)
 {
 	struct se_device *dev = container_of(work, struct se_device,
 					qf_work_queue);
+	LIST_HEAD(qf_cmd_list);
 	struct se_cmd *cmd, *cmd_tmp;
 
 	spin_lock_irq(&dev->qf_cmd_lock);
-	list_for_each_entry_safe(cmd, cmd_tmp, &dev->qf_cmd_list, se_qf_node) {
+	list_splice_init(&dev->qf_cmd_list, &qf_cmd_list);
+	spin_unlock_irq(&dev->qf_cmd_lock);
 
+	list_for_each_entry_safe(cmd, cmd_tmp, &qf_cmd_list, se_qf_node) {
 		list_del(&cmd->se_qf_node);
 		atomic_dec(&dev->dev_qf_count);
 		smp_mb__after_atomic_dec();
-		spin_unlock_irq(&dev->qf_cmd_lock);
 
 		pr_debug("Processing %s cmd: %p QUEUE_FULL in work queue"
 			" context: %s\n", cmd->se_tfo->get_fabric_name(), cmd,
@@ -997,10 +999,7 @@ static void target_qf_do_work(struct work_struct *work)
 		 * has been added to head of queue
 		 */
 		transport_add_cmd_to_queue(cmd, cmd->t_state);
-
-		spin_lock_irq(&dev->qf_cmd_lock);
 	}
-	spin_unlock_irq(&dev->qf_cmd_lock);
 }
 
 unsigned char *transport_dump_cmd_direction(struct se_cmd *cmd)
diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h
index bd4fe21a23b8..3749d8b4b423 100644
--- a/drivers/target/tcm_fc/tcm_fc.h
+++ b/drivers/target/tcm_fc/tcm_fc.h
@@ -98,8 +98,7 @@ struct ft_tpg {
 	struct list_head list;		/* linkage in ft_lport_acl tpg_list */
 	struct list_head lun_list;	/* head of LUNs */
 	struct se_portal_group se_tpg;
-	struct task_struct *thread;	/* processing thread */
-	struct se_queue_obj qobj;	/* queue for processing thread */
+	struct workqueue_struct *workqueue;
 };
 
 struct ft_lport_acl {
@@ -110,16 +109,10 @@ struct ft_lport_acl {
 	struct se_wwn fc_lport_wwn;
 };
 
-enum ft_cmd_state {
-	FC_CMD_ST_NEW = 0,
-	FC_CMD_ST_REJ
-};
-
 /*
  * Commands
  */
 struct ft_cmd {
-	enum ft_cmd_state state;
 	u32 lun;                        /* LUN from request */
 	struct ft_sess *sess;		/* session held for cmd */
 	struct fc_seq *seq;		/* sequence in exchange mgr */
@@ -127,7 +120,7 @@ struct ft_cmd {
 	struct fc_frame *req_frame;
 	unsigned char *cdb;		/* pointer to CDB inside frame */
 	u32 write_data_len;		/* data received on writes */
-	struct se_queue_req se_req;
+	struct work_struct work;
 	/* Local sense buffer */
 	unsigned char ft_sense_buffer[TRANSPORT_SENSE_BUFFER];
 	u32 was_ddp_setup:1;		/* Set only if ddp is setup */
@@ -177,7 +170,6 @@ int ft_is_state_remove(struct se_cmd *);
 /*
  * other internal functions.
  */
-int ft_thread(void *);
 void ft_recv_req(struct ft_sess *, struct fc_frame *);
 struct ft_tpg *ft_lport_find_tpg(struct fc_lport *);
 struct ft_node_acl *ft_acl_get(struct ft_tpg *, struct fc_rport_priv *);
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index 5654dc22f7ae..80fbcde00cb6 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -62,8 +62,8 @@ void ft_dump_cmd(struct ft_cmd *cmd, const char *caller)
 	int count;
 
 	se_cmd = &cmd->se_cmd;
-	pr_debug("%s: cmd %p state %d sess %p seq %p se_cmd %p\n",
-		caller, cmd, cmd->state, cmd->sess, cmd->seq, se_cmd);
+	pr_debug("%s: cmd %p sess %p seq %p se_cmd %p\n",
+		caller, cmd, cmd->sess, cmd->seq, se_cmd);
 	pr_debug("%s: cmd %p cdb %p\n",
 		caller, cmd, cmd->cdb);
 	pr_debug("%s: cmd %p lun %d\n", caller, cmd, cmd->lun);
@@ -90,38 +90,6 @@ void ft_dump_cmd(struct ft_cmd *cmd, const char *caller)
 		16, 4, cmd->cdb, MAX_COMMAND_SIZE, 0);
 }
 
-static void ft_queue_cmd(struct ft_sess *sess, struct ft_cmd *cmd)
-{
-	struct ft_tpg *tpg = sess->tport->tpg;
-	struct se_queue_obj *qobj = &tpg->qobj;
-	unsigned long flags;
-
-	qobj = &sess->tport->tpg->qobj;
-	spin_lock_irqsave(&qobj->cmd_queue_lock, flags);
-	list_add_tail(&cmd->se_req.qr_list, &qobj->qobj_list);
-	atomic_inc(&qobj->queue_cnt);
-	spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
-
-	wake_up_process(tpg->thread);
-}
-
-static struct ft_cmd *ft_dequeue_cmd(struct se_queue_obj *qobj)
-{
-	unsigned long flags;
-	struct se_queue_req *qr;
-
-	spin_lock_irqsave(&qobj->cmd_queue_lock, flags);
-	if (list_empty(&qobj->qobj_list)) {
-		spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
-		return NULL;
-	}
-	qr = list_first_entry(&qobj->qobj_list, struct se_queue_req, qr_list);
-	list_del(&qr->qr_list);
-	atomic_dec(&qobj->queue_cnt);
-	spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
-	return container_of(qr, struct ft_cmd, se_req);
-}
-
 static void ft_free_cmd(struct ft_cmd *cmd)
 {
 	struct fc_frame *fp;
@@ -282,9 +250,7 @@ u32 ft_get_task_tag(struct se_cmd *se_cmd)
 
 int ft_get_cmd_state(struct se_cmd *se_cmd)
 {
-	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
-
-	return cmd->state;
+	return 0;
 }
 
 int ft_is_state_remove(struct se_cmd *se_cmd)
@@ -505,6 +471,8 @@ int ft_queue_tm_resp(struct se_cmd *se_cmd)
 	return 0;
 }
 
+static void ft_send_work(struct work_struct *work);
+
 /*
  * Handle incoming FCP command.
  */
@@ -523,7 +491,9 @@ static void ft_recv_cmd(struct ft_sess *sess, struct fc_frame *fp)
 		goto busy;
 	}
 	cmd->req_frame = fp;		/* hold frame during cmd */
-	ft_queue_cmd(sess, cmd);
+
+	INIT_WORK(&cmd->work, ft_send_work);
+	queue_work(sess->tport->tpg->workqueue, &cmd->work);
 	return;
 
 busy:
@@ -563,12 +533,13 @@ void ft_recv_req(struct ft_sess *sess, struct fc_frame *fp)
 /*
  * Send new command to target.
  */
-static void ft_send_cmd(struct ft_cmd *cmd)
+static void ft_send_work(struct work_struct *work)
 {
+	struct ft_cmd *cmd = container_of(work, struct ft_cmd, work);
 	struct fc_frame_header *fh = fc_frame_header_get(cmd->req_frame);
 	struct se_cmd *se_cmd;
 	struct fcp_cmnd *fcp;
-	int data_dir;
+	int data_dir = 0;
 	u32 data_len;
 	int task_attr;
 	int ret;
@@ -675,42 +646,3 @@ static void ft_send_cmd(struct ft_cmd *cmd)
 err:
 	ft_send_resp_code_and_free(cmd, FCP_CMND_FIELDS_INVALID);
 }
-
-/*
- * Handle request in the command thread.
- */
-static void ft_exec_req(struct ft_cmd *cmd)
-{
-	pr_debug("cmd state %x\n", cmd->state);
-	switch (cmd->state) {
-	case FC_CMD_ST_NEW:
-		ft_send_cmd(cmd);
-		break;
-	default:
-		break;
-	}
-}
-
-/*
- * Processing thread.
- * Currently one thread per tpg.
- */
-int ft_thread(void *arg)
-{
-	struct ft_tpg *tpg = arg;
-	struct se_queue_obj *qobj = &tpg->qobj;
-	struct ft_cmd *cmd;
-
-	while (!kthread_should_stop()) {
-		schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT);
-		if (kthread_should_stop())
-			goto out;
-
-		cmd = ft_dequeue_cmd(qobj);
-		if (cmd)
-			ft_exec_req(cmd);
-	}
-
-out:
-	return 0;
-}
diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c
index b15879d43e22..8fa39b74f22c 100644
--- a/drivers/target/tcm_fc/tfc_conf.c
+++ b/drivers/target/tcm_fc/tfc_conf.c
@@ -327,7 +327,6 @@ static struct se_portal_group *ft_add_tpg(
 	tpg->index = index;
 	tpg->lport_acl = lacl;
 	INIT_LIST_HEAD(&tpg->lun_list);
-	transport_init_queue_obj(&tpg->qobj);
 
 	ret = core_tpg_register(&ft_configfs->tf_ops, wwn, &tpg->se_tpg,
 				tpg, TRANSPORT_TPG_TYPE_NORMAL);
@@ -336,8 +335,8 @@ static struct se_portal_group *ft_add_tpg(
 		return NULL;
 	}
 
-	tpg->thread = kthread_run(ft_thread, tpg, "ft_tpg%lu", index);
-	if (IS_ERR(tpg->thread)) {
+	tpg->workqueue = alloc_workqueue("tcm_fc", 0, 1);
+	if (!tpg->workqueue) {
 		kfree(tpg);
 		return NULL;
 	}
@@ -356,7 +355,7 @@ static void ft_del_tpg(struct se_portal_group *se_tpg)
 	pr_debug("del tpg %s\n",
 		    config_item_name(&tpg->se_tpg.tpg_group.cg_item));
 
-	kthread_stop(tpg->thread);
+	destroy_workqueue(tpg->workqueue);
 
 	/* Wait for sessions to be freed thru RCU, for BUG_ON below */
 	synchronize_rcu();
diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c
index c37f4cd96452..d35ea5a3d56c 100644
--- a/drivers/target/tcm_fc/tfc_io.c
+++ b/drivers/target/tcm_fc/tfc_io.c
@@ -219,43 +219,41 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp)
 	if (cmd->was_ddp_setup) {
 		BUG_ON(!ep);
 		BUG_ON(!lport);
-	}
-
-	/*
-	 * Doesn't expect payload if DDP is setup. Payload
-	 * is expected to be copied directly to user buffers
-	 * due to DDP (Large Rx offload),
-	 */
-	buf = fc_frame_payload_get(fp, 1);
-	if (buf)
-		pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, "
+		/*
+		 * Since DDP (Large Rx offload) was setup for this request,
+		 * payload is expected to be copied directly to user buffers.
+		 */
+		buf = fc_frame_payload_get(fp, 1);
+		if (buf)
+			pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, "
 				"cmd->sg_cnt 0x%x. DDP was setup"
 				" hence not expected to receive frame with "
-				"payload, Frame will be dropped if "
-				"'Sequence Initiative' bit in f_ctl is "
+				"payload, Frame will be dropped if"
+				"'Sequence Initiative' bit in f_ctl is"
 				"not set\n", __func__, ep->xid, f_ctl,
 				cmd->sg, cmd->sg_cnt);
-	/*
- 	 * Invalidate HW DDP context if it was setup for respective
- 	 * command. Invalidation of HW DDP context is requited in both
- 	 * situation (success and error). 
- 	 */
-	ft_invl_hw_context(cmd);
+		/*
+		 * Invalidate HW DDP context if it was setup for respective
+		 * command. Invalidation of HW DDP context is requited in both
+		 * situation (success and error).
+		 */
+		ft_invl_hw_context(cmd);
 
-	/*
-	 * If "Sequence Initiative (TSI)" bit set in f_ctl, means last
-	 * write data frame is received successfully where payload is
-	 * posted directly to user buffer and only the last frame's
-	 * header is posted in receive queue.
-	 *
-	 * If "Sequence Initiative (TSI)" bit is not set, means error
-	 * condition w.r.t. DDP, hence drop the packet and let explict
-	 * ABORTS from other end of exchange timer trigger the recovery.
-	 */
-	if (f_ctl & FC_FC_SEQ_INIT)
-		goto last_frame;
-	else
-		goto drop;
+		/*
+		 * If "Sequence Initiative (TSI)" bit set in f_ctl, means last
+		 * write data frame is received successfully where payload is
+		 * posted directly to user buffer and only the last frame's
+		 * header is posted in receive queue.
+		 *
+		 * If "Sequence Initiative (TSI)" bit is not set, means error
+		 * condition w.r.t. DDP, hence drop the packet and let explict
+		 * ABORTS from other end of exchange timer trigger the recovery.
+		 */
+		if (f_ctl & FC_FC_SEQ_INIT)
+			goto last_frame;
+		else
+			goto drop;
+	}
 
 	rel_off = ntohl(fh->fh_parm_offset);
 	frame_len = fr_len(fp);
diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c
index 225123b37f19..58be715913cd 100644
--- a/drivers/tty/serial/crisv10.c
+++ b/drivers/tty/serial/crisv10.c
@@ -4450,7 +4450,7 @@ static int __init rs_init(void)
 
 #if defined(CONFIG_ETRAX_RS485)
 #if defined(CONFIG_ETRAX_RS485_ON_PA)
-	if (cris_io_interface_allocate_pins(if_ser0, 'a', rs485_pa_bit,
+	if (cris_io_interface_allocate_pins(if_serial_0, 'a', rs485_pa_bit,
 			rs485_pa_bit)) {
 		printk(KERN_CRIT "ETRAX100LX serial: Could not allocate "
 			"RS485 pin\n");
@@ -4459,7 +4459,7 @@ static int __init rs_init(void)
 	}
 #endif
 #if defined(CONFIG_ETRAX_RS485_ON_PORT_G)
-	if (cris_io_interface_allocate_pins(if_ser0, 'g', rs485_pa_bit,
+	if (cris_io_interface_allocate_pins(if_serial_0, 'g', rs485_pa_bit,
 			rs485_port_g_bit)) {
 		printk(KERN_CRIT "ETRAX100LX serial: Could not allocate "
 			"RS485 pin\n");
diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c
index 58cf279ed879..bc95f52cad8b 100644
--- a/drivers/tty/serial/lantiq.c
+++ b/drivers/tty/serial/lantiq.c
@@ -478,8 +478,10 @@ lqasc_set_termios(struct uart_port *port,
 	spin_unlock_irqrestore(&ltq_asc_lock, flags);
 
 	/* Don't rewrite B0 */
-        if (tty_termios_baud_rate(new))
+	if (tty_termios_baud_rate(new))
 		tty_termios_encode_baud_rate(new, baud, baud);
+
+	uart_update_timeout(port, cflag, baud);
 }
 
 static const char*
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 1e96d1f1fe6b..723f8231193d 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -761,7 +761,7 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf)
 	memset(buf, 0, retval);
 	status = 0;
 
-	mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC;
+	mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC;
 
 	spin_lock_irqsave(&xhci->lock, flags);
 	/* For each port, did anything change?  If so, set that bit in buf. */
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 54139a2f06ce..952e2ded61af 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1934,8 +1934,10 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	int status = -EINPROGRESS;
 	struct urb_priv *urb_priv;
 	struct xhci_ep_ctx *ep_ctx;
+	struct list_head *tmp;
 	u32 trb_comp_code;
 	int ret = 0;
+	int td_num = 0;
 
 	slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
 	xdev = xhci->devs[slot_id];
@@ -1957,6 +1959,12 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		return -ENODEV;
 	}
 
+	/* Count current td numbers if ep->skip is set */
+	if (ep->skip) {
+		list_for_each(tmp, &ep_ring->td_list)
+			td_num++;
+	}
+
 	event_dma = le64_to_cpu(event->buffer);
 	trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
 	/* Look for common error cases */
@@ -2068,7 +2076,18 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			goto cleanup;
 		}
 
+		/* We've skipped all the TDs on the ep ring when ep->skip set */
+		if (ep->skip && td_num == 0) {
+			ep->skip = false;
+			xhci_dbg(xhci, "All tds on the ep_ring skipped. "
+						"Clear skip flag.\n");
+			ret = 0;
+			goto cleanup;
+		}
+
 		td = list_entry(ep_ring->td_list.next, struct xhci_td, td_list);
+		if (ep->skip)
+			td_num--;
 
 		/* Is this a TRB in the currently executing TD? */
 		event_seg = trb_in_td(ep_ring->deq_seg, ep_ring->dequeue,
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 410fba45378d..809cbda03d7a 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -494,15 +494,16 @@ static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason,
 		asminline_call(&cmn_regs, cru_rom_addr);
 	die_nmi_called = 1;
 	spin_unlock_irqrestore(&rom_lock, rom_pl);
+
+	if (allow_kdump)
+		hpwdt_stop();
+
 	if (!is_icru) {
 		if (cmn_regs.u1.ral == 0) {
-			printk(KERN_WARNING "hpwdt: An NMI occurred, "
+			panic("An NMI occurred, "
 				"but unable to determine source.\n");
 		}
 	}
-
-	if (allow_kdump)
-		hpwdt_stop();
 	panic("An NMI occurred, please see the Integrated "
 		"Management Log for details.\n");
 
diff --git a/drivers/watchdog/lantiq_wdt.c b/drivers/watchdog/lantiq_wdt.c
index 7d82adac1cb2..102aed0efbf1 100644
--- a/drivers/watchdog/lantiq_wdt.c
+++ b/drivers/watchdog/lantiq_wdt.c
@@ -51,16 +51,16 @@ static int ltq_wdt_ok_to_close;
 static void
 ltq_wdt_enable(void)
 {
-	ltq_wdt_timeout = ltq_wdt_timeout *
+	unsigned long int timeout = ltq_wdt_timeout *
 			(ltq_io_region_clk_rate / LTQ_WDT_DIVIDER) + 0x1000;
-	if (ltq_wdt_timeout > LTQ_MAX_TIMEOUT)
-		ltq_wdt_timeout = LTQ_MAX_TIMEOUT;
+	if (timeout > LTQ_MAX_TIMEOUT)
+		timeout = LTQ_MAX_TIMEOUT;
 
 	/* write the first password magic */
 	ltq_w32(LTQ_WDT_PW1, ltq_wdt_membase + LTQ_WDT_CR);
 	/* write the second magic plus the configuration and new timeout */
 	ltq_w32(LTQ_WDT_SR_EN | LTQ_WDT_SR_PWD | LTQ_WDT_SR_CLKDIV |
-		LTQ_WDT_PW2 | ltq_wdt_timeout, ltq_wdt_membase + LTQ_WDT_CR);
+		LTQ_WDT_PW2 | timeout, ltq_wdt_membase + LTQ_WDT_CR);
 }
 
 static void
diff --git a/drivers/watchdog/sbc_epx_c3.c b/drivers/watchdog/sbc_epx_c3.c
index 3066a5127ca8..eaca366b7234 100644
--- a/drivers/watchdog/sbc_epx_c3.c
+++ b/drivers/watchdog/sbc_epx_c3.c
@@ -173,7 +173,7 @@ static struct notifier_block epx_c3_notifier = {
 	.notifier_call = epx_c3_notify_sys,
 };
 
-static const char banner[] __initdata = KERN_INFO PFX
+static const char banner[] __initconst = KERN_INFO PFX
 	"Hardware Watchdog Timer for Winsystems EPX-C3 SBC: 0.1\n";
 
 static int __init watchdog_init(void)
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index d33520d0b4c9..1199da0f98cf 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -59,7 +59,7 @@ static struct watchdog_device *wdd;
 
 static int watchdog_ping(struct watchdog_device *wddev)
 {
-	if (test_bit(WDOG_ACTIVE, &wdd->status)) {
+	if (test_bit(WDOG_ACTIVE, &wddev->status)) {
 		if (wddev->ops->ping)
 			return wddev->ops->ping(wddev);  /* ping the watchdog */
 		else
@@ -81,12 +81,12 @@ static int watchdog_start(struct watchdog_device *wddev)
 {
 	int err;
 
-	if (!test_bit(WDOG_ACTIVE, &wdd->status)) {
+	if (!test_bit(WDOG_ACTIVE, &wddev->status)) {
 		err = wddev->ops->start(wddev);
 		if (err < 0)
 			return err;
 
-		set_bit(WDOG_ACTIVE, &wdd->status);
+		set_bit(WDOG_ACTIVE, &wddev->status);
 	}
 	return 0;
 }
@@ -105,18 +105,18 @@ static int watchdog_stop(struct watchdog_device *wddev)
 {
 	int err = -EBUSY;
 
-	if (test_bit(WDOG_NO_WAY_OUT, &wdd->status)) {
+	if (test_bit(WDOG_NO_WAY_OUT, &wddev->status)) {
 		pr_info("%s: nowayout prevents watchdog to be stopped!\n",
-							wdd->info->identity);
+							wddev->info->identity);
 		return err;
 	}
 
-	if (test_bit(WDOG_ACTIVE, &wdd->status)) {
+	if (test_bit(WDOG_ACTIVE, &wddev->status)) {
 		err = wddev->ops->stop(wddev);
 		if (err < 0)
 			return err;
 
-		clear_bit(WDOG_ACTIVE, &wdd->status);
+		clear_bit(WDOG_ACTIVE, &wddev->status);
 	}
 	return 0;
 }
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index da70f5c32eb9..7523719bf8a4 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -54,7 +54,7 @@
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
  */
-static DEFINE_SPINLOCK(irq_mapping_update_lock);
+static DEFINE_MUTEX(irq_mapping_update_lock);
 
 static LIST_HEAD(xen_irq_list_head);
 
@@ -631,7 +631,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 	int irq = -1;
 	struct physdev_irq irq_op;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = find_irq_by_gsi(gsi);
 	if (irq != -1) {
@@ -684,7 +684,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 				handle_edge_irq, name);
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -710,7 +710,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 {
 	int irq, ret;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = xen_allocate_irq_dynamic();
 	if (irq == -1)
@@ -724,10 +724,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 	if (ret < 0)
 		goto error_irq;
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return irq;
 error_irq:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	xen_free_irq(irq);
 	return -1;
 }
@@ -740,7 +740,7 @@ int xen_destroy_irq(int irq)
 	struct irq_info *info = info_for_irq(irq);
 	int rc = -ENOENT;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	desc = irq_to_desc(irq);
 	if (!desc)
@@ -766,7 +766,7 @@ int xen_destroy_irq(int irq)
 	xen_free_irq(irq);
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return rc;
 }
 
@@ -776,7 +776,7 @@ int xen_irq_from_pirq(unsigned pirq)
 
 	struct irq_info *info;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	list_for_each_entry(info, &xen_irq_list_head, list) {
 		if (info == NULL || info->type != IRQT_PIRQ)
@@ -787,7 +787,7 @@ int xen_irq_from_pirq(unsigned pirq)
 	}
 	irq = -1;
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -802,7 +802,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
 {
 	int irq;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = evtchn_to_irq[evtchn];
 
@@ -818,7 +818,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
 	}
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -829,7 +829,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 	struct evtchn_bind_ipi bind_ipi;
 	int evtchn, irq;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = per_cpu(ipi_to_irq, cpu)[ipi];
 
@@ -853,7 +853,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 	}
 
  out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return irq;
 }
 
@@ -878,7 +878,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 	struct evtchn_bind_virq bind_virq;
 	int evtchn, irq;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = per_cpu(virq_to_irq, cpu)[virq];
 
@@ -903,7 +903,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 	}
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -913,7 +913,7 @@ static void unbind_from_irq(unsigned int irq)
 	struct evtchn_close close;
 	int evtchn = evtchn_from_irq(irq);
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	if (VALID_EVTCHN(evtchn)) {
 		close.port = evtchn;
@@ -943,7 +943,7 @@ static void unbind_from_irq(unsigned int irq)
 
 	xen_free_irq(irq);
 
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 }
 
 int bind_evtchn_to_irqhandler(unsigned int evtchn,
@@ -1279,7 +1279,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
 	   will also be masked. */
 	disable_irq(irq);
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	/* After resume the irq<->evtchn mappings are all cleared out */
 	BUG_ON(evtchn_to_irq[evtchn] != -1);
@@ -1289,7 +1289,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
 
 	xen_irq_info_evtchn_init(irq, evtchn);
 
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	/* new event channels are always bound to cpu 0 */
 	irq_set_affinity(irq, cpumask_of(0));
diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c
index e0c2807b0970..181fa8158a8b 100644
--- a/drivers/zorro/zorro.c
+++ b/drivers/zorro/zorro.c
@@ -148,10 +148,10 @@ static int __init amiga_zorro_probe(struct platform_device *pdev)
 	}
 	platform_set_drvdata(pdev, bus);
 
-	/* Register all devices */
 	pr_info("Zorro: Probing AutoConfig expansion devices: %u device%s\n",
 		 zorro_num_autocon, zorro_num_autocon == 1 ? "" : "s");
 
+	/* First identify all devices ... */
 	for (i = 0; i < zorro_num_autocon; i++) {
 		z = &zorro_autocon[i];
 		z->id = (z->rom.er_Manufacturer<<16) | (z->rom.er_Product<<8);
@@ -172,6 +172,11 @@ static int __init amiga_zorro_probe(struct platform_device *pdev)
 		dev_set_name(&z->dev, "%02x", i);
 		z->dev.parent = &bus->dev;
 		z->dev.bus = &zorro_bus_type;
+	}
+
+	/* ... then register them */
+	for (i = 0; i < zorro_num_autocon; i++) {
+		z = &zorro_autocon[i];
 		error = device_register(&z->dev);
 		if (error) {
 			dev_err(&bus->dev, "Error registering device %s\n",