diff options
Diffstat (limited to 'drivers/gpu/drm')
76 files changed, 1051 insertions, 705 deletions
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 66e40398b3d3..e620807418ea 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o obj-$(CONFIG_DRM_TTM) += ttm/ obj-$(CONFIG_DRM_TDFX) += tdfx/ obj-$(CONFIG_DRM_R128) += r128/ +obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_MGA) += mga/ obj-$(CONFIG_DRM_I810) += i810/ @@ -67,4 +68,3 @@ obj-$(CONFIG_DRM_IMX) += imx/ obj-y += i2c/ obj-y += panel/ obj-y += bridge/ -obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index be6246de5091..307a309110e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -8,7 +8,6 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ kfd_process.o kfd_queue.o kfd_mqd_manager.o \ kfd_kernel_queue.o kfd_packet_manager.o \ - kfd_process_queue_manager.o kfd_device_queue_manager.o \ - kfd_interrupt.o + kfd_process_queue_manager.o kfd_device_queue_manager.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 4f7b275f2f7b..fcfdf23e1913 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -31,7 +31,6 @@ #include <uapi/linux/kfd_ioctl.h> #include <linux/time.h> #include <linux/mm.h> -#include <linux/uaccess.h> #include <uapi/asm-generic/mman-common.h> #include <asm/processor.h> #include "kfd_priv.h" @@ -121,27 +120,20 @@ static int kfd_open(struct inode *inode, struct file *filep) if (IS_ERR(process)) return PTR_ERR(process); - process->is_32bit_user_mode = is_32bit_user_mode; - dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", process->pasid, process->is_32bit_user_mode); - kfd_init_apertures(process); - return 0; } -static long kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, - void __user *arg) +static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, + void *data) { - struct kfd_ioctl_get_version_args args; + struct kfd_ioctl_get_version_args *args = data; int err = 0; - args.major_version = KFD_IOCTL_MAJOR_VERSION; - args.minor_version = KFD_IOCTL_MINOR_VERSION; - - if (copy_to_user(arg, &args, sizeof(args))) - err = -EFAULT; + args->major_version = KFD_IOCTL_MAJOR_VERSION; + args->minor_version = KFD_IOCTL_MINOR_VERSION; return err; } @@ -225,10 +217,10 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, return 0; } -static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, - void __user *arg) +static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, + void *data) { - struct kfd_ioctl_create_queue_args args; + struct kfd_ioctl_create_queue_args *args = data; struct kfd_dev *dev; int err = 0; unsigned int queue_id; @@ -237,16 +229,13 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, memset(&q_properties, 0, sizeof(struct queue_properties)); - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - pr_debug("kfd: creating queue ioctl\n"); - err = set_queue_properties_from_user(&q_properties, &args); + err = set_queue_properties_from_user(&q_properties, args); if (err) return err; - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; @@ -254,7 +243,7 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { - err = PTR_ERR(pdd); + err = -ESRCH; goto err_bind_process; } @@ -267,33 +256,26 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, if (err != 0) goto err_create_queue; - args.queue_id = queue_id; + args->queue_id = queue_id; /* Return gpu_id as doorbell offset for mmap usage */ - args.doorbell_offset = args.gpu_id << PAGE_SHIFT; - - if (copy_to_user(arg, &args, sizeof(args))) { - err = -EFAULT; - goto err_copy_args_out; - } + args->doorbell_offset = args->gpu_id << PAGE_SHIFT; mutex_unlock(&p->mutex); - pr_debug("kfd: queue id %d was created successfully\n", args.queue_id); + pr_debug("kfd: queue id %d was created successfully\n", args->queue_id); pr_debug("ring buffer address == 0x%016llX\n", - args.ring_base_address); + args->ring_base_address); pr_debug("read ptr address == 0x%016llX\n", - args.read_pointer_address); + args->read_pointer_address); pr_debug("write ptr address == 0x%016llX\n", - args.write_pointer_address); + args->write_pointer_address); return 0; -err_copy_args_out: - pqm_destroy_queue(&p->pqm, queue_id); err_create_queue: err_bind_process: mutex_unlock(&p->mutex); @@ -301,99 +283,90 @@ err_bind_process: } static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, - void __user *arg) + void *data) { int retval; - struct kfd_ioctl_destroy_queue_args args; - - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; + struct kfd_ioctl_destroy_queue_args *args = data; pr_debug("kfd: destroying queue id %d for PASID %d\n", - args.queue_id, + args->queue_id, p->pasid); mutex_lock(&p->mutex); - retval = pqm_destroy_queue(&p->pqm, args.queue_id); + retval = pqm_destroy_queue(&p->pqm, args->queue_id); mutex_unlock(&p->mutex); return retval; } static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, - void __user *arg) + void *data) { int retval; - struct kfd_ioctl_update_queue_args args; + struct kfd_ioctl_update_queue_args *args = data; struct queue_properties properties; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - if (args.queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { + if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; } - if (args.queue_priority > KFD_MAX_QUEUE_PRIORITY) { + if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); return -EINVAL; } - if ((args.ring_base_address) && + if ((args->ring_base_address) && (!access_ok(VERIFY_WRITE, - (const void __user *) args.ring_base_address, + (const void __user *) args->ring_base_address, sizeof(uint64_t)))) { pr_err("kfd: can't access ring base address\n"); return -EFAULT; } - if (!is_power_of_2(args.ring_size) && (args.ring_size != 0)) { + if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { pr_err("kfd: ring size must be a power of 2 or 0\n"); return -EINVAL; } - properties.queue_address = args.ring_base_address; - properties.queue_size = args.ring_size; - properties.queue_percent = args.queue_percentage; - properties.priority = args.queue_priority; + properties.queue_address = args->ring_base_address; + properties.queue_size = args->ring_size; + properties.queue_percent = args->queue_percentage; + properties.priority = args->queue_priority; pr_debug("kfd: updating queue id %d for PASID %d\n", - args.queue_id, p->pasid); + args->queue_id, p->pasid); mutex_lock(&p->mutex); - retval = pqm_update_queue(&p->pqm, args.queue_id, &properties); + retval = pqm_update_queue(&p->pqm, args->queue_id, &properties); mutex_unlock(&p->mutex); return retval; } -static long kfd_ioctl_set_memory_policy(struct file *filep, - struct kfd_process *p, void __user *arg) +static int kfd_ioctl_set_memory_policy(struct file *filep, + struct kfd_process *p, void *data) { - struct kfd_ioctl_set_memory_policy_args args; + struct kfd_ioctl_set_memory_policy_args *args = data; struct kfd_dev *dev; int err = 0; struct kfd_process_device *pdd; enum cache_policy default_policy, alternate_policy; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - if (args.default_policy != KFD_IOC_CACHE_POLICY_COHERENT - && args.default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { + if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT + && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { return -EINVAL; } - if (args.alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT - && args.alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { + if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT + && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { return -EINVAL; } - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; @@ -401,23 +374,23 @@ static long kfd_ioctl_set_memory_policy(struct file *filep, pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { - err = PTR_ERR(pdd); + err = -ESRCH; goto out; } - default_policy = (args.default_policy == KFD_IOC_CACHE_POLICY_COHERENT) + default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent; alternate_policy = - (args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) + (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent; if (!dev->dqm->set_cache_memory_policy(dev->dqm, &pdd->qpd, default_policy, alternate_policy, - (void __user *)args.alternate_aperture_base, - args.alternate_aperture_size)) + (void __user *)args->alternate_aperture_base, + args->alternate_aperture_size)) err = -EINVAL; out: @@ -426,53 +399,44 @@ out: return err; } -static long kfd_ioctl_get_clock_counters(struct file *filep, - struct kfd_process *p, void __user *arg) +static int kfd_ioctl_get_clock_counters(struct file *filep, + struct kfd_process *p, void *data) { - struct kfd_ioctl_get_clock_counters_args args; + struct kfd_ioctl_get_clock_counters_args *args = data; struct kfd_dev *dev; struct timespec time; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; /* Reading GPU clock counter from KGD */ - args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd); + args->gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd); /* No access to rdtsc. Using raw monotonic time */ getrawmonotonic(&time); - args.cpu_clock_counter = (uint64_t)timespec_to_ns(&time); + args->cpu_clock_counter = (uint64_t)timespec_to_ns(&time); get_monotonic_boottime(&time); - args.system_clock_counter = (uint64_t)timespec_to_ns(&time); + args->system_clock_counter = (uint64_t)timespec_to_ns(&time); /* Since the counter is in nano-seconds we use 1GHz frequency */ - args.system_clock_freq = 1000000000; - - if (copy_to_user(arg, &args, sizeof(args))) - return -EFAULT; + args->system_clock_freq = 1000000000; return 0; } static int kfd_ioctl_get_process_apertures(struct file *filp, - struct kfd_process *p, void __user *arg) + struct kfd_process *p, void *data) { - struct kfd_ioctl_get_process_apertures_args args; + struct kfd_ioctl_get_process_apertures_args *args = data; struct kfd_process_device_apertures *pAperture; struct kfd_process_device *pdd; dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid); - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - args.num_of_nodes = 0; + args->num_of_nodes = 0; mutex_lock(&p->mutex); @@ -481,7 +445,8 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, /* Run over all pdd of the process */ pdd = kfd_get_first_process_device_data(p); do { - pAperture = &args.process_apertures[args.num_of_nodes]; + pAperture = + &args->process_apertures[args->num_of_nodes]; pAperture->gpu_id = pdd->dev->id; pAperture->lds_base = pdd->lds_base; pAperture->lds_limit = pdd->lds_limit; @@ -491,7 +456,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, pAperture->scratch_limit = pdd->scratch_limit; dev_dbg(kfd_device, - "node id %u\n", args.num_of_nodes); + "node id %u\n", args->num_of_nodes); dev_dbg(kfd_device, "gpu id %u\n", pdd->dev->id); dev_dbg(kfd_device, @@ -507,80 +472,131 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, dev_dbg(kfd_device, "scratch_limit %llX\n", pdd->scratch_limit); - args.num_of_nodes++; + args->num_of_nodes++; } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL && - (args.num_of_nodes < NUM_OF_SUPPORTED_GPUS)); + (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); } mutex_unlock(&p->mutex); - if (copy_to_user(arg, &args, sizeof(args))) - return -EFAULT; - return 0; } +#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} + +/** Ioctl table */ +static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION, + kfd_ioctl_get_version, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE, + kfd_ioctl_create_queue, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE, + kfd_ioctl_destroy_queue, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY, + kfd_ioctl_set_memory_policy, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS, + kfd_ioctl_get_clock_counters, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES, + kfd_ioctl_get_process_apertures, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE, + kfd_ioctl_update_queue, 0), +}; + +#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) + static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kfd_process *process; - long err = -EINVAL; + amdkfd_ioctl_t *func; + const struct amdkfd_ioctl_desc *ioctl = NULL; + unsigned int nr = _IOC_NR(cmd); + char stack_kdata[128]; + char *kdata = NULL; + unsigned int usize, asize; + int retcode = -EINVAL; + + if (nr >= AMDKFD_CORE_IOCTL_COUNT) + goto err_i1; + + if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { + u32 amdkfd_size; + + ioctl = &amdkfd_ioctls[nr]; - dev_dbg(kfd_device, - "ioctl cmd 0x%x (#%d), arg 0x%lx\n", - cmd, _IOC_NR(cmd), arg); + amdkfd_size = _IOC_SIZE(ioctl->cmd); + usize = asize = _IOC_SIZE(cmd); + if (amdkfd_size > asize) + asize = amdkfd_size; + + cmd = ioctl->cmd; + } else + goto err_i1; + + dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg); process = kfd_get_process(current); - if (IS_ERR(process)) - return PTR_ERR(process); + if (IS_ERR(process)) { + dev_dbg(kfd_device, "no process\n"); + goto err_i1; + } - switch (cmd) { - case KFD_IOC_GET_VERSION: - err = kfd_ioctl_get_version(filep, process, (void __user *)arg); - break; - case KFD_IOC_CREATE_QUEUE: - err = kfd_ioctl_create_queue(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_DESTROY_QUEUE: - err = kfd_ioctl_destroy_queue(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_SET_MEMORY_POLICY: - err = kfd_ioctl_set_memory_policy(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_GET_CLOCK_COUNTERS: - err = kfd_ioctl_get_clock_counters(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_GET_PROCESS_APERTURES: - err = kfd_ioctl_get_process_apertures(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_UPDATE_QUEUE: - err = kfd_ioctl_update_queue(filep, process, - (void __user *)arg); - break; - - default: - dev_err(kfd_device, - "unknown ioctl cmd 0x%x, arg 0x%lx)\n", - cmd, arg); - err = -EINVAL; - break; + /* Do not trust userspace, use our own definition */ + func = ioctl->func; + + if (unlikely(!func)) { + dev_dbg(kfd_device, "no function\n"); + retcode = -EINVAL; + goto err_i1; } - if (err < 0) - dev_err(kfd_device, - "ioctl error %ld for ioctl cmd 0x%x (#%d)\n", - err, cmd, _IOC_NR(cmd)); + if (cmd & (IOC_IN | IOC_OUT)) { + if (asize <= sizeof(stack_kdata)) { + kdata = stack_kdata; + } else { + kdata = kmalloc(asize, GFP_KERNEL); + if (!kdata) { + retcode = -ENOMEM; + goto err_i1; + } + } + if (asize > usize) + memset(kdata + usize, 0, asize - usize); + } - return err; + if (cmd & IOC_IN) { + if (copy_from_user(kdata, (void __user *)arg, usize) != 0) { + retcode = -EFAULT; + goto err_i1; + } + } else if (cmd & IOC_OUT) { + memset(kdata, 0, usize); + } + + retcode = func(filep, process, kdata); + + if (cmd & IOC_OUT) + if (copy_to_user((void __user *)arg, kdata, usize) != 0) + retcode = -EFAULT; + +err_i1: + if (!ioctl) + dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", + task_pid_nr(current), cmd, nr); + + if (kdata != stack_kdata) + kfree(kdata); + + if (retcode) + dev_dbg(kfd_device, "ret = %d\n", retcode); + + return retcode; } static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 43884ebd4303..633532a2e7ec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -192,13 +192,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_topology_add_device_error; } - if (kfd_interrupt_init(kfd)) { - dev_err(kfd_device, - "Error initializing interrupts for device (%x:%x)\n", - kfd->pdev->vendor, kfd->pdev->device); - goto kfd_interrupt_error; - } - if (!device_iommu_pasid_init(kfd)) { dev_err(kfd_device, "Error initializing iommuv2 for device (%x:%x)\n", @@ -237,8 +230,6 @@ dqm_start_error: device_queue_manager_error: amd_iommu_free_device(kfd->pdev); device_iommu_pasid_error: - kfd_interrupt_exit(kfd); -kfd_interrupt_error: kfd_topology_remove_device(kfd); kfd_topology_add_device_error: kfd2kgd->fini_sa_manager(kfd->kgd); @@ -254,7 +245,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) if (kfd->init_complete) { device_queue_manager_uninit(kfd->dqm); amd_iommu_free_device(kfd->pdev); - kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); } @@ -296,13 +286,5 @@ int kgd2kfd_resume(struct kfd_dev *kfd) /* This is called directly from KGD at ISR. */ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { - if (kfd->init_complete) { - spin_lock(&kfd->interrupt_lock); - - if (kfd->interrupts_active - && enqueue_ih_ring_entry(kfd, ih_ring_entry)) - schedule_work(&kfd->interrupt_work); - - spin_unlock(&kfd->interrupt_lock); - } + /* Process interrupts / schedule work as necessary */ } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 924e90c072e5..30c8fda9622e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -161,6 +161,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm, { int bit = qpd->vmid - KFD_VMID_START_OFFSET; + /* Release the vmid mapping */ + set_pasid_vmid_mapping(dqm, 0, qpd->vmid); + set_bit(bit, (unsigned long *)&dqm->vmid_bitmap); qpd->vmid = 0; q->properties.vmid = 0; @@ -272,6 +275,18 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, return retval; } + pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n", + q->pipe, + q->queue); + + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, + q->queue, (uint32_t __user *) q->properties.write_ptr); + if (retval != 0) { + deallocate_hqd(dqm, q); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + return retval; + } + return 0; } @@ -320,6 +335,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) { int retval; struct mqd_manager *mqd; + bool prev_active = false; BUG_ON(!dqm || !q || !q->mqd); @@ -330,10 +346,18 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) return -ENOMEM; } - retval = mqd->update_mqd(mqd, q->mqd, &q->properties); if (q->properties.is_active == true) + prev_active = true; + + /* + * + * check active state vs. the previous state + * and modify counter accordingly + */ + retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + if ((q->properties.is_active == true) && (prev_active == false)) dqm->queue_count++; - else + else if ((q->properties.is_active == false) && (prev_active == true)) dqm->queue_count--; if (sched_policy != KFD_SCHED_POLICY_NO_HWS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 66df4da01c29..e64aa99e5e41 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -299,13 +299,13 @@ int kfd_init_apertures(struct kfd_process *process) struct kfd_dev *dev; struct kfd_process_device *pdd; - mutex_lock(&process->mutex); - /*Iterating over all devices*/ while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && id < NUM_OF_SUPPORTED_GPUS) { pdd = kfd_get_process_device_data(dev, process, 1); + if (!pdd) + return -1; /* * For 64 bit process aperture will be statically reserved in @@ -348,8 +348,6 @@ int kfd_init_apertures(struct kfd_process *process) id++; } - mutex_unlock(&process->mutex); - return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c deleted file mode 100644 index 5b999095a1f7..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * KFD Interrupts. - * - * AMD GPUs deliver interrupts by pushing an interrupt description onto the - * interrupt ring and then sending an interrupt. KGD receives the interrupt - * in ISR and sends us a pointer to each new entry on the interrupt ring. - * - * We generally can't process interrupt-signaled events from ISR, so we call - * out to each interrupt client module (currently only the scheduler) to ask if - * each interrupt is interesting. If they return true, then it requires further - * processing so we copy it to an internal interrupt ring and call each - * interrupt client again from a work-queue. - * - * There's no acknowledgment for the interrupts we use. The hardware simply - * queues a new interrupt each time without waiting. - * - * The fixed-size internal queue means that it's possible for us to lose - * interrupts because we have no back-pressure to the hardware. - */ - -#include <linux/slab.h> -#include <linux/device.h> -#include "kfd_priv.h" - -#define KFD_INTERRUPT_RING_SIZE 256 - -static void interrupt_wq(struct work_struct *); - -int kfd_interrupt_init(struct kfd_dev *kfd) -{ - void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, - kfd->device_info->ih_ring_entry_size, - GFP_KERNEL); - if (!interrupt_ring) - return -ENOMEM; - - kfd->interrupt_ring = interrupt_ring; - kfd->interrupt_ring_size = - KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; - atomic_set(&kfd->interrupt_ring_wptr, 0); - atomic_set(&kfd->interrupt_ring_rptr, 0); - - spin_lock_init(&kfd->interrupt_lock); - - INIT_WORK(&kfd->interrupt_work, interrupt_wq); - - kfd->interrupts_active = true; - - /* - * After this function returns, the interrupt will be enabled. This - * barrier ensures that the interrupt running on a different processor - * sees all the above writes. - */ - smp_wmb(); - - return 0; -} - -void kfd_interrupt_exit(struct kfd_dev *kfd) -{ - /* - * Stop the interrupt handler from writing to the ring and scheduling - * workqueue items. The spinlock ensures that any interrupt running - * after we have unlocked sees interrupts_active = false. - */ - unsigned long flags; - - spin_lock_irqsave(&kfd->interrupt_lock, flags); - kfd->interrupts_active = false; - spin_unlock_irqrestore(&kfd->interrupt_lock, flags); - - /* - * Flush_scheduled_work ensures that there are no outstanding - * work-queue items that will access interrupt_ring. New work items - * can't be created because we stopped interrupt handling above. - */ - flush_scheduled_work(); - - kfree(kfd->interrupt_ring); -} - -/* - * This assumes that it can't be called concurrently with itself - * but only with dequeue_ih_ring_entry. - */ -bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) -{ - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); - - if ((rptr - wptr) % kfd->interrupt_ring_size == - kfd->device_info->ih_ring_entry_size) { - /* This is very bad, the system is likely to hang. */ - dev_err_ratelimited(kfd_chardev(), - "Interrupt ring overflow, dropping interrupt.\n"); - return false; - } - - memcpy(kfd->interrupt_ring + wptr, ih_ring_entry, - kfd->device_info->ih_ring_entry_size); - - wptr = (wptr + kfd->device_info->ih_ring_entry_size) % - kfd->interrupt_ring_size; - smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */ - atomic_set(&kfd->interrupt_ring_wptr, wptr); - - return true; -} - -/* - * This assumes that it can't be called concurrently with itself - * but only with enqueue_ih_ring_entry. - */ -static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) -{ - /* - * Assume that wait queues have an implicit barrier, i.e. anything that - * happened in the ISR before it queued work is visible. - */ - - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); - - if (rptr == wptr) - return false; - - memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, - kfd->device_info->ih_ring_entry_size); - - rptr = (rptr + kfd->device_info->ih_ring_entry_size) % - kfd->interrupt_ring_size; - - /* - * Ensure the rptr write update is not visible until - * memcpy has finished reading. - */ - smp_mb(); - atomic_set(&kfd->interrupt_ring_rptr, rptr); - - return true; -} - -static void interrupt_wq(struct work_struct *work) -{ - struct kfd_dev *dev = container_of(work, struct kfd_dev, - interrupt_work); - - uint32_t ih_ring_entry[DIV_ROUND_UP( - dev->device_info->ih_ring_entry_size, - sizeof(uint32_t))]; - - while (dequeue_ih_ring_entry(dev, ih_ring_entry)) - ; -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index adc31474e786..4c3828cf45bf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -184,7 +184,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { - return kfd2kgd->hqd_is_occupies(mm->dev->kgd, queue_address, + return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address, pipe_id, queue_id); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index 71699ad97d74..4c25ef504f79 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -32,7 +32,7 @@ int kfd_pasid_init(void) { pasid_limit = max_num_of_processes; - pasid_bitmap = kzalloc(BITS_TO_LONGS(pasid_limit), GFP_KERNEL); + pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL); if (!pasid_bitmap) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f9fb81e3bb09..b3dc13c83169 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -135,22 +135,10 @@ struct kfd_dev { struct kgd2kfd_shared_resources shared_resources; - void *interrupt_ring; - size_t interrupt_ring_size; - atomic_t interrupt_ring_rptr; - atomic_t interrupt_ring_wptr; - struct work_struct interrupt_work; - spinlock_t interrupt_lock; - /* QCM Device instance */ struct device_queue_manager *dqm; bool init_complete; - /* - * Interrupts of interest to KFD are copied - * from the HW ring into a SW ring. - */ - bool interrupts_active; }; /* KGD2KFD callbacks */ @@ -463,6 +451,24 @@ struct kfd_process { bool is_32bit_user_mode; }; +/** + * Ioctl function type. + * + * \param filep pointer to file structure. + * \param p amdkfd process pointer. + * \param data pointer to arg that was copied from user. + */ +typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, + void *data); + +struct amdkfd_ioctl_desc { + unsigned int cmd; + int flags; + amdkfd_ioctl_t *func; + unsigned int cmd_drv; + const char *name; +}; + void kfd_process_create_wq(void); void kfd_process_destroy_wq(void); struct kfd_process *kfd_create_process(const struct task_struct *); @@ -513,10 +519,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); /* Interrupts */ -int kfd_interrupt_init(struct kfd_dev *dev); -void kfd_interrupt_exit(struct kfd_dev *dev); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); -bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); /* Power Management */ void kgd2kfd_suspend(struct kfd_dev *kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index b85eb0b830b4..3c76ef05cbcf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -26,6 +26,8 @@ #include <linux/slab.h> #include <linux/amd-iommu.h> #include <linux/notifier.h> +#include <linux/compat.h> + struct mm_struct; #include "kfd_priv.h" @@ -285,8 +287,15 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (err != 0) goto err_process_pqm_init; + /* init process apertures*/ + process->is_32bit_user_mode = is_compat_task(); + if (kfd_init_apertures(process) != 0) + goto err_init_apretures; + return process; +err_init_apretures: + pqm_uninit(&process->pqm); err_process_pqm_init: hash_del_rcu(&process->kfd_processes); synchronize_rcu(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 5733e2859e8a..cca1708fd811 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -700,8 +700,6 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.simd_per_cu); sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu", dev->node_props.max_slots_scratch_cu); - sysfs_show_32bit_prop(buffer, "engine_id", - dev->node_props.engine_id); sysfs_show_32bit_prop(buffer, "vendor_id", dev->node_props.vendor_id); sysfs_show_32bit_prop(buffer, "device_id", @@ -715,6 +713,12 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->gpu->kgd)); sysfs_show_64bit_prop(buffer, "local_mem_size", kfd2kgd->get_vmem_size(dev->gpu->kgd)); + + sysfs_show_32bit_prop(buffer, "fw_version", + kfd2kgd->get_fw_version( + dev->gpu->kgd, + KGD_ENGINE_MEC1)); + } ret = sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute", @@ -917,7 +921,7 @@ static int kfd_build_sysfs_node_tree(void) uint32_t i = 0; list_for_each_entry(dev, &topology_device_list, list) { - ret = kfd_build_sysfs_node_entry(dev, 0); + ret = kfd_build_sysfs_node_entry(dev, i); if (ret < 0) return ret; i++; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 9c729dd8dd50..96a512208fad 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -45,6 +45,17 @@ enum kgd_memory_pool { KGD_POOL_FRAMEBUFFER = 3, }; +enum kgd_engine_type { + KGD_ENGINE_PFP = 1, + KGD_ENGINE_ME, + KGD_ENGINE_CE, + KGD_ENGINE_MEC1, + KGD_ENGINE_MEC2, + KGD_ENGINE_RLC, + KGD_ENGINE_SDMA, + KGD_ENGINE_MAX +}; + struct kgd2kfd_shared_resources { /* Bit n == 1 means VMID n is available for KFD. */ unsigned int compute_vmid_bitmap; @@ -137,6 +148,8 @@ struct kgd2kfd_calls { * * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot. * + * @get_fw_version: Returns FW versions from the header + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -170,12 +183,14 @@ struct kfd2kgd_calls { int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); - bool (*hqd_is_occupies)(struct kgd_dev *kgd, uint64_t queue_address, + bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id); + uint16_t (*get_fw_version)(struct kgd_dev *kgd, + enum kgd_engine_type type); }; bool kgd2kfd_init(unsigned interface_version, diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 4a78a773151c..bbdbe4721573 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -61,7 +61,7 @@ drm_atomic_helper_plane_changed(struct drm_atomic_state *state, struct drm_crtc_state *crtc_state; if (plane->state->crtc) { - crtc_state = state->crtc_states[drm_crtc_index(plane->crtc)]; + crtc_state = state->crtc_states[drm_crtc_index(plane->state->crtc)]; if (WARN_ON(!crtc_state)) return; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 52ce26d6b4fb..cf775a4449c1 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -741,7 +741,9 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info) int i, j, rc = 0; int start; - drm_modeset_lock_all(dev); + if (__drm_modeset_lock_all(dev, !!oops_in_progress)) { + return -EBUSY; + } if (!drm_fb_helper_is_bound(fb_helper)) { drm_modeset_unlock_all(dev); return -EBUSY; @@ -915,7 +917,9 @@ int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, int ret = 0; int i; - drm_modeset_lock_all(dev); + if (__drm_modeset_lock_all(dev, !!oops_in_progress)) { + return -EBUSY; + } if (!drm_fb_helper_is_bound(fb_helper)) { drm_modeset_unlock_all(dev); return -EBUSY; diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index f5a5f18efa5b..4d79dad9d44f 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -830,6 +830,8 @@ drm_get_last_vbltimestamp(struct drm_device *dev, int crtc, * vblank events since the system was booted, including lost events due to * modesetting activity. * + * This is the legacy version of drm_crtc_vblank_count(). + * * Returns: * The software vblank counter. */ @@ -844,6 +846,25 @@ u32 drm_vblank_count(struct drm_device *dev, int crtc) EXPORT_SYMBOL(drm_vblank_count); /** + * drm_crtc_vblank_count - retrieve "cooked" vblank counter value + * @crtc: which counter to retrieve + * + * Fetches the "cooked" vblank count value that represents the number of + * vblank events since the system was booted, including lost events due to + * modesetting activity. + * + * This is the native KMS version of drm_vblank_count(). + * + * Returns: + * The software vblank counter. + */ +u32 drm_crtc_vblank_count(struct drm_crtc *crtc) +{ + return drm_vblank_count(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_vblank_count); + +/** * drm_vblank_count_and_time - retrieve "cooked" vblank counter value * and the system timestamp corresponding to that vblank counter value. * @@ -904,6 +925,8 @@ static void send_vblank_event(struct drm_device *dev, * * Updates sequence # and timestamp on event, and sends it to userspace. * Caller must hold event lock. + * + * This is the legacy version of drm_crtc_send_vblank_event(). */ void drm_send_vblank_event(struct drm_device *dev, int crtc, struct drm_pending_vblank_event *e) @@ -923,6 +946,23 @@ void drm_send_vblank_event(struct drm_device *dev, int crtc, EXPORT_SYMBOL(drm_send_vblank_event); /** + * drm_crtc_send_vblank_event - helper to send vblank event after pageflip + * @crtc: the source CRTC of the vblank event + * @e: the event to send + * + * Updates sequence # and timestamp on event, and sends it to userspace. + * Caller must hold event lock. + * + * This is the native KMS version of drm_send_vblank_event(). + */ +void drm_crtc_send_vblank_event(struct drm_crtc *crtc, + struct drm_pending_vblank_event *e) +{ + drm_send_vblank_event(crtc->dev, drm_crtc_index(crtc), e); +} +EXPORT_SYMBOL(drm_crtc_send_vblank_event); + +/** * drm_vblank_enable - enable the vblank interrupt on a CRTC * @dev: DRM device * @crtc: CRTC in question @@ -1594,6 +1634,8 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc) * * Drivers should call this routine in their vblank interrupt handlers to * update the vblank counter and send any signals that may be pending. + * + * This is the legacy version of drm_crtc_handle_vblank(). */ bool drm_handle_vblank(struct drm_device *dev, int crtc) { @@ -1670,3 +1712,21 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) return true; } EXPORT_SYMBOL(drm_handle_vblank); + +/** + * drm_crtc_handle_vblank - handle a vblank event + * @crtc: where this event occurred + * + * Drivers should call this routine in their vblank interrupt handlers to + * update the vblank counter and send any signals that may be pending. + * + * This is the native KMS version of drm_handle_vblank(). + * + * Returns: + * True if the event was successfully handled, false on failure. + */ +bool drm_crtc_handle_vblank(struct drm_crtc *crtc) +{ + return drm_handle_vblank(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_handle_vblank); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 121470a83d1a..1bcbe07cecfc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -645,18 +645,6 @@ static int exynos_drm_init(void) if (!is_exynos) return -ENODEV; - /* - * Register device object only in case of Exynos SoC. - * - * Below codes resolves temporarily infinite loop issue incurred - * by Exynos drm driver when using multi-platform kernel. - * So these codes will be replaced with more generic way later. - */ - if (!of_machine_is_compatible("samsung,exynos3") && - !of_machine_is_compatible("samsung,exynos4") && - !of_machine_is_compatible("samsung,exynos5")) - return -ENODEV; - exynos_drm_pdev = platform_device_register_simple("exynos-drm", -1, NULL, 0); if (IS_ERR(exynos_drm_pdev)) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 5765a161abdd..98051e8e855a 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1669,7 +1669,6 @@ static void hdmi_mode_apply(struct hdmi_context *hdata) static void hdmiphy_conf_reset(struct hdmi_context *hdata) { - u8 buffer[2]; u32 reg; clk_disable_unprepare(hdata->res.sclk_hdmi); @@ -1677,11 +1676,8 @@ static void hdmiphy_conf_reset(struct hdmi_context *hdata) clk_prepare_enable(hdata->res.sclk_hdmi); /* operation mode */ - buffer[0] = 0x1f; - buffer[1] = 0x00; - - if (hdata->hdmiphy_port) - i2c_master_send(hdata->hdmiphy_port, buffer, 2); + hdmiphy_reg_writeb(hdata, HDMIPHY_MODE_SET_DONE, + HDMI_PHY_ENABLE_MODE_SET); if (hdata->type == HDMI_TYPE13) reg = HDMI_V13_PHY_RSTOUT; diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 820b76234ef4..064ed6597def 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1026,6 +1026,7 @@ static void mixer_win_disable(struct exynos_drm_manager *mgr, int zpos) static void mixer_wait_for_vblank(struct exynos_drm_manager *mgr) { struct mixer_context *mixer_ctx = mgr_to_mixer(mgr); + int err; mutex_lock(&mixer_ctx->mixer_mutex); if (!mixer_ctx->powered) { @@ -1034,7 +1035,11 @@ static void mixer_wait_for_vblank(struct exynos_drm_manager *mgr) } mutex_unlock(&mixer_ctx->mixer_mutex); - drm_vblank_get(mgr->crtc->dev, mixer_ctx->pipe); + err = drm_vblank_get(mgr->crtc->dev, mixer_ctx->pipe); + if (err < 0) { + DRM_DEBUG_KMS("failed to acquire vblank counter\n"); + return; + } atomic_set(&mixer_ctx->wait_vsync_event, 1); @@ -1262,8 +1267,6 @@ static int mixer_bind(struct device *dev, struct device *manager, void *data) return ret; } - pm_runtime_enable(dev); - return 0; } @@ -1272,8 +1275,6 @@ static void mixer_unbind(struct device *dev, struct device *master, void *data) struct mixer_context *ctx = dev_get_drvdata(dev); mixer_mgr_remove(&ctx->manager); - - pm_runtime_disable(dev); } static const struct component_ops mixer_component_ops = { diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f990ab4c3efb..574057cd1d09 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -811,6 +811,8 @@ int i915_reset(struct drm_device *dev) if (!i915.reset) return 0; + intel_reset_gt_powersave(dev); + mutex_lock(&dev->struct_mutex); i915_gem_reset(dev); @@ -880,7 +882,7 @@ int i915_reset(struct drm_device *dev) * of re-init after reset. */ if (INTEL_INFO(dev)->gen > 5) - intel_reset_gt_powersave(dev); + intel_enable_gt_powersave(dev); } else { mutex_unlock(&dev->struct_mutex); } @@ -1584,7 +1586,7 @@ static struct drm_driver driver = { .gem_prime_import = i915_gem_prime_import, .dumb_create = i915_gem_dumb_create, - .dumb_map_offset = i915_gem_dumb_map_offset, + .dumb_map_offset = i915_gem_mmap_gtt, .dumb_destroy = drm_gem_dumb_destroy, .ioctls = i915_ioctls, .fops = &i915_driver_fops, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 63bcda5541ec..e9f891c432f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1756,8 +1756,6 @@ struct drm_i915_private { */ struct workqueue_struct *dp_wq; - uint32_t bios_vgacntr; - /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { int (*do_execbuf)(struct drm_device *dev, struct drm_file *file, @@ -2501,9 +2499,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); -int i915_gem_dumb_map_offset(struct drm_file *file_priv, - struct drm_device *dev, uint32_t handle, - uint64_t *offset); +int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev, + uint32_t handle, uint64_t *offset); /** * Returns true if seq1 is later than seq2. */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4a9faea626db..76354d3ba925 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -401,7 +401,6 @@ static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, - bool dumb, uint32_t *handle_p) { struct drm_i915_gem_object *obj; @@ -417,7 +416,6 @@ i915_gem_create(struct drm_file *file, if (obj == NULL) return -ENOMEM; - obj->base.dumb = dumb; ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(&obj->base); @@ -437,7 +435,7 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, - args->size, true, &args->handle); + args->size, &args->handle); } /** @@ -450,7 +448,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_create *args = data; return i915_gem_create(file, dev, - args->size, false, &args->handle); + args->size, &args->handle); } static inline int @@ -1050,6 +1048,7 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_pwrite *args = data; struct drm_i915_gem_object *obj; int ret; @@ -1069,9 +1068,11 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, return -EFAULT; } + intel_runtime_pm_get(dev_priv); + ret = i915_mutex_lock_interruptible(dev); if (ret) - return ret; + goto put_rpm; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { @@ -1123,6 +1124,9 @@ out: drm_gem_object_unreference(&obj->base); unlock: mutex_unlock(&dev->struct_mutex); +put_rpm: + intel_runtime_pm_put(dev_priv); + return ret; } @@ -1840,10 +1844,10 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) drm_gem_free_mmap_offset(&obj->base); } -static int +int i915_gem_mmap_gtt(struct drm_file *file, struct drm_device *dev, - uint32_t handle, bool dumb, + uint32_t handle, uint64_t *offset) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -1860,13 +1864,6 @@ i915_gem_mmap_gtt(struct drm_file *file, goto unlock; } - /* - * We don't allow dumb mmaps on objects created using another - * interface. - */ - WARN_ONCE(dumb && !(obj->base.dumb || obj->base.import_attach), - "Illegal dumb map of accelerated buffer.\n"); - if (obj->base.size > dev_priv->gtt.mappable_end) { ret = -E2BIG; goto out; @@ -1891,15 +1888,6 @@ unlock: return ret; } -int -i915_gem_dumb_map_offset(struct drm_file *file, - struct drm_device *dev, - uint32_t handle, - uint64_t *offset) -{ - return i915_gem_mmap_gtt(file, dev, handle, true, offset); -} - /** * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing * @dev: DRM device @@ -1921,7 +1909,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_mmap_gtt *args = data; - return i915_gem_mmap_gtt(file, dev, args->handle, false, &args->offset); + return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); } static inline int @@ -5167,7 +5155,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) if (!mutex_is_locked(mutex)) return false; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) +#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) return mutex->owner == task; #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d17ff435f276..d011ec82ef1e 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -473,7 +473,12 @@ mi_set_context(struct intel_engine_cs *ring, u32 hw_flags) { u32 flags = hw_flags | MI_MM_SPACE_GTT; - int ret; + const int num_rings = + /* Use an extended w/a on ivb+ if signalling from other rings */ + i915_semaphore_is_enabled(ring->dev) ? + hweight32(INTEL_INFO(ring->dev)->ring_mask) - 1 : + 0; + int len, i, ret; /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value @@ -490,15 +495,31 @@ mi_set_context(struct intel_engine_cs *ring, if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); - ret = intel_ring_begin(ring, 6); + + len = 4; + if (INTEL_INFO(ring->dev)->gen >= 7) + len += 2 + (num_rings ? 4*num_rings + 2 : 0); + + ret = intel_ring_begin(ring, len); if (ret) return ret; /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ - if (INTEL_INFO(ring->dev)->gen >= 7) + if (INTEL_INFO(ring->dev)->gen >= 7) { intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); - else - intel_ring_emit(ring, MI_NOOP); + if (num_rings) { + struct intel_engine_cs *signaller; + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); + for_each_ring(signaller, to_i915(ring->dev), i) { + if (signaller == ring) + continue; + + intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base)); + intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + } + } + } intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_SET_CONTEXT); @@ -510,10 +531,21 @@ mi_set_context(struct intel_engine_cs *ring, */ intel_ring_emit(ring, MI_NOOP); - if (INTEL_INFO(ring->dev)->gen >= 7) + if (INTEL_INFO(ring->dev)->gen >= 7) { + if (num_rings) { + struct intel_engine_cs *signaller; + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); + for_each_ring(signaller, to_i915(ring->dev), i) { + if (signaller == ring) + continue; + + intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base)); + intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + } + } intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); - else - intel_ring_emit(ring, MI_NOOP); + } intel_ring_advance(ring); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f06027ba3ee5..11738316394a 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -121,9 +121,6 @@ eb_lookup_vmas(struct eb_vmas *eb, goto err; } - WARN_ONCE(obj->base.dumb, - "GPU use of dumb buffer is illegal.\n"); - drm_gem_object_reference(&obj->base); list_add_tail(&obj->obj_exec_link, &objects); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 981834b0f9b6..b051a238baf9 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -281,13 +281,34 @@ void gen6_enable_rps_interrupts(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; spin_lock_irq(&dev_priv->irq_lock); + WARN_ON(dev_priv->rps.pm_iir); WARN_ON(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); dev_priv->rps.interrupts_enabled = true; + I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) | + dev_priv->pm_rps_events); gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); + spin_unlock_irq(&dev_priv->irq_lock); } +u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask) +{ + /* + * SNB,IVB can while VLV,CHV may hard hang on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. + * + * TODO: verify if this can be reproduced on VLV,CHV. + */ + if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv)) + mask &= ~GEN6_PM_RP_UP_EI_EXPIRED; + + if (INTEL_INFO(dev_priv)->gen >= 8) + mask &= ~GEN8_PMINTR_REDIRECT_TO_NON_DISP; + + return mask; +} + void gen6_disable_rps_interrupts(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -300,8 +321,7 @@ void gen6_disable_rps_interrupts(struct drm_device *dev) spin_lock_irq(&dev_priv->irq_lock); - I915_WRITE(GEN6_PMINTRMSK, INTEL_INFO(dev_priv)->gen >= 8 ? - ~GEN8_PMINTR_REDIRECT_TO_NON_DISP : ~0); + I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); __gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events); I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) & @@ -3307,8 +3327,10 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) GEN5_IRQ_INIT(GT, dev_priv->gt_irq_mask, gt_irqs); if (INTEL_INFO(dev)->gen >= 6) { - pm_irqs |= dev_priv->pm_rps_events; - + /* + * RPS interrupts will get enabled/disabled on demand when RPS + * itself is enabled/disabled. + */ if (HAS_VEBOX(dev)) pm_irqs |= PM_VEBOX_USER_INTERRUPT; @@ -3520,7 +3542,11 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) dev_priv->pm_irq_mask = 0xffffffff; GEN8_IRQ_INIT_NDX(GT, 0, ~gt_interrupts[0], gt_interrupts[0]); GEN8_IRQ_INIT_NDX(GT, 1, ~gt_interrupts[1], gt_interrupts[1]); - GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_irq_mask, dev_priv->pm_rps_events); + /* + * RPS interrupts will get enabled/disabled on demand when RPS itself + * is enabled/disabled. + */ + GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_irq_mask, 0); GEN8_IRQ_INIT_NDX(GT, 3, ~gt_interrupts[3], gt_interrupts[3]); } @@ -3609,7 +3635,7 @@ static void vlv_display_irq_uninstall(struct drm_i915_private *dev_priv) vlv_display_irq_reset(dev_priv); - dev_priv->irq_mask = 0; + dev_priv->irq_mask = ~0; } static void valleyview_irq_uninstall(struct drm_device *dev) @@ -3715,8 +3741,6 @@ static bool i8xx_handle_vblank(struct drm_device *dev, if ((iir & flip_pending) == 0) goto check_page_flip; - intel_prepare_page_flip(dev, plane); - /* We detect FlipDone by looking for the change in PendingFlip from '1' * to '0' on the following vblank, i.e. IIR has the Pendingflip * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence @@ -3726,6 +3750,7 @@ static bool i8xx_handle_vblank(struct drm_device *dev, if (I915_READ16(ISR) & flip_pending) goto check_page_flip; + intel_prepare_page_flip(dev, plane); intel_finish_page_flip(dev, pipe); return true; @@ -3897,8 +3922,6 @@ static bool i915_handle_vblank(struct drm_device *dev, if ((iir & flip_pending) == 0) goto check_page_flip; - intel_prepare_page_flip(dev, plane); - /* We detect FlipDone by looking for the change in PendingFlip from '1' * to '0' on the following vblank, i.e. IIR has the Pendingflip * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence @@ -3908,6 +3931,7 @@ static bool i915_handle_vblank(struct drm_device *dev, if (I915_READ(ISR) & flip_pending) goto check_page_flip; + intel_prepare_page_flip(dev, plane); intel_finish_page_flip(dev, pipe); return true; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index eefdc238f70b..172de3b3433b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -395,6 +395,7 @@ #define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) +#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) @@ -1128,6 +1129,7 @@ enum punit_power_well { #define GEN6_VERSYNC (RING_SYNC_1(VEBOX_RING_BASE)) #define GEN6_VEVSYNC (RING_SYNC_2(VEBOX_RING_BASE)) #define GEN6_NOSYNC 0 +#define RING_PSMI_CTL(base) ((base)+0x50) #define RING_MAX_IDLE(base) ((base)+0x54) #define RING_HWS_PGA(base) ((base)+0x80) #define RING_HWS_PGA_GEN6(base) ((base)+0x2080) @@ -1458,6 +1460,7 @@ enum punit_power_well { #define GEN6_BLITTER_FBC_NOTIFY (1<<3) #define GEN6_RC_SLEEP_PSMI_CONTROL 0x2050 +#define GEN6_PSMI_SLEEP_MSG_DISABLE (1 << 0) #define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12) #define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1<<10) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fb3e3d429191..e7a16f119a29 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9815,7 +9815,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (obj->tiling_mode != work->old_fb_obj->tiling_mode) /* vlv: DISPLAY_FLIP fails to change tiling */ ring = NULL; - } else if (IS_IVYBRIDGE(dev)) { + } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { ring = &dev_priv->ring[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { ring = obj->ring; @@ -13057,11 +13057,7 @@ static void i915_disable_vga(struct drm_device *dev) vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); udelay(300); - /* - * Fujitsu-Siemens Lifebook S6010 (830) has problems resuming - * from S3 without preserving (some of?) the other bits. - */ - I915_WRITE(vga_reg, dev_priv->bios_vgacntr | VGA_DISP_DISABLE); + I915_WRITE(vga_reg, VGA_DISP_DISABLE); POSTING_READ(vga_reg); } @@ -13146,8 +13142,6 @@ void intel_modeset_init(struct drm_device *dev) intel_shared_dpll_init(dev); - /* save the BIOS value before clobbering it */ - dev_priv->bios_vgacntr = I915_READ(i915_vgacntrl_reg(dev)); /* Just disable it once at startup */ i915_disable_vga(dev); intel_setup_outputs(dev); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 25fdbb16d4e0..3b40a17b8852 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -794,6 +794,7 @@ void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_reset_rps_interrupts(struct drm_device *dev); void gen6_enable_rps_interrupts(struct drm_device *dev); void gen6_disable_rps_interrupts(struct drm_device *dev); +u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask); void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv); static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1f4b56e273c8..bf814a64582a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4363,16 +4363,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED); mask &= dev_priv->pm_rps_events; - /* IVB and SNB hard hangs on looping batchbuffer - * if GEN6_PM_UP_EI_EXPIRED is masked. - */ - if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev)) - mask |= GEN6_PM_RP_UP_EI_EXPIRED; - - if (IS_GEN8(dev_priv->dev)) - mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP; - - return ~mask; + return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); } /* gen6_set_rps is called to update the frequency request, but should also be @@ -4441,7 +4432,8 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) return; /* Mask turbo interrupt so that they will not come in between */ - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); + I915_WRITE(GEN6_PMINTRMSK, + gen6_sanitize_rps_pm_mask(dev_priv, ~0)); vlv_force_gfx_clock(dev_priv, true); @@ -6191,6 +6183,20 @@ void intel_cleanup_gt_powersave(struct drm_device *dev) valleyview_cleanup_gt_powersave(dev); } +static void gen6_suspend_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + flush_delayed_work(&dev_priv->rps.delayed_resume_work); + + /* + * TODO: disable RPS interrupts on GEN9+ too once RPS support + * is added for it. + */ + if (INTEL_INFO(dev)->gen < 9) + gen6_disable_rps_interrupts(dev); +} + /** * intel_suspend_gt_powersave - suspend PM work and helper threads * @dev: drm device @@ -6206,14 +6212,7 @@ void intel_suspend_gt_powersave(struct drm_device *dev) if (INTEL_INFO(dev)->gen < 6) return; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - /* - * TODO: disable RPS interrupts on GEN9+ too once RPS support - * is added for it. - */ - if (INTEL_INFO(dev)->gen < 9) - gen6_disable_rps_interrupts(dev); + gen6_suspend_rps(dev); /* Force GPU to min freq during suspend */ gen6_rps_idle(dev_priv); @@ -6316,8 +6315,11 @@ void intel_reset_gt_powersave(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + if (INTEL_INFO(dev)->gen < 6) + return; + + gen6_suspend_rps(dev); dev_priv->rps.enabled = false; - intel_enable_gt_powersave(dev); } static void ibx_init_clock_gating(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9f445e9a75d1..c7bc93d28d84 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -362,12 +362,15 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; /* * TLB invalidate requires a post-sync write. */ flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; + /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set. */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index f5a78d53e297..ac6da7102fbb 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -615,29 +615,6 @@ static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv, vlv_power_sequencer_reset(dev_priv); } -static void check_power_well_state(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - bool enabled = power_well->ops->is_enabled(dev_priv, power_well); - - if (power_well->always_on || !i915.disable_power_well) { - if (!enabled) - goto mismatch; - - return; - } - - if (enabled != (power_well->count > 0)) - goto mismatch; - - return; - -mismatch: - WARN(1, "state mismatch for '%s' (always_on %d hw state %d use-count %d disable_power_well %d\n", - power_well->name, power_well->always_on, enabled, - power_well->count, i915.disable_power_well); -} - /** * intel_display_power_get - grab a power domain reference * @dev_priv: i915 device instance @@ -669,8 +646,6 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, power_well->ops->enable(dev_priv, power_well); power_well->hw_enabled = true; } - - check_power_well_state(dev_priv, power_well); } power_domains->domain_use_count[domain]++; @@ -709,8 +684,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, power_well->hw_enabled = false; power_well->ops->disable(dev_priv, power_well); } - - check_power_well_state(dev_priv, power_well); } mutex_unlock(&power_domains->lock); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index aa873048308b..94a5bee69fe7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -386,9 +386,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu) msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id); drm_gem_object_unreference(gpu->memptrs_bo); } - if (gpu->pm4) - release_firmware(gpu->pm4); - if (gpu->pfp) - release_firmware(gpu->pfp); + release_firmware(gpu->pm4); + release_firmware(gpu->pfp); msm_gpu_cleanup(&gpu->base); } diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c index fbebb0405d76..b4e70e0e3cfa 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c @@ -141,6 +141,15 @@ static int hpd_enable(struct hdmi_connector *hdmi_connector) uint32_t hpd_ctrl; int i, ret; + for (i = 0; i < config->hpd_reg_cnt; i++) { + ret = regulator_enable(hdmi->hpd_regs[i]); + if (ret) { + dev_err(dev->dev, "failed to enable hpd regulator: %s (%d)\n", + config->hpd_reg_names[i], ret); + goto fail; + } + } + ret = gpio_config(hdmi, true); if (ret) { dev_err(dev->dev, "failed to configure GPIOs: %d\n", ret); @@ -164,15 +173,6 @@ static int hpd_enable(struct hdmi_connector *hdmi_connector) } } - for (i = 0; i < config->hpd_reg_cnt; i++) { - ret = regulator_enable(hdmi->hpd_regs[i]); - if (ret) { - dev_err(dev->dev, "failed to enable hpd regulator: %s (%d)\n", - config->hpd_reg_names[i], ret); - goto fail; - } - } - hdmi_set_mode(hdmi, false); phy->funcs->reset(phy); hdmi_set_mode(hdmi, true); @@ -200,7 +200,7 @@ fail: return ret; } -static int hdp_disable(struct hdmi_connector *hdmi_connector) +static void hdp_disable(struct hdmi_connector *hdmi_connector) { struct hdmi *hdmi = hdmi_connector->hdmi; const struct hdmi_platform_config *config = hdmi->config; @@ -212,28 +212,19 @@ static int hdp_disable(struct hdmi_connector *hdmi_connector) hdmi_set_mode(hdmi, false); - for (i = 0; i < config->hpd_reg_cnt; i++) { - ret = regulator_disable(hdmi->hpd_regs[i]); - if (ret) { - dev_err(dev->dev, "failed to disable hpd regulator: %s (%d)\n", - config->hpd_reg_names[i], ret); - goto fail; - } - } - for (i = 0; i < config->hpd_clk_cnt; i++) clk_disable_unprepare(hdmi->hpd_clks[i]); ret = gpio_config(hdmi, false); - if (ret) { - dev_err(dev->dev, "failed to unconfigure GPIOs: %d\n", ret); - goto fail; - } - - return 0; + if (ret) + dev_warn(dev->dev, "failed to unconfigure GPIOs: %d\n", ret); -fail: - return ret; + for (i = 0; i < config->hpd_reg_cnt; i++) { + ret = regulator_disable(hdmi->hpd_regs[i]); + if (ret) + dev_warn(dev->dev, "failed to disable hpd regulator: %s (%d)\n", + config->hpd_reg_names[i], ret); + } } static void @@ -260,11 +251,11 @@ void hdmi_connector_irq(struct drm_connector *connector) (hpd_int_status & HDMI_HPD_INT_STATUS_INT)) { bool detected = !!(hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED); - DBG("status=%04x, ctrl=%04x", hpd_int_status, hpd_int_ctrl); - - /* ack the irq: */ + /* ack & disable (temporarily) HPD events: */ hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL, - hpd_int_ctrl | HDMI_HPD_INT_CTRL_INT_ACK); + HDMI_HPD_INT_CTRL_INT_ACK); + + DBG("status=%04x, ctrl=%04x", hpd_int_status, hpd_int_ctrl); /* detect disconnect if we are connected or visa versa: */ hpd_int_ctrl = HDMI_HPD_INT_CTRL_INT_EN; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index a7672e100d8b..3449213f1e76 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -331,17 +331,8 @@ static int mdp4_crtc_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state) { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - struct drm_device *dev = crtc->dev; - DBG("%s: check", mdp4_crtc->name); - - if (mdp4_crtc->event) { - dev_err(dev->dev, "already pending flip!\n"); - return -EBUSY; - } - // TODO anything else to check? - return 0; } @@ -357,7 +348,7 @@ static void mdp4_crtc_atomic_flush(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; unsigned long flags; - DBG("%s: flush", mdp4_crtc->name); + DBG("%s: event: %p", mdp4_crtc->name, crtc->state->event); WARN_ON(mdp4_crtc->event); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 0e9a2e3a82d7..f021f960a8a2 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -303,11 +303,6 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc, DBG("%s: check", mdp5_crtc->name); - if (mdp5_crtc->event) { - dev_err(dev->dev, "already pending flip!\n"); - return -EBUSY; - } - /* request a free CTL, if none is already allocated for this CRTC */ if (state->enable && !mdp5_crtc->ctl) { mdp5_crtc->ctl = mdp5_ctlm_request(mdp5_kms->ctlm, crtc); @@ -364,7 +359,7 @@ static void mdp5_crtc_atomic_flush(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; unsigned long flags; - DBG("%s: flush", mdp5_crtc->name); + DBG("%s: event: %p", mdp5_crtc->name, crtc->state->event); WARN_ON(mdp5_crtc->event); @@ -460,10 +455,7 @@ void mdp5_crtc_set_intf(struct drm_crtc *crtc, int intf, /* now that we know what irq's we want: */ mdp5_crtc->err.irqmask = intf2err(intf); mdp5_crtc->vblank.irqmask = intf2vblank(intf); - - /* when called from modeset_init(), skip the rest until later: */ - if (!mdp5_kms) - return; + mdp_irq_update(&mdp5_kms->base); spin_lock_irqsave(&mdp5_kms->resource_lock, flags); intf_sel = mdp5_read(mdp5_kms, REG_MDP5_DISP_INTF_SEL); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index a11f1b80c488..9f01a4f21af2 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -216,17 +216,7 @@ static int modeset_init(struct mdp5_kms *mdp5_kms) goto fail; } - /* NOTE: the vsync and error irq's are actually associated with - * the INTF/encoder.. the easiest way to deal with this (ie. what - * we do now) is assume a fixed relationship between crtc's and - * encoders. I'm not sure if there is ever a need to more freely - * assign crtcs to encoders, but if there is then we need to take - * care of error and vblank irq's that the crtc has registered, - * and also update user-requested vblank_mask. - */ - encoder->possible_crtcs = BIT(0); - mdp5_crtc_set_intf(priv->crtcs[0], 3, INTF_HDMI); - + encoder->possible_crtcs = (1 << priv->num_crtcs) - 1;; priv->encoders[priv->num_encoders++] = encoder; /* Construct bridge/connector for HDMI: */ diff --git a/drivers/gpu/drm/msm/mdp/mdp_kms.c b/drivers/gpu/drm/msm/mdp/mdp_kms.c index 03455b64a245..2a731722d840 100644 --- a/drivers/gpu/drm/msm/mdp/mdp_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp_kms.c @@ -42,7 +42,10 @@ static void update_irq(struct mdp_kms *mdp_kms) mdp_kms->funcs->set_irqmask(mdp_kms, irqmask); } -static void update_irq_unlocked(struct mdp_kms *mdp_kms) +/* if an mdp_irq's irqmask has changed, such as when mdp5 crtc<->encoder + * link changes, this must be called to figure out the new global irqmask + */ +void mdp_irq_update(struct mdp_kms *mdp_kms) { unsigned long flags; spin_lock_irqsave(&list_lock, flags); @@ -122,7 +125,7 @@ void mdp_irq_register(struct mdp_kms *mdp_kms, struct mdp_irq *irq) spin_unlock_irqrestore(&list_lock, flags); if (needs_update) - update_irq_unlocked(mdp_kms); + mdp_irq_update(mdp_kms); } void mdp_irq_unregister(struct mdp_kms *mdp_kms, struct mdp_irq *irq) @@ -141,5 +144,5 @@ void mdp_irq_unregister(struct mdp_kms *mdp_kms, struct mdp_irq *irq) spin_unlock_irqrestore(&list_lock, flags); if (needs_update) - update_irq_unlocked(mdp_kms); + mdp_irq_update(mdp_kms); } diff --git a/drivers/gpu/drm/msm/mdp/mdp_kms.h b/drivers/gpu/drm/msm/mdp/mdp_kms.h index 99557b5ad4fd..b268ce95d394 100644 --- a/drivers/gpu/drm/msm/mdp/mdp_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp_kms.h @@ -75,7 +75,7 @@ void mdp_update_vblank_mask(struct mdp_kms *mdp_kms, uint32_t mask, bool enable) void mdp_irq_wait(struct mdp_kms *mdp_kms, uint32_t irqmask); void mdp_irq_register(struct mdp_kms *mdp_kms, struct mdp_irq *irq); void mdp_irq_unregister(struct mdp_kms *mdp_kms, struct mdp_irq *irq); - +void mdp_irq_update(struct mdp_kms *mdp_kms); /* * pixel format helpers: diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index f0de412e13dc..191968256c58 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -23,10 +23,41 @@ struct msm_commit { struct drm_atomic_state *state; uint32_t fence; struct msm_fence_cb fence_cb; + uint32_t crtc_mask; }; static void fence_cb(struct msm_fence_cb *cb); +/* block until specified crtcs are no longer pending update, and + * atomically mark them as pending update + */ +static int start_atomic(struct msm_drm_private *priv, uint32_t crtc_mask) +{ + int ret; + + spin_lock(&priv->pending_crtcs_event.lock); + ret = wait_event_interruptible_locked(priv->pending_crtcs_event, + !(priv->pending_crtcs & crtc_mask)); + if (ret == 0) { + DBG("start: %08x", crtc_mask); + priv->pending_crtcs |= crtc_mask; + } + spin_unlock(&priv->pending_crtcs_event.lock); + + return ret; +} + +/* clear specified crtcs (no longer pending update) + */ +static void end_atomic(struct msm_drm_private *priv, uint32_t crtc_mask) +{ + spin_lock(&priv->pending_crtcs_event.lock); + DBG("end: %08x", crtc_mask); + priv->pending_crtcs &= ~crtc_mask; + wake_up_all_locked(&priv->pending_crtcs_event); + spin_unlock(&priv->pending_crtcs_event.lock); +} + static struct msm_commit *new_commit(struct drm_atomic_state *state) { struct msm_commit *c = kzalloc(sizeof(*c), GFP_KERNEL); @@ -58,12 +89,27 @@ static void complete_commit(struct msm_commit *c) drm_atomic_helper_commit_post_planes(dev, state); + /* NOTE: _wait_for_vblanks() only waits for vblank on + * enabled CRTCs. So we end up faulting when disabling + * due to (potentially) unref'ing the outgoing fb's + * before the vblank when the disable has latched. + * + * But if it did wait on disabled (or newly disabled) + * CRTCs, that would be racy (ie. we could have missed + * the irq. We need some way to poll for pipe shut + * down. Or just live with occasionally hitting the + * timeout in the CRTC disable path (which really should + * not be critical path) + */ + drm_atomic_helper_wait_for_vblanks(dev, state); drm_atomic_helper_cleanup_planes(dev, state); drm_atomic_state_free(state); + end_atomic(dev->dev_private, c->crtc_mask); + kfree(c); } @@ -97,8 +143,9 @@ static void add_fb(struct msm_commit *c, struct drm_framebuffer *fb) int msm_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool async) { - struct msm_commit *c; int nplanes = dev->mode_config.num_total_plane; + int ncrtcs = dev->mode_config.num_crtc; + struct msm_commit *c; int i, ret; ret = drm_atomic_helper_prepare_planes(dev, state); @@ -106,6 +153,18 @@ int msm_atomic_commit(struct drm_device *dev, return ret; c = new_commit(state); + if (!c) + return -ENOMEM; + + /* + * Figure out what crtcs we have: + */ + for (i = 0; i < ncrtcs; i++) { + struct drm_crtc *crtc = state->crtcs[i]; + if (!crtc) + continue; + c->crtc_mask |= (1 << drm_crtc_index(crtc)); + } /* * Figure out what fence to wait for: @@ -122,6 +181,14 @@ int msm_atomic_commit(struct drm_device *dev, } /* + * Wait for pending updates on any of the same crtc's and then + * mark our set of crtc's as busy: + */ + ret = start_atomic(dev->dev_private, c->crtc_mask); + if (ret) + return ret; + + /* * This is the point of no return - everything below never fails except * when the hw goes bonghits. Which means we can commit the new state on * the software side now. diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index c795217e1bfc..9a61546a0b05 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -193,6 +193,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags) priv->wq = alloc_ordered_workqueue("msm", 0); init_waitqueue_head(&priv->fence_event); + init_waitqueue_head(&priv->pending_crtcs_event); INIT_LIST_HEAD(&priv->inactive_list); INIT_LIST_HEAD(&priv->fence_cbs); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 136303818436..b69ef2d5a26c 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -96,6 +96,10 @@ struct msm_drm_private { /* callbacks deferred until bo is inactive: */ struct list_head fence_cbs; + /* crtcs pending async atomic updates: */ + uint32_t pending_crtcs; + wait_queue_head_t pending_crtcs_event; + /* registered MMUs: */ unsigned int num_mmus; struct msm_mmu *mmus[NUM_DOMAINS]; diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 94d55e526b4e..1f3af13ccede 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -190,8 +190,7 @@ fail_unlock: fail: if (ret) { - if (fbi) - framebuffer_release(fbi); + framebuffer_release(fbi); if (fb) { drm_framebuffer_unregister_private(fb); drm_framebuffer_remove(fb); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 4a6f0e49d5b5..49dea4fb55ac 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -535,8 +535,7 @@ void msm_gem_free_object(struct drm_gem_object *obj) drm_free_large(msm_obj->pages); } else { - if (msm_obj->vaddr) - vunmap(msm_obj->vaddr); + vunmap(msm_obj->vaddr); put_pages(obj); } diff --git a/drivers/gpu/drm/nouveau/core/core/event.c b/drivers/gpu/drm/nouveau/core/core/event.c index ff2b434b3db4..760947e380c9 100644 --- a/drivers/gpu/drm/nouveau/core/core/event.c +++ b/drivers/gpu/drm/nouveau/core/core/event.c @@ -26,7 +26,7 @@ void nvkm_event_put(struct nvkm_event *event, u32 types, int index) { - BUG_ON(!spin_is_locked(&event->refs_lock)); + assert_spin_locked(&event->refs_lock); while (types) { int type = __ffs(types); types &= ~(1 << type); if (--event->refs[index * event->types_nr + type] == 0) { @@ -39,7 +39,7 @@ nvkm_event_put(struct nvkm_event *event, u32 types, int index) void nvkm_event_get(struct nvkm_event *event, u32 types, int index) { - BUG_ON(!spin_is_locked(&event->refs_lock)); + assert_spin_locked(&event->refs_lock); while (types) { int type = __ffs(types); types &= ~(1 << type); if (++event->refs[index * event->types_nr + type] == 1) { diff --git a/drivers/gpu/drm/nouveau/core/core/notify.c b/drivers/gpu/drm/nouveau/core/core/notify.c index d1bcde55e9d7..839a32577680 100644 --- a/drivers/gpu/drm/nouveau/core/core/notify.c +++ b/drivers/gpu/drm/nouveau/core/core/notify.c @@ -98,7 +98,7 @@ nvkm_notify_send(struct nvkm_notify *notify, void *data, u32 size) struct nvkm_event *event = notify->event; unsigned long flags; - BUG_ON(!spin_is_locked(&event->list_lock)); + assert_spin_locked(&event->list_lock); BUG_ON(size != notify->size); spin_lock_irqsave(&event->refs_lock, flags); diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c index 674da1f095b2..732922690653 100644 --- a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c @@ -249,6 +249,39 @@ nve0_identify(struct nouveau_device *device) device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; device->oclass[NVDEV_ENGINE_PERFMON] = &nvf0_perfmon_oclass; break; + case 0x106: + device->cname = "GK208B"; + device->oclass[NVDEV_SUBDEV_VBIOS ] = &nouveau_bios_oclass; + device->oclass[NVDEV_SUBDEV_GPIO ] = nve0_gpio_oclass; + device->oclass[NVDEV_SUBDEV_I2C ] = nve0_i2c_oclass; + device->oclass[NVDEV_SUBDEV_FUSE ] = &gf100_fuse_oclass; + device->oclass[NVDEV_SUBDEV_CLOCK ] = &nve0_clock_oclass; + device->oclass[NVDEV_SUBDEV_THERM ] = &nvd0_therm_oclass; + device->oclass[NVDEV_SUBDEV_MXM ] = &nv50_mxm_oclass; + device->oclass[NVDEV_SUBDEV_DEVINIT] = nvc0_devinit_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = gk20a_mc_oclass; + device->oclass[NVDEV_SUBDEV_BUS ] = nvc0_bus_oclass; + device->oclass[NVDEV_SUBDEV_TIMER ] = &nv04_timer_oclass; + device->oclass[NVDEV_SUBDEV_FB ] = nve0_fb_oclass; + device->oclass[NVDEV_SUBDEV_LTC ] = gk104_ltc_oclass; + device->oclass[NVDEV_SUBDEV_IBUS ] = &nve0_ibus_oclass; + device->oclass[NVDEV_SUBDEV_INSTMEM] = nv50_instmem_oclass; + device->oclass[NVDEV_SUBDEV_VM ] = &nvc0_vmmgr_oclass; + device->oclass[NVDEV_SUBDEV_BAR ] = &nvc0_bar_oclass; + device->oclass[NVDEV_SUBDEV_PWR ] = nv108_pwr_oclass; + device->oclass[NVDEV_SUBDEV_VOLT ] = &nv40_volt_oclass; + device->oclass[NVDEV_ENGINE_DMAOBJ ] = nvd0_dmaeng_oclass; + device->oclass[NVDEV_ENGINE_FIFO ] = nv108_fifo_oclass; + device->oclass[NVDEV_ENGINE_SW ] = nvc0_software_oclass; + device->oclass[NVDEV_ENGINE_GR ] = nv108_graph_oclass; + device->oclass[NVDEV_ENGINE_DISP ] = nvf0_disp_oclass; + device->oclass[NVDEV_ENGINE_COPY0 ] = &nve0_copy0_oclass; + device->oclass[NVDEV_ENGINE_COPY1 ] = &nve0_copy1_oclass; + device->oclass[NVDEV_ENGINE_COPY2 ] = &nve0_copy2_oclass; + device->oclass[NVDEV_ENGINE_BSP ] = &nve0_bsp_oclass; + device->oclass[NVDEV_ENGINE_VP ] = &nve0_vp_oclass; + device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; + break; case 0x108: device->cname = "GK208"; device->oclass[NVDEV_SUBDEV_VBIOS ] = &nouveau_bios_oclass; diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c b/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c index 5e58bba0dd5c..a7a890fad1e5 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c @@ -44,8 +44,10 @@ static void pramin_fini(void *data) { struct priv *priv = data; - nv_wr32(priv->bios, 0x001700, priv->bar0); - kfree(priv); + if (priv) { + nv_wr32(priv->bios, 0x001700, priv->bar0); + kfree(priv); + } } static void * diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c index 00f2ca7e44a5..033a8e999497 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c @@ -24,34 +24,71 @@ #include "nv50.h" +struct nvaa_ram_priv { + struct nouveau_ram base; + u64 poller_base; +}; + static int nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nouveau_oclass *oclass, void *data, u32 datasize, struct nouveau_object **pobject) { - const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */ - const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */ + u32 rsvd_head = ( 256 * 1024); /* vga memory */ + u32 rsvd_tail = (1024 * 1024); /* vbios etc */ struct nouveau_fb *pfb = nouveau_fb(parent); - struct nouveau_ram *ram; + struct nvaa_ram_priv *priv; int ret; - ret = nouveau_ram_create(parent, engine, oclass, &ram); - *pobject = nv_object(ram); + ret = nouveau_ram_create(parent, engine, oclass, &priv); + *pobject = nv_object(priv); if (ret) return ret; - ram->size = nv_rd32(pfb, 0x10020c); - ram->size = (ram->size & 0xffffff00) | ((ram->size & 0x000000ff) << 32); + priv->base.type = NV_MEM_TYPE_STOLEN; + priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; + priv->base.size = (u64)nv_rd32(pfb, 0x100e14) << 12; - ret = nouveau_mm_init(&pfb->vram, rsvd_head, (ram->size >> 12) - - (rsvd_head + rsvd_tail), 1); + rsvd_tail += 0x1000; + priv->poller_base = priv->base.size - rsvd_tail; + + ret = nouveau_mm_init(&pfb->vram, rsvd_head >> 12, + (priv->base.size - (rsvd_head + rsvd_tail)) >> 12, + 1); if (ret) return ret; - ram->type = NV_MEM_TYPE_STOLEN; - ram->stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; - ram->get = nv50_ram_get; - ram->put = nv50_ram_put; + priv->base.get = nv50_ram_get; + priv->base.put = nv50_ram_put; + return 0; +} + +static int +nvaa_ram_init(struct nouveau_object *object) +{ + struct nouveau_fb *pfb = nouveau_fb(object); + struct nvaa_ram_priv *priv = (void *)object; + int ret; + u64 dniso, hostnb, flush; + + ret = nouveau_ram_init(&priv->base); + if (ret) + return ret; + + dniso = ((priv->base.size - (priv->poller_base + 0x00)) >> 5) - 1; + hostnb = ((priv->base.size - (priv->poller_base + 0x20)) >> 5) - 1; + flush = ((priv->base.size - (priv->poller_base + 0x40)) >> 5) - 1; + + /* Enable NISO poller for various clients and set their associated + * read address, only for MCP77/78 and MCP79/7A. (fd#25701) + */ + nv_wr32(pfb, 0x100c18, dniso); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00000001); + nv_wr32(pfb, 0x100c1c, hostnb); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00000002); + nv_wr32(pfb, 0x100c24, flush); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00010000); + return 0; } @@ -60,7 +97,7 @@ nvaa_ram_oclass = { .ofuncs = &(struct nouveau_ofuncs) { .ctor = nvaa_ram_ctor, .dtor = _nouveau_ram_dtor, - .init = _nouveau_ram_init, + .init = nvaa_ram_init, .fini = _nouveau_ram_fini, }, }; diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c index a75c35ccf25c..165401c4045c 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c @@ -24,13 +24,6 @@ #include "nv04.h" -static void -nv4c_mc_msi_rearm(struct nouveau_mc *pmc) -{ - struct nv04_mc_priv *priv = (void *)pmc; - nv_wr08(priv, 0x088050, 0xff); -} - struct nouveau_oclass * nv4c_mc_oclass = &(struct nouveau_mc_oclass) { .base.handle = NV_SUBDEV(MC, 0x4c), @@ -41,5 +34,4 @@ nv4c_mc_oclass = &(struct nouveau_mc_oclass) { .fini = _nouveau_mc_fini, }, .intr = nv04_mc_intr, - .msi_rearm = nv4c_mc_msi_rearm, }.base; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 21ec561edc99..bba2960d3dfb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1572,8 +1572,10 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) * so use the DMA API for them. */ if (!nv_device_is_cpu_coherent(device) && - ttm->caching_state == tt_uncached) + ttm->caching_state == tt_uncached) { ttm_dma_unpopulate(ttm_dma, dev->dev); + return; + } #if __OS_HAS_AGP if (drm->agp.stat == ENABLED) { diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 5d93902a91ab..f8042433752b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -876,7 +876,6 @@ nouveau_display_dumb_create(struct drm_file *file_priv, struct drm_device *dev, if (ret) return ret; - bo->gem.dumb = true; ret = drm_gem_handle_create(file_priv, &bo->gem, &args->handle); drm_gem_object_unreference_unlocked(&bo->gem); return ret; @@ -892,14 +891,6 @@ nouveau_display_dumb_map_offset(struct drm_file *file_priv, gem = drm_gem_object_lookup(dev, file_priv, handle); if (gem) { struct nouveau_bo *bo = nouveau_gem_object(gem); - - /* - * We don't allow dumb mmaps on objects created using another - * interface. - */ - WARN_ONCE(!(gem->dumb || gem->import_attach), - "Illegal dumb map of accelerated buffer.\n"); - *poffset = drm_vma_node_offset_addr(&bo->bo.vma_node); drm_gem_object_unreference_unlocked(gem); return 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 28d51a22a4bf..bf0f9e21d714 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -36,7 +36,14 @@ void nouveau_gem_object_del(struct drm_gem_object *gem) { struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct ttm_buffer_object *bo = &nvbo->bo; + struct device *dev = drm->dev->dev; + int ret; + + ret = pm_runtime_get_sync(dev); + if (WARN_ON(ret < 0 && ret != -EACCES)) + return; if (gem->import_attach) drm_prime_gem_destroy(gem, nvbo->bo.sg); @@ -46,6 +53,9 @@ nouveau_gem_object_del(struct drm_gem_object *gem) /* reset filp so nouveau_bo_del_ttm() can test for it */ gem->filp = NULL; ttm_bo_unref(&bo); + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } int @@ -53,7 +63,9 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct nouveau_vma *vma; + struct device *dev = drm->dev->dev; int ret; if (!cli->vm) @@ -71,11 +83,16 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) goto out; } + ret = pm_runtime_get_sync(dev); + if (ret < 0 && ret != -EACCES) + goto out; + ret = nouveau_bo_vma_add(nvbo, cli->vm, vma); - if (ret) { + if (ret) kfree(vma); - goto out; - } + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } else { vma->refcount++; } @@ -129,6 +146,8 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); + struct device *dev = drm->dev->dev; struct nouveau_vma *vma; int ret; @@ -141,8 +160,14 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) vma = nouveau_bo_vma_find(nvbo, cli->vm); if (vma) { - if (--vma->refcount == 0) - nouveau_gem_object_unmap(nvbo, vma); + if (--vma->refcount == 0) { + ret = pm_runtime_get_sync(dev); + if (!WARN_ON(ret < 0 && ret != -EACCES)) { + nouveau_gem_object_unmap(nvbo, vma); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + } + } } ttm_bo_unreserve(&nvbo->bo); } @@ -444,9 +469,6 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli, list_for_each_entry(nvbo, list, entry) { struct drm_nouveau_gem_pushbuf_bo *b = &pbbo[nvbo->pbbo_index]; - WARN_ONCE(nvbo->gem.dumb, - "GPU use of dumb buffer is illegal.\n"); - ret = nouveau_gem_set_domain(&nvbo->gem, b->read_domains, b->write_domains, b->valid_domains); diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 753a6def61e7..3d1cfcb96b6b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -28,6 +28,7 @@ #include "nouveau_ttm.h" #include "nouveau_gem.h" +#include "drm_legacy.h" static int nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) { @@ -281,7 +282,7 @@ nouveau_ttm_mmap(struct file *filp, struct vm_area_struct *vma) struct nouveau_drm *drm = nouveau_drm(file_priv->minor->dev); if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) - return -EINVAL; + return drm_legacy_mmap(filp, vma); return ttm_bo_mmap(filp, vma, &drm->ttm.bdev); } diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index d59ec491dbb9..ed644a4f6f57 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1851,10 +1851,9 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) return pll; } /* otherwise, pick one of the plls */ - if ((rdev->family == CHIP_KAVERI) || - (rdev->family == CHIP_KABINI) || + if ((rdev->family == CHIP_KABINI) || (rdev->family == CHIP_MULLINS)) { - /* KB/KV/ML has PPLL1 and PPLL2 */ + /* KB/ML has PPLL1 and PPLL2 */ pll_in_use = radeon_get_pll_use_mask(crtc); if (!(pll_in_use & (1 << ATOM_PPLL2))) return ATOM_PPLL2; @@ -1863,7 +1862,7 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) DRM_ERROR("unable to allocate a PPLL\n"); return ATOM_PPLL_INVALID; } else { - /* CI has PPLL0, PPLL1, and PPLL2 */ + /* CI/KV has PPLL0, PPLL1, and PPLL2 */ pll_in_use = radeon_get_pll_use_mask(crtc); if (!(pll_in_use & (1 << ATOM_PPLL2))) return ATOM_PPLL2; @@ -2155,6 +2154,7 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) case ATOM_PPLL0: /* disable the ppll */ if ((rdev->family == CHIP_ARUBA) || + (rdev->family == CHIP_KAVERI) || (rdev->family == CHIP_BONAIRE) || (rdev->family == CHIP_HAWAII)) atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id, diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 11ba9d21b89b..db42a670f995 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -492,6 +492,10 @@ int radeon_dp_mode_valid_helper(struct drm_connector *connector, struct radeon_connector_atom_dig *dig_connector; int dp_clock; + if ((mode->clock > 340000) && + (!radeon_connector_is_dp12_capable(connector))) + return MODE_CLOCK_HIGH; + if (!radeon_connector->con_priv) return MODE_CLOCK_HIGH; dig_connector = radeon_connector->con_priv; diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 6dcde3798b45..64fdae558d36 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6033,6 +6033,17 @@ void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, 0); radeon_ring_write(ring, 1 << vm_id); + /* wait for the invalidate to complete */ + radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ + WAIT_REG_MEM_FUNCTION(0) | /* always */ + WAIT_REG_MEM_ENGINE(0))); /* me */ + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* ref */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0x20); /* poll interval */ + /* compute doesn't have PFP */ if (usepfp) { /* sync PFP to ME, otherwise we might get invalid PFP reads */ diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index dde5c7e29eb2..a0133c74f4cf 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -903,6 +903,9 @@ void cik_sdma_vm_pad_ib(struct radeon_ib *ib) void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, unsigned vm_id, uint64_t pd_addr) { + u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | + SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); if (vm_id < 8) { radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); @@ -943,5 +946,12 @@ void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 1 << vm_id); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* reference */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */ } diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index ba85986febea..03003f8a6de6 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2156,4 +2156,6 @@ #define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu #define ATC_VM_APERTURE1_LOW_ADDR 0x3304u +#define IH_VMID_0_LUT 0x3D40u + #endif diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c index 2fe8cfc966d9..bafdf92a5732 100644 --- a/drivers/gpu/drm/radeon/dce3_1_afmt.c +++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c @@ -103,7 +103,7 @@ static void dce3_2_afmt_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(radeon_connector->edid, &sads); - if (sad_count < 0) { + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); return; } diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 9b42001295ba..e3e9c10cfba9 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -2745,13 +2745,11 @@ int kv_dpm_init(struct radeon_device *rdev) pi->enable_auto_thermal_throttling = true; pi->disable_nb_ps3_in_battery = false; if (radeon_bapm == -1) { - /* There are stability issues reported on with - * bapm enabled on an asrock system. - */ - if (rdev->pdev->subsystem_vendor == 0x1849) - pi->bapm_enable = false; - else + /* only enable bapm on KB, ML by default */ + if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) pi->bapm_enable = true; + else + pi->bapm_enable = false; } else if (radeon_bapm == 0) { pi->bapm_enable = false; } else { diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 360de9f1f491..aea48c89b241 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2516,6 +2516,16 @@ void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0)); radeon_ring_write(ring, 1 << vm_id); + /* wait for the invalidate to complete */ + radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) | /* always */ + WAIT_REG_MEM_ENGINE(0))); /* me */ + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* ref */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0x20); /* poll interval */ + /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 50f88611ff60..4be2bb7cbef3 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -463,5 +463,11 @@ void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); radeon_ring_write(ring, 1 << vm_id); + + /* wait for invalidate to complete */ + radeon_ring_write(ring, DMA_SRBM_READ_PACKET); + radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2)); + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0); /* value */ } diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 2e12e4d69253..ad7125486894 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -1133,6 +1133,23 @@ #define PACKET3_MEM_SEMAPHORE 0x39 #define PACKET3_MPEG_INDEX 0x3A #define PACKET3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) + /* 0 - always + * 1 - < + * 2 - <= + * 3 - == + * 4 - != + * 5 - >= + * 6 - > + */ +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) + /* 0 - reg + * 1 - mem + */ +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) + /* 0 - me + * 1 - pfp + */ #define PACKET3_MEM_WRITE 0x3D #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_SURFACE_SYNC 0x43 @@ -1272,6 +1289,13 @@ (1 << 21) | \ (((n) & 0xFFFFF) << 0)) +#define DMA_SRBM_POLL_PACKET ((9 << 28) | \ + (1 << 27) | \ + (1 << 26)) + +#define DMA_SRBM_READ_PACKET ((9 << 28) | \ + (1 << 27)) + /* async DMA Packet types */ #define DMA_PACKET_WRITE 0x2 #define DMA_PACKET_COPY 0x3 diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 850de57069be..121aff6a3b41 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -333,6 +333,20 @@ static struct radeon_asic_ring r300_gfx_ring = { .set_wptr = &r100_gfx_set_wptr, }; +static struct radeon_asic_ring rv515_gfx_ring = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + .cs_parse = &r300_cs_parse, + .ring_start = &rv515_ring_start, + .ring_test = &r100_ring_test, + .ib_test = &r100_ib_test, + .is_lockup = &r100_gpu_is_lockup, + .get_rptr = &r100_gfx_get_rptr, + .get_wptr = &r100_gfx_get_wptr, + .set_wptr = &r100_gfx_set_wptr, +}; + static struct radeon_asic r300_asic = { .init = &r300_init, .fini = &r300_fini, @@ -748,7 +762,7 @@ static struct radeon_asic rv515_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring + [RADEON_RING_TYPE_GFX_INDEX] = &rv515_gfx_ring }, .irq = { .set = &rs600_irq_set, @@ -814,7 +828,7 @@ static struct radeon_asic r520_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring + [RADEON_RING_TYPE_GFX_INDEX] = &rv515_gfx_ring }, .irq = { .set = &rs600_irq_set, diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index fe48f229043e..d0b4f7d1140d 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -394,10 +394,9 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, return r; } -static int radeon_mode_mmap(struct drm_file *filp, - struct drm_device *dev, - uint32_t handle, bool dumb, - uint64_t *offset_p) +int radeon_mode_dumb_mmap(struct drm_file *filp, + struct drm_device *dev, + uint32_t handle, uint64_t *offset_p) { struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -406,14 +405,6 @@ static int radeon_mode_mmap(struct drm_file *filp, if (gobj == NULL) { return -ENOENT; } - - /* - * We don't allow dumb mmaps on objects created using another - * interface. - */ - WARN_ONCE(dumb && !(gobj->dumb || gobj->import_attach), - "Illegal dumb map of GPU buffer.\n"); - robj = gem_to_radeon_bo(gobj); if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) { drm_gem_object_unreference_unlocked(gobj); @@ -424,20 +415,12 @@ static int radeon_mode_mmap(struct drm_file *filp, return 0; } -int radeon_mode_dumb_mmap(struct drm_file *filp, - struct drm_device *dev, - uint32_t handle, uint64_t *offset_p) -{ - return radeon_mode_mmap(filp, dev, handle, true, offset_p); -} - int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct drm_radeon_gem_mmap *args = data; - return radeon_mode_mmap(filp, dev, args->handle, false, - &args->addr_ptr); + return radeon_mode_dumb_mmap(filp, dev, args->handle, &args->addr_ptr); } int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, @@ -593,7 +576,7 @@ error_unreserve: error_free: drm_free_large(vm_bos); - if (r) + if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); } @@ -763,7 +746,6 @@ int radeon_mode_dumb_create(struct drm_file *file_priv, return -ENOMEM; r = drm_gem_handle_create(file_priv, gobj, &handle); - gobj->dumb = true; /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(gobj); if (r) { diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 065d02068ec3..8bf87f1203cc 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -28,6 +28,8 @@ #include "cikd.h" #include "cik_reg.h" #include "radeon_kfd.h" +#include "radeon_ucode.h" +#include <linux/firmware.h> #define CIK_PIPE_PER_MEC (4) @@ -49,6 +51,7 @@ static uint64_t get_vmem_size(struct kgd_dev *kgd); static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); /* * Register access functions @@ -69,7 +72,7 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); -static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, @@ -89,14 +92,16 @@ static const struct kfd2kgd_calls kfd2kgd = { .init_memory = kgd_init_memory, .init_pipeline = kgd_init_pipeline, .hqd_load = kgd_hqd_load, - .hqd_is_occupies = kgd_hqd_is_occupies, + .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_destroy = kgd_hqd_destroy, + .get_fw_version = get_fw_version }; static const struct kgd2kfd_calls *kgd2kfd; bool radeon_kfd_init(void) { +#if defined(CONFIG_HSA_AMD_MODULE) bool (*kgd2kfd_init_p)(unsigned, const struct kfd2kgd_calls*, const struct kgd2kfd_calls**); @@ -113,6 +118,17 @@ bool radeon_kfd_init(void) } return true; +#elif defined(CONFIG_HSA_AMD) + if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) { + kgd2kfd = NULL; + + return false; + } + + return true; +#else + return false; +#endif } void radeon_kfd_fini(void) @@ -374,6 +390,10 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, cpu_relax(); write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); + /* Mapping vmid to pasid also for IH block */ + write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t), + pasid_mapping); + return 0; } @@ -513,7 +533,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } -static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { uint32_t act; @@ -552,6 +572,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, if (timeout == 0) { pr_err("kfd: cp queue preemption time out (%dms)\n", temp); + release_queue(kgd); return -ETIME; } msleep(20); @@ -561,3 +582,52 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, release_queue(kgd); return 0; } + +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + const union radeon_firmware_header *hdr; + + BUG_ON(kgd == NULL || rdev->mec_fw == NULL); + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union radeon_firmware_header *) rdev->me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union radeon_firmware_header *) rdev->ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union radeon_firmware_header *) rdev->mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union radeon_firmware_header *) + rdev->mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data; + break; + + case KGD_ENGINE_SDMA: + hdr = (const union radeon_firmware_header *) + rdev->sdma_fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 7d68223eb469..86fc56434b28 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -529,9 +529,6 @@ int radeon_bo_list_validate(struct radeon_device *rdev, u32 current_domain = radeon_mem_type_to_domain(bo->tbo.mem.mem_type); - WARN_ONCE(bo->gem_base.dumb, - "GPU use of dumb buffer is illegal.\n"); - /* Check if this buffer will be moved and don't move it * if we have moved too many buffers for this IB already. * diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 32522cc940a1..f7da8fe96a66 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1287,8 +1287,39 @@ dpm_failed: return ret; } +struct radeon_dpm_quirk { + u32 chip_vendor; + u32 chip_device; + u32 subsys_vendor; + u32 subsys_device; +}; + +/* cards with dpm stability problems */ +static struct radeon_dpm_quirk radeon_dpm_quirk_list[] = { + /* TURKS - https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1386534 */ + { PCI_VENDOR_ID_ATI, 0x6759, 0x1682, 0x3195 }, + /* TURKS - https://bugzilla.kernel.org/show_bug.cgi?id=83731 */ + { PCI_VENDOR_ID_ATI, 0x6840, 0x1179, 0xfb81 }, + { 0, 0, 0, 0 }, +}; + int radeon_pm_init(struct radeon_device *rdev) { + struct radeon_dpm_quirk *p = radeon_dpm_quirk_list; + bool disable_dpm = false; + + /* Apply dpm quirks */ + while (p && p->chip_device != 0) { + if (rdev->pdev->vendor == p->chip_vendor && + rdev->pdev->device == p->chip_device && + rdev->pdev->subsystem_vendor == p->subsys_vendor && + rdev->pdev->subsystem_device == p->subsys_device) { + disable_dpm = true; + break; + } + ++p; + } + /* enable dpm on rv6xx+ */ switch (rdev->family) { case CHIP_RV610: @@ -1344,6 +1375,8 @@ int radeon_pm_init(struct radeon_device *rdev) (!(rdev->flags & RADEON_IS_IGP)) && (!rdev->smc_fw)) rdev->pm.pm_method = PM_METHOD_PROFILE; + else if (disable_dpm && (radeon_dpm == -1)) + rdev->pm.pm_method = PM_METHOD_PROFILE; else if (radeon_dpm == 0) rdev->pm.pm_method = PM_METHOD_PROFILE; else diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 535403e0c8a2..15aee723db77 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -1703,7 +1703,7 @@ static int radeon_cp_dispatch_texture(struct drm_device * dev, u32 format; u32 *buffer; const u8 __user *data; - int size, dwords, tex_width, blit_width, spitch; + unsigned int size, dwords, tex_width, blit_width, spitch; u32 height; int i; u32 texpitch, microtile; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 60df444bd075..5d89b874a1a2 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5057,6 +5057,16 @@ void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, 0); radeon_ring_write(ring, 1 << vm_id); + /* wait for the invalidate to complete */ + radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) | /* always */ + WAIT_REG_MEM_ENGINE(0))); /* me */ + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* ref */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0x20); /* poll interval */ + /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index f5cc777e1c5f..aa7b872b2c43 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -206,6 +206,14 @@ void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); radeon_ring_write(ring, 1 << vm_id); + + /* wait for invalidate to complete */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); + radeon_ring_write(ring, VM_INVALIDATE_REQUEST); + radeon_ring_write(ring, 0xff << 16); /* retry */ + radeon_ring_write(ring, 1 << vm_id); /* mask */ + radeon_ring_write(ring, 0); /* value */ + radeon_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ } /** diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 32e354b8b0ab..eff8a6444956 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2908,6 +2908,22 @@ static int si_init_smc_spll_table(struct radeon_device *rdev) return ret; } +struct si_dpm_quirk { + u32 chip_vendor; + u32 chip_device; + u32 subsys_vendor; + u32 subsys_device; + u32 max_sclk; + u32 max_mclk; +}; + +/* cards with dpm stability problems */ +static struct si_dpm_quirk si_dpm_quirk_list[] = { + /* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */ + { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, + { 0, 0, 0, 0 }, +}; + static void si_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_ps *rps) { @@ -2918,7 +2934,22 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, u32 mclk, sclk; u16 vddc, vddci; u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; + u32 max_sclk = 0, max_mclk = 0; int i; + struct si_dpm_quirk *p = si_dpm_quirk_list; + + /* Apply dpm quirks */ + while (p && p->chip_device != 0) { + if (rdev->pdev->vendor == p->chip_vendor && + rdev->pdev->device == p->chip_device && + rdev->pdev->subsystem_vendor == p->subsys_vendor && + rdev->pdev->subsystem_device == p->subsys_device) { + max_sclk = p->max_sclk; + max_mclk = p->max_mclk; + break; + } + ++p; + } if ((rdev->pm.dpm.new_active_crtc_count > 1) || ni_dpm_vblank_too_short(rdev)) @@ -2972,6 +3003,14 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (ps->performance_levels[i].mclk > max_mclk_vddc) ps->performance_levels[i].mclk = max_mclk_vddc; } + if (max_mclk) { + if (ps->performance_levels[i].mclk > max_mclk) + ps->performance_levels[i].mclk = max_mclk; + } + if (max_sclk) { + if (ps->performance_levels[i].sclk > max_sclk) + ps->performance_levels[i].sclk = max_sclk; + } } /* XXX validate the min clocks required for display */ diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 4069be89e585..84999242c747 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1632,6 +1632,23 @@ #define PACKET3_MPEG_INDEX 0x3A #define PACKET3_COPY_DW 0x3B #define PACKET3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) + /* 0 - always + * 1 - < + * 2 - <= + * 3 - == + * 4 - != + * 5 - >= + * 6 - > + */ +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) + /* 0 - reg + * 1 - mem + */ +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) + /* 0 - me + * 1 - pfp + */ #define PACKET3_MEM_WRITE 0x3D #define PACKET3_COPY_DATA 0x40 #define PACKET3_CP_DMA 0x41 @@ -1835,6 +1852,7 @@ #define DMA_PACKET_TRAP 0x7 #define DMA_PACKET_SRBM_WRITE 0x9 #define DMA_PACKET_CONSTANT_FILL 0xd +#define DMA_PACKET_POLL_REG_MEM 0xe #define DMA_PACKET_NOP 0xf #define VCE_STATUS 0x20004 diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 3367960286a6..978993fa3a36 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -168,7 +168,7 @@ static int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index, const struct tegra_dc_window *window) { unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp; - unsigned long value; + unsigned long value, flags; bool yuv, planar; /* @@ -181,6 +181,8 @@ static int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index, else bpp = planar ? 1 : 2; + spin_lock_irqsave(&dc->lock, flags); + value = WINDOW_A_SELECT << index; tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER); @@ -273,6 +275,7 @@ static int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index, case TEGRA_BO_TILING_MODE_BLOCK: DRM_ERROR("hardware doesn't support block linear mode\n"); + spin_unlock_irqrestore(&dc->lock, flags); return -EINVAL; } @@ -331,6 +334,8 @@ static int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index, tegra_dc_window_commit(dc, index); + spin_unlock_irqrestore(&dc->lock, flags); + return 0; } @@ -338,11 +343,14 @@ static int tegra_window_plane_disable(struct drm_plane *plane) { struct tegra_dc *dc = to_tegra_dc(plane->crtc); struct tegra_plane *p = to_tegra_plane(plane); + unsigned long flags; u32 value; if (!plane->crtc) return 0; + spin_lock_irqsave(&dc->lock, flags); + value = WINDOW_A_SELECT << p->index; tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER); @@ -352,6 +360,8 @@ static int tegra_window_plane_disable(struct drm_plane *plane) tegra_dc_window_commit(dc, p->index); + spin_unlock_irqrestore(&dc->lock, flags); + return 0; } @@ -699,14 +709,16 @@ static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y, struct tegra_bo *bo = tegra_fb_get_plane(fb, 0); unsigned int h_offset = 0, v_offset = 0; struct tegra_bo_tiling tiling; + unsigned long value, flags; unsigned int format, swap; - unsigned long value; int err; err = tegra_fb_get_tiling(fb, &tiling); if (err < 0) return err; + spin_lock_irqsave(&dc->lock, flags); + tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER); value = fb->offsets[0] + y * fb->pitches[0] + @@ -752,6 +764,7 @@ static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y, case TEGRA_BO_TILING_MODE_BLOCK: DRM_ERROR("hardware doesn't support block linear mode\n"); + spin_unlock_irqrestore(&dc->lock, flags); return -EINVAL; } @@ -778,6 +791,8 @@ static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y, tegra_dc_writel(dc, value << 8, DC_CMD_STATE_CONTROL); tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL); + spin_unlock_irqrestore(&dc->lock, flags); + return 0; } @@ -814,23 +829,32 @@ static void tegra_dc_finish_page_flip(struct tegra_dc *dc) unsigned long flags, base; struct tegra_bo *bo; - if (!dc->event) + spin_lock_irqsave(&drm->event_lock, flags); + + if (!dc->event) { + spin_unlock_irqrestore(&drm->event_lock, flags); return; + } bo = tegra_fb_get_plane(crtc->primary->fb, 0); + spin_lock_irqsave(&dc->lock, flags); + /* check if new start address has been latched */ + tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER); tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS); base = tegra_dc_readl(dc, DC_WINBUF_START_ADDR); tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS); + spin_unlock_irqrestore(&dc->lock, flags); + if (base == bo->paddr + crtc->primary->fb->offsets[0]) { - spin_lock_irqsave(&drm->event_lock, flags); - drm_send_vblank_event(drm, dc->pipe, dc->event); - drm_vblank_put(drm, dc->pipe); + drm_crtc_send_vblank_event(crtc, dc->event); + drm_crtc_vblank_put(crtc); dc->event = NULL; - spin_unlock_irqrestore(&drm->event_lock, flags); } + + spin_unlock_irqrestore(&drm->event_lock, flags); } void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file) @@ -843,7 +867,7 @@ void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file) if (dc->event && dc->event->base.file_priv == file) { dc->event->base.destroy(&dc->event->base); - drm_vblank_put(drm, dc->pipe); + drm_crtc_vblank_put(crtc); dc->event = NULL; } @@ -853,16 +877,16 @@ void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file) static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, uint32_t page_flip_flags) { + unsigned int pipe = drm_crtc_index(crtc); struct tegra_dc *dc = to_tegra_dc(crtc); - struct drm_device *drm = crtc->dev; if (dc->event) return -EBUSY; if (event) { - event->pipe = dc->pipe; + event->pipe = pipe; dc->event = event; - drm_vblank_get(drm, dc->pipe); + drm_crtc_vblank_get(crtc); } tegra_dc_set_base(dc, 0, 0, fb); @@ -1127,7 +1151,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) /* dev_dbg(dc->dev, "%s(): vertical blank\n", __func__); */ - drm_handle_vblank(dc->base.dev, dc->pipe); + drm_crtc_handle_vblank(&dc->base); tegra_dc_finish_page_flip(dc); } diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index e549afeece1f..d4f827593dfa 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -694,24 +694,28 @@ static const struct file_operations tegra_drm_fops = { .llseek = noop_llseek, }; -static struct drm_crtc *tegra_crtc_from_pipe(struct drm_device *drm, int pipe) +static struct drm_crtc *tegra_crtc_from_pipe(struct drm_device *drm, + unsigned int pipe) { struct drm_crtc *crtc; list_for_each_entry(crtc, &drm->mode_config.crtc_list, head) { - struct tegra_dc *dc = to_tegra_dc(crtc); - - if (dc->pipe == pipe) + if (pipe == drm_crtc_index(crtc)) return crtc; } return NULL; } -static u32 tegra_drm_get_vblank_counter(struct drm_device *dev, int crtc) +static u32 tegra_drm_get_vblank_counter(struct drm_device *drm, int pipe) { + struct drm_crtc *crtc = tegra_crtc_from_pipe(drm, pipe); + + if (!crtc) + return 0; + /* TODO: implement real hardware counter using syncpoints */ - return drm_vblank_count(dev, crtc); + return drm_crtc_vblank_count(crtc); } static int tegra_drm_enable_vblank(struct drm_device *drm, int pipe) diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index da32086cbeaf..8777b7f75791 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -216,32 +216,58 @@ static void tegra_bo_free(struct drm_device *drm, struct tegra_bo *bo) } } -static int tegra_bo_get_pages(struct drm_device *drm, struct tegra_bo *bo, - size_t size) +static int tegra_bo_get_pages(struct drm_device *drm, struct tegra_bo *bo) { + struct scatterlist *s; + struct sg_table *sgt; + unsigned int i; + bo->pages = drm_gem_get_pages(&bo->gem); if (IS_ERR(bo->pages)) return PTR_ERR(bo->pages); - bo->num_pages = size >> PAGE_SHIFT; - - bo->sgt = drm_prime_pages_to_sg(bo->pages, bo->num_pages); - if (IS_ERR(bo->sgt)) { - drm_gem_put_pages(&bo->gem, bo->pages, false, false); - return PTR_ERR(bo->sgt); + bo->num_pages = bo->gem.size >> PAGE_SHIFT; + + sgt = drm_prime_pages_to_sg(bo->pages, bo->num_pages); + if (IS_ERR(sgt)) + goto put_pages; + + /* + * Fake up the SG table so that dma_map_sg() can be used to flush the + * pages associated with it. Note that this relies on the fact that + * the DMA API doesn't hook into IOMMU on Tegra, therefore mapping is + * only cache maintenance. + * + * TODO: Replace this by drm_clflash_sg() once it can be implemented + * without relying on symbols that are not exported. + */ + for_each_sg(sgt->sgl, s, sgt->nents, i) + sg_dma_address(s) = sg_phys(s); + + if (dma_map_sg(drm->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE) == 0) { + sgt = ERR_PTR(-ENOMEM); + goto release_sgt; } + bo->sgt = sgt; + return 0; + +release_sgt: + sg_free_table(sgt); + kfree(sgt); +put_pages: + drm_gem_put_pages(&bo->gem, bo->pages, false, false); + return PTR_ERR(sgt); } -static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo, - size_t size) +static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo) { struct tegra_drm *tegra = drm->dev_private; int err; if (tegra->domain) { - err = tegra_bo_get_pages(drm, bo, size); + err = tegra_bo_get_pages(drm, bo); if (err < 0) return err; @@ -251,6 +277,8 @@ static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo, return err; } } else { + size_t size = bo->gem.size; + bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr, GFP_KERNEL | __GFP_NOWARN); if (!bo->vaddr) { @@ -274,7 +302,7 @@ struct tegra_bo *tegra_bo_create(struct drm_device *drm, size_t size, if (IS_ERR(bo)) return bo; - err = tegra_bo_alloc(drm, bo, size); + err = tegra_bo_alloc(drm, bo); if (err < 0) goto release; |