From 59bc1d89d6a4d67c94a9b70fa81bda1d5b04f0cb Mon Sep 17 00:00:00 2001 From: Lauri Kasanen Date: Sun, 20 Apr 2014 20:29:33 +0300 Subject: drm/radeon: Inline r100_mm_rreg, -wreg, v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was originally un-inlined by Andi Kleen in 2011 citing size concerns. Indeed, a first attempt at inlining it grew radeon.ko by 7%. However, 2% of cpu is spent in this function. Simply inlining it gave 1% more fps in Urban Terror. v2: We know the minimum MMIO size. Adding it to the if allows the compiler to optimize the branch out, improving both performance and size. The v2 patch decreases radeon.ko size by 2%. I didn't re-benchmark, but common sense says perf is now more than 1% better. v3: Also change _wreg, make the threshold a define. Inlining _wreg increased the size a bit compared to v2, so now radeon.ko is only 1% smaller. Signed-off-by: Lauri Kasanen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/radeon/radeon.h') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7561b1332152..6aadfe43bad0 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2357,10 +2357,42 @@ int radeon_device_init(struct radeon_device *rdev, void radeon_device_fini(struct radeon_device *rdev); int radeon_gpu_wait_for_idle(struct radeon_device *rdev); -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, - bool always_indirect); -void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, - bool always_indirect); +#define RADEON_MIN_MMIO_SIZE 0x10000 + +static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, + bool always_indirect) +{ + /* The mmio size is 64kb at minimum. Allows the if to be optimized out. */ + if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect) + return readl(((void __iomem *)rdev->rmmio) + reg); + else { + unsigned long flags; + uint32_t ret; + + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); + + return ret; + } +} + +static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, + bool always_indirect) +{ + if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect) + writel(v, ((void __iomem *)rdev->rmmio) + reg); + else { + unsigned long flags; + + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); + } +} + u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); -- cgit v1.2.3