diff options
author | Jerome Glisse <jglisse@redhat.com> | 2009-09-08 10:10:24 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2009-09-08 11:15:52 +1000 |
commit | 3ce0a23d2d253185df24e22e3d5f89800bb3dd1c (patch) | |
tree | 4b4defdbe33aec7317101cce0f89c33083f8d17b /drivers | |
parent | 4ce001abafafe77e5dd943d1480fc9f87894e96f (diff) |
drm/radeon/kms: add r600 KMS support
This adds the r600 KMS + CS support to the Linux kernel.
The r600 TTM support is quite basic and still needs more
work esp around using interrupts, but the polled fencing
should work okay for now.
Also currently TTM is using memcpy to do VRAM moves,
the code is here to use a 3D blit to do this, but
isn't fully debugged yet.
Authors:
Alex Deucher <alexdeucher@gmail.com>
Dave Airlie <airlied@redhat.com>
Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers')
33 files changed, 8289 insertions, 606 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index c5db0c4fe788..14c3fe692723 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -46,8 +46,9 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \ radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \ radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \ radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \ - rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \ - radeon_test.o r200.o radeon_legacy_tv.o + rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ + r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ + r600_blit_kms.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 8e31e992ec53..a7edd0f2ac37 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -389,6 +389,7 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) pll_flags |= RADEON_PLL_USE_REF_DIV; } radeon_encoder = to_radeon_encoder(encoder); + break; } } diff --git a/drivers/gpu/drm/radeon/r300.h b/drivers/gpu/drm/radeon/avivod.h index 8486b4da9d69..d4e6e6e4a938 100644 --- a/drivers/gpu/drm/radeon/r300.h +++ b/drivers/gpu/drm/radeon/avivod.h @@ -1,7 +1,6 @@ /* - * Copyright 2008 Advanced Micro Devices, Inc. - * Copyright 2008 Red Hat Inc. - * Copyright 2009 Jerome Glisse. + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,12 +24,37 @@ * Alex Deucher * Jerome Glisse */ -#ifndef R300_H -#define R300_H +#ifndef AVIVOD_H +#define AVIVOD_H -struct r300_asic { - const unsigned *reg_safe_bm; - unsigned reg_safe_bm_size; -}; + +#define D1CRTC_CONTROL 0x6080 +#define CRTC_EN (1 << 0) +#define D1CRTC_UPDATE_LOCK 0x60E8 +#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110 +#define D1GRPH_SECONDARY_SURFACE_ADDRESS 0x6118 + +#define D2CRTC_CONTROL 0x6880 +#define D2CRTC_UPDATE_LOCK 0x68E8 +#define D2GRPH_PRIMARY_SURFACE_ADDRESS 0x6910 +#define D2GRPH_SECONDARY_SURFACE_ADDRESS 0x6918 + +#define D1VGA_CONTROL 0x0330 +#define DVGA_CONTROL_MODE_ENABLE (1 << 0) +#define DVGA_CONTROL_TIMING_SELECT (1 << 8) +#define DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9) +#define DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10) +#define DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16) +#define DVGA_CONTROL_ROTATE (1 << 24) +#define D2VGA_CONTROL 0x0338 + +#define VGA_HDP_CONTROL 0x328 +#define VGA_MEM_PAGE_SELECT_EN (1 << 0) +#define VGA_MEMORY_DISABLE (1 << 4) +#define VGA_RBBM_LOCK_DISABLE (1 << 8) +#define VGA_SOFT_RESET (1 << 16) +#define VGA_MEMORY_BASE_ADDRESS 0x0310 +#define VGA_RENDER_CONTROL 0x0300 +#define VGA_VSTATUS_CNTL_MASK 0x00030000 #endif diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ee3ab62417e2..5708c07ce733 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -31,6 +31,8 @@ #include "radeon_drm.h" #include "radeon_reg.h" #include "radeon.h" +#include "r100d.h" + #include <linux/firmware.h> #include <linux/platform_device.h> @@ -391,9 +393,9 @@ int r100_wb_init(struct radeon_device *rdev) return r; } } - WREG32(0x774, rdev->wb.gpu_addr); - WREG32(0x70C, rdev->wb.gpu_addr + 1024); - WREG32(0x770, 0xff); + WREG32(RADEON_SCRATCH_ADDR, rdev->wb.gpu_addr); + WREG32(RADEON_CP_RB_RPTR_ADDR, rdev->wb.gpu_addr + 1024); + WREG32(RADEON_SCRATCH_UMSK, 0xff); return 0; } @@ -559,18 +561,18 @@ static int r100_cp_init_microcode(struct radeon_device *rdev) fw_name = FIRMWARE_R520; } - err = request_firmware(&rdev->fw, fw_name, &pdev->dev); + err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev); platform_device_unregister(pdev); if (err) { printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n", fw_name); - } else if (rdev->fw->size % 8) { + } else if (rdev->me_fw->size % 8) { printk(KERN_ERR "radeon_cp: Bogus length %zu in firmware \"%s\"\n", - rdev->fw->size, fw_name); + rdev->me_fw->size, fw_name); err = -EINVAL; - release_firmware(rdev->fw); - rdev->fw = NULL; + release_firmware(rdev->me_fw); + rdev->me_fw = NULL; } return err; } @@ -584,9 +586,9 @@ static void r100_cp_load_microcode(struct radeon_device *rdev) "programming pipes. Bad things might happen.\n"); } - if (rdev->fw) { - size = rdev->fw->size / 4; - fw_data = (const __be32 *)&rdev->fw->data[0]; + if (rdev->me_fw) { + size = rdev->me_fw->size / 4; + fw_data = (const __be32 *)&rdev->me_fw->data[0]; WREG32(RADEON_CP_ME_RAM_ADDR, 0); for (i = 0; i < size; i += 2) { WREG32(RADEON_CP_ME_RAM_DATAH, @@ -632,7 +634,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) DRM_INFO("radeon: cp idle (0x%08X)\n", tmp); } - if (!rdev->fw) { + if (!rdev->me_fw) { r = r100_cp_init_microcode(rdev); if (r) { DRM_ERROR("Failed to load firmware!\n"); @@ -765,6 +767,12 @@ int r100_cp_reset(struct radeon_device *rdev) return -1; } +void r100_cp_commit(struct radeon_device *rdev) +{ + WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); + (void)RREG32(RADEON_CP_RB_WPTR); +} + /* * CS functions @@ -2954,3 +2962,106 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track } } } + +int r100_ring_test(struct radeon_device *rdev) +{ + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = radeon_scratch_get(rdev, &scratch); + if (r) { + DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = radeon_ring_lock(rdev, 2); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + radeon_scratch_free(rdev, scratch); + return r; + } + radeon_ring_write(rdev, PACKET0(scratch, 0)); + radeon_ring_write(rdev, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + break; + } + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ring test succeeded in %d usecs\n", i); + } else { + DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + radeon_scratch_free(rdev, scratch); + return r; +} + +void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1)); + radeon_ring_write(rdev, ib->gpu_addr); + radeon_ring_write(rdev, ib->length_dw); +} + +int r100_ib_test(struct radeon_device *rdev) +{ + struct radeon_ib *ib; + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = radeon_scratch_get(rdev, &scratch); + if (r) { + DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = radeon_ib_get(rdev, &ib); + if (r) { + return r; + } + ib->ptr[0] = PACKET0(scratch, 0); + ib->ptr[1] = 0xDEADBEEF; + ib->ptr[2] = PACKET2(0); + ib->ptr[3] = PACKET2(0); + ib->ptr[4] = PACKET2(0); + ib->ptr[5] = PACKET2(0); + ib->ptr[6] = PACKET2(0); + ib->ptr[7] = PACKET2(0); + ib->length_dw = 8; + r = radeon_ib_schedule(rdev, ib); + if (r) { + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); + return r; + } + r = radeon_fence_wait(ib->fence, false); + if (r) { + return r; + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + break; + } + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ib test succeeded in %u usecs\n", i); + } else { + DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); + return r; +} diff --git a/drivers/gpu/drm/radeon/r100d.h b/drivers/gpu/drm/radeon/r100d.h new file mode 100644 index 000000000000..6da7d92c321c --- /dev/null +++ b/drivers/gpu/drm/radeon/r100d.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef __R100D_H__ +#define __R100D_H__ + +#define CP_PACKET0 0x00000000 +#define PACKET0_BASE_INDEX_SHIFT 0 +#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) +#define PACKET0_COUNT_SHIFT 16 +#define PACKET0_COUNT_MASK (0x3fff << 16) +#define CP_PACKET1 0x40000000 +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) +#define CP_PACKET3 0xC0000000 +#define PACKET3_IT_OPCODE_SHIFT 8 +#define PACKET3_IT_OPCODE_MASK (0xff << 8) +#define PACKET3_COUNT_SHIFT 16 +#define PACKET3_COUNT_MASK (0x3fff << 16) +/* PACKET3 op code */ +#define PACKET3_NOP 0x10 +#define PACKET3_3D_DRAW_VBUF 0x28 +#define PACKET3_3D_DRAW_IMMD 0x29 +#define PACKET3_3D_DRAW_INDX 0x2A +#define PACKET3_3D_LOAD_VBPNTR 0x2F +#define PACKET3_INDX_BUFFER 0x33 +#define PACKET3_3D_DRAW_VBUF_2 0x34 +#define PACKET3_3D_DRAW_IMMD_2 0x35 +#define PACKET3_3D_DRAW_INDX_2 0x36 +#define PACKET3_BITBLT_MULTI 0x9B + +#define PACKET0(reg, n) (CP_PACKET0 | \ + REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ + REG_SET(PACKET0_COUNT, (n))) +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) +#define PACKET3(op, n) (CP_PACKET3 | \ + REG_SET(PACKET3_IT_OPCODE, (op)) | \ + REG_SET(PACKET3_COUNT, (n))) + +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) +#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) + +#endif diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 33a2c557eac4..a5f82f7beed6 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -33,6 +33,7 @@ #include "radeon_drm.h" #include "radeon_share.h" #include "r100_track.h" +#include "r300d.h" #include "r300_reg_safe.h" @@ -127,7 +128,7 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev) WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp); rv370_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n", - rdev->mc.gtt_size >> 20, table_addr); + (unsigned)(rdev->mc.gtt_size >> 20), table_addr); rdev->gart.ready = true; return 0; } diff --git a/drivers/gpu/drm/radeon/r300d.h b/drivers/gpu/drm/radeon/r300d.h new file mode 100644 index 000000000000..63ec076f2cd4 --- /dev/null +++ b/drivers/gpu/drm/radeon/r300d.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef __R300D_H__ +#define __R300D_H__ + +#define CP_PACKET0 0x00000000 +#define PACKET0_BASE_INDEX_SHIFT 0 +#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) +#define PACKET0_COUNT_SHIFT 16 +#define PACKET0_COUNT_MASK (0x3fff << 16) +#define CP_PACKET1 0x40000000 +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) +#define CP_PACKET3 0xC0000000 +#define PACKET3_IT_OPCODE_SHIFT 8 +#define PACKET3_IT_OPCODE_MASK (0xff << 8) +#define PACKET3_COUNT_SHIFT 16 +#define PACKET3_COUNT_MASK (0x3fff << 16) +/* PACKET3 op code */ +#define PACKET3_NOP 0x10 +#define PACKET3_3D_DRAW_VBUF 0x28 +#define PACKET3_3D_DRAW_IMMD 0x29 +#define PACKET3_3D_DRAW_INDX 0x2A +#define PACKET3_3D_LOAD_VBPNTR 0x2F +#define PACKET3_INDX_BUFFER 0x33 +#define PACKET3_3D_DRAW_VBUF_2 0x34 +#define PACKET3_3D_DRAW_IMMD_2 0x35 +#define PACKET3_3D_DRAW_INDX_2 0x36 +#define PACKET3_BITBLT_MULTI 0x9B + +#define PACKET0(reg, n) (CP_PACKET0 | \ + REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ + REG_SET(PACKET0_COUNT, (n))) +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) +#define PACKET3(op, n) (CP_PACKET3 | \ + REG_SET(PACKET3_IT_OPCODE, (op)) | \ + REG_SET(PACKET3_COUNT, (n))) + +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) +#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) + +#endif diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 538cd907df69..d8fcef44a69f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -25,12 +25,46 @@ * Alex Deucher * Jerome Glisse */ +#include <linux/seq_file.h> +#include <linux/firmware.h> +#include <linux/platform_device.h> #include "drmP.h" -#include "radeon_reg.h" +#include "radeon_drm.h" #include "radeon.h" +#include "radeon_mode.h" +#include "radeon_share.h" +#include "r600d.h" +#include "avivod.h" +#include "atom.h" -/* r600,rv610,rv630,rv620,rv635,rv670 depends on : */ -void rs600_mc_disable_clients(struct radeon_device *rdev); +#define PFP_UCODE_SIZE 576 +#define PM4_UCODE_SIZE 1792 +#define R700_PFP_UCODE_SIZE 848 +#define R700_PM4_UCODE_SIZE 1360 + +/* Firmware Names */ +MODULE_FIRMWARE("radeon/R600_pfp.bin"); +MODULE_FIRMWARE("radeon/R600_me.bin"); +MODULE_FIRMWARE("radeon/RV610_pfp.bin"); +MODULE_FIRMWARE("radeon/RV610_me.bin"); +MODULE_FIRMWARE("radeon/RV630_pfp.bin"); +MODULE_FIRMWARE("radeon/RV630_me.bin"); +MODULE_FIRMWARE("radeon/RV620_pfp.bin"); +MODULE_FIRMWARE("radeon/RV620_me.bin"); +MODULE_FIRMWARE("radeon/RV635_pfp.bin"); +MODULE_FIRMWARE("radeon/RV635_me.bin"); +MODULE_FIRMWARE("radeon/RV670_pfp.bin"); +MODULE_FIRMWARE("radeon/RV670_me.bin"); +MODULE_FIRMWARE("radeon/RS780_pfp.bin"); +MODULE_FIRMWARE("radeon/RS780_me.bin"); +MODULE_FIRMWARE("radeon/RV770_pfp.bin"); +MODULE_FIRMWARE("radeon/RV770_me.bin"); +MODULE_FIRMWARE("radeon/RV730_pfp.bin"); +MODULE_FIRMWARE("radeon/RV730_me.bin"); +MODULE_FIRMWARE("radeon/RV710_pfp.bin"); +MODULE_FIRMWARE("radeon/RV710_me.bin"); + +int r600_debugfs_mc_info_init(struct radeon_device *rdev); /* This files gather functions specifics to: * r600,rv610,rv630,rv620,rv635,rv670 @@ -39,87 +73,270 @@ void rs600_mc_disable_clients(struct radeon_device *rdev); */ int r600_mc_wait_for_idle(struct radeon_device *rdev); void r600_gpu_init(struct radeon_device *rdev); +void r600_fini(struct radeon_device *rdev); /* - * MC + * R600 PCIE GART */ -int r600_mc_init(struct radeon_device *rdev) +int r600_gart_clear_page(struct radeon_device *rdev, int i) { - uint32_t tmp; + void __iomem *ptr = (void *)rdev->gart.table.vram.ptr; + u64 pte; - r600_gpu_init(rdev); + if (i < 0 || i > rdev->gart.num_gpu_pages) + return -EINVAL; + pte = 0; + writeq(pte, ((void __iomem *)ptr) + (i * 8)); + return 0; +} - /* setup the gart before changing location so we can ask to - * discard unmapped mc request - */ - /* FIXME: disable out of gart access */ - tmp = rdev->mc.gtt_location / 4096; - tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp); - tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096; - tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp); - - rs600_mc_disable_clients(rdev); - if (r600_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); +void r600_pcie_gart_tlb_flush(struct radeon_device *rdev) +{ + unsigned i; + u32 tmp; + + WREG32(VM_CONTEXT0_INVALIDATION_LOW_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (rdev->mc.gtt_end - 1) >> 12); + WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1)); + for (i = 0; i < rdev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(VM_CONTEXT0_REQUEST_RESPONSE); + tmp = (tmp & RESPONSE_TYPE_MASK) >> RESPONSE_TYPE_SHIFT; + if (tmp == 2) { + printk(KERN_WARNING "[drm] r600 flush TLB failed\n"); + return; + } + if (tmp) { + return; + } + udelay(1); + } +} + +int r600_pcie_gart_enable(struct radeon_device *rdev) +{ + u32 tmp; + int r, i; + + /* Initialize common gart structure */ + r = radeon_gart_init(rdev); + if (r) { + return r; + } + rdev->gart.table_size = rdev->gart.num_gpu_pages * 8; + r = radeon_gart_table_vram_alloc(rdev); + if (r) { + return r; } + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1)); + /* Setup TLB control */ + tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) | + ENABLE_WAIT_L2_QUERY; + WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp | ENABLE_L1_STRICT_ORDERING); + WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); + WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); + WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); + WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + for (i = 1; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); - tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; - tmp = REG_SET(R600_MC_FB_TOP, tmp >> 24); - tmp |= REG_SET(R600_MC_FB_BASE, rdev->mc.vram_location >> 24); - WREG32(R600_MC_VM_FB_LOCATION, tmp); - tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; - tmp = REG_SET(R600_MC_AGP_TOP, tmp >> 22); - WREG32(R600_MC_VM_AGP_TOP, tmp); - tmp = REG_SET(R600_MC_AGP_BOT, rdev->mc.gtt_location >> 22); - WREG32(R600_MC_VM_AGP_BOT, tmp); + r600_pcie_gart_tlb_flush(rdev); + rdev->gart.ready = true; return 0; } -void r600_mc_fini(struct radeon_device *rdev) +void r600_pcie_gart_disable(struct radeon_device *rdev) { - /* FIXME: implement */ -} + u32 tmp; + int i; + /* Clear ptes*/ + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + r600_pcie_gart_tlb_flush(rdev); + /* Disable all tables */ + for (i = 0; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); -/* - * Global GPU functions - */ -void r600_errata(struct radeon_device *rdev) -{ - rdev->pll_errata = 0; + /* Disable L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1)); + /* Setup L1 TLB control */ + tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) | + ENABLE_WAIT_L2_QUERY; + WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp); } int r600_mc_wait_for_idle(struct radeon_device *rdev) { - /* FIXME: implement */ - return 0; + unsigned i; + u32 tmp; + + for (i = 0; i < rdev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(R_000E50_SRBM_STATUS) & 0x3F00; + if (!tmp) + return 0; + udelay(1); + } + return -1; } -void r600_gpu_init(struct radeon_device *rdev) +static void r600_mc_resume(struct radeon_device *rdev) { - /* FIXME: implement */ -} + u32 d1vga_control, d2vga_control; + u32 vga_render_control, vga_hdp_control; + u32 d1crtc_control, d2crtc_control; + u32 new_d1grph_primary, new_d1grph_secondary; + u32 new_d2grph_primary, new_d2grph_secondary; + u64 old_vram_start; + u32 tmp; + int i, j; + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); -/* - * VRAM info - */ -void r600_vram_get_type(struct radeon_device *rdev) + d1vga_control = RREG32(D1VGA_CONTROL); + d2vga_control = RREG32(D2VGA_CONTROL); + vga_render_control = RREG32(VGA_RENDER_CONTROL); + vga_hdp_control = RREG32(VGA_HDP_CONTROL); + d1crtc_control = RREG32(D1CRTC_CONTROL); + d2crtc_control = RREG32(D2CRTC_CONTROL); + old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24; + new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS); + new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS); + new_d1grph_primary += rdev->mc.vram_start - old_vram_start; + new_d1grph_secondary += rdev->mc.vram_start - old_vram_start; + new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS); + new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS); + new_d2grph_primary += rdev->mc.vram_start - old_vram_start; + new_d2grph_secondary += rdev->mc.vram_start - old_vram_start; + + /* Stop all video */ + WREG32(D1VGA_CONTROL, 0); + WREG32(D2VGA_CONTROL, 0); + WREG32(VGA_RENDER_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, 0); + WREG32(D2CRTC_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); + } + + /* Lockout access through VGA aperture*/ + WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); + + /* Update configuration */ + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); + tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16; + tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); + WREG32(MC_VM_FB_LOCATION, tmp); + WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); + WREG32(HDP_NONSURFACE_INFO, (2 << 7)); + WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF); + if (rdev->flags & RADEON_IS_AGP) { + WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16); + WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16); + WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22); + } else { + WREG32(MC_VM_AGP_BASE, 0); + WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); + WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); + } + WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary); + WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary); + WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary); + WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary); + WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start); + + /* Unlock host access */ + WREG32(VGA_HDP_CONTROL, vga_hdp_control); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); + } + + /* Restore video state */ + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, d1crtc_control); + WREG32(D2CRTC_CONTROL, d2crtc_control); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + WREG32(D1VGA_CONTROL, d1vga_control); + WREG32(D2VGA_CONTROL, d2vga_control); + WREG32(VGA_RENDER_CONTROL, vga_render_control); +} + +int r600_mc_init(struct radeon_device *rdev) { - uint32_t tmp; + fixed20_12 a; + u32 tmp; int chansize; + int r; + /* Get VRAM informations */ rdev->mc.vram_width = 128; rdev->mc.vram_is_ddr = true; - - tmp = RREG32(R600_RAMCFG); - if (tmp & R600_CHANSIZE_OVERRIDE) { + tmp = RREG32(RAMCFG); + if (tmp & CHANSIZE_OVERRIDE) { chansize = 16; - } else if (tmp & R600_CHANSIZE) { + } else if (tmp & CHANSIZE_MASK) { chansize = 64; } else { chansize = 32; @@ -135,36 +352,1391 @@ void r600_vram_get_type(struct radeon_device *rdev) (rdev->family == CHIP_RV635)) { rdev->mc.vram_width = 2 * chansize; } + /* Could aper size report 0 ? */ + rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); + rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); + /* Setup GPU memory space */ + rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); + rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); + if (rdev->flags & RADEON_IS_AGP) { + r = radeon_agp_init(rdev); + if (r) + return r; + /* gtt_size is setup by radeon_agp_init */ + rdev->mc.gtt_location = rdev->mc.agp_base; + tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size; + /* Try to put vram before or after AGP because we + * we want SYSTEM_APERTURE to cover both VRAM and + * AGP so that GPU can catch out of VRAM/AGP access + */ + if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { + /* Enought place before */ + rdev->mc.vram_location = rdev->mc.gtt_location - + rdev->mc.mc_vram_size; + } else if (tmp > rdev->mc.mc_vram_size) { + /* Enought place after */ + rdev->mc.vram_location = rdev->mc.gtt_location + + rdev->mc.gtt_size; + } else { + /* Try to setup VRAM then AGP might not + * not work on some card + */ + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + } + } else { + if (rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) { + rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) & + 0xFFFF) << 24; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size; + if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) { + /* Enough place after vram */ + rdev->mc.gtt_location = tmp; + } else if (rdev->mc.vram_location >= rdev->mc.gtt_size) { + /* Enough place before vram */ + rdev->mc.gtt_location = 0; + } else { + /* Not enough place after or before shrink + * gart size + */ + if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) { + rdev->mc.gtt_location = 0; + rdev->mc.gtt_size = rdev->mc.vram_location; + } else { + rdev->mc.gtt_location = tmp; + rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp; + } + } + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + } else { + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + } + } + rdev->mc.vram_start = rdev->mc.vram_location; + rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size; + rdev->mc.gtt_start = rdev->mc.gtt_location; + rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size; + /* FIXME: we should enforce default clock in case GPU is not in + * default setup + */ + a.full = rfixed_const(100); + rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk); + rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a); + return 0; } -void r600_vram_info(struct radeon_device *rdev) +/* We doesn't check that the GPU really needs a reset we simply do the + * reset, it's up to the caller to determine if the GPU needs one. We + * might add an helper function to check that. + */ +int r600_gpu_soft_reset(struct radeon_device *rdev) { - r600_vram_get_type(rdev); - rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE); - rdev->mc.mc_vram_size = rdev->mc.real_vram_size; + u32 grbm_busy_mask = S_008010_VC_BUSY(1) | S_008010_VGT_BUSY_NO_DMA(1) | + S_008010_VGT_BUSY(1) | S_008010_TA03_BUSY(1) | + S_008010_TC_BUSY(1) | S_008010_SX_BUSY(1) | + S_008010_SH_BUSY(1) | S_008010_SPI03_BUSY(1) | + S_008010_SMX_BUSY(1) | S_008010_SC_BUSY(1) | + S_008010_PA_BUSY(1) | S_008010_DB03_BUSY(1) | + S_008010_CR_BUSY(1) | S_008010_CB03_BUSY(1) | + S_008010_GUI_ACTIVE(1); + u32 grbm2_busy_mask = S_008014_SPI0_BUSY(1) | S_008014_SPI1_BUSY(1) | + S_008014_SPI2_BUSY(1) | S_008014_SPI3_BUSY(1) | + S_008014_TA0_BUSY(1) | S_008014_TA1_BUSY(1) | + S_008014_TA2_BUSY(1) | S_008014_TA3_BUSY(1) | + S_008014_DB0_BUSY(1) | S_008014_DB1_BUSY(1) | + S_008014_DB2_BUSY(1) | S_008014_DB3_BUSY(1) | + S_008014_CB0_BUSY(1) | S_008014_CB1_BUSY(1) | + S_008014_CB2_BUSY(1) | S_008014_CB3_BUSY(1); + u32 srbm_reset = 0; - /* Could aper size report 0 ? */ - rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); - rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); + /* Disable CP parsing/prefetching */ + WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(0xff)); + /* Check if any of the rendering block is busy and reset it */ + if ((RREG32(R_008010_GRBM_STATUS) & grbm_busy_mask) || + (RREG32(R_008014_GRBM_STATUS2) & grbm2_busy_mask)) { + WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CR(1) | + S_008020_SOFT_RESET_DB(1) | + S_008020_SOFT_RESET_CB(1) | + S_008020_SOFT_RESET_PA(1) | + S_008020_SOFT_RESET_SC(1) | + S_008020_SOFT_RESET_SMX(1) | + S_008020_SOFT_RESET_SPI(1) | + S_008020_SOFT_RESET_SX(1) | + S_008020_SOFT_RESET_SH(1) | + S_008020_SOFT_RESET_TC(1) | + S_008020_SOFT_RESET_TA(1) | + S_008020_SOFT_RESET_VC(1) | + S_008020_SOFT_RESET_VGT(1)); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + udelay(50); + WREG32(R_008020_GRBM_SOFT_RESET, 0); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + } + /* Reset CP (we always reset CP) */ + WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CP(1)); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + udelay(50); + WREG32(R_008020_GRBM_SOFT_RESET, 0); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + /* Reset others GPU block if necessary */ + if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_RLC(1); + if (G_000E50_GRBM_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_GRBM(1); + if (G_000E50_HI_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_IH(1); + if (G_000E50_VMC_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_VMC(1); + if (G_000E50_MCB_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDZ_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDY_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDX_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDW_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_RLC(1); + if (G_000E50_SEM_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_SEM(1); + WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset); + (void)RREG32(R_000E60_SRBM_SOFT_RESET); + udelay(50); + WREG32(R_000E60_SRBM_SOFT_RESET, 0); + (void)RREG32(R_000E60_SRBM_SOFT_RESET); + /* Wait a little for things to settle down */ + udelay(50); + return 0; +} + +int r600_gpu_reset(struct radeon_device *rdev) +{ + return r600_gpu_soft_reset(rdev); +} + +static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes, + u32 num_backends, + u32 backend_disable_mask) +{ + u32 backend_map = 0; + u32 enabled_backends_mask; + u32 enabled_backends_count; + u32 cur_pipe; + u32 swizzle_pipe[R6XX_MAX_PIPES]; + u32 cur_backend; + u32 i; + + if (num_tile_pipes > R6XX_MAX_PIPES) + num_tile_pipes = R6XX_MAX_PIPES; + if (num_tile_pipes < 1) + num_tile_pipes = 1; + if (num_backends > R6XX_MAX_BACKENDS) + num_backends = R6XX_MAX_BACKENDS; + if (num_backends < 1) + num_backends = 1; + + enabled_backends_mask = 0; + enabled_backends_count = 0; + for (i = 0; i < R6XX_MAX_BACKENDS; ++i) { + if (((backend_disable_mask >> i) & 1) == 0) { + enabled_backends_mask |= (1 << i); + ++enabled_backends_count; + } + if (enabled_backends_count == num_backends) + break; + } + + if (enabled_backends_count == 0) { + enabled_backends_mask = 1; + enabled_backends_count = 1; + } + + if (enabled_backends_count != num_backends) + num_backends = enabled_backends_count; + + memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES); + switch (num_tile_pipes) { + case 1: + swizzle_pipe[0] = 0; + break; + case 2: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + break; + case 3: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + break; + case 4: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + break; + case 5: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + swizzle_pipe[4] = 4; + break; + case 6: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 5; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + break; + case 7: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + swizzle_pipe[6] = 5; + break; + case 8: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + swizzle_pipe[6] = 5; + swizzle_pipe[7] = 7; + break; + } + + cur_backend = 0; + for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { + while (((1 << cur_backend) & enabled_backends_mask) == 0) + cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; + + backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); + + cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; + } + + return backend_map; +} + +int r600_count_pipe_bits(uint32_t val) +{ + int i, ret = 0; + + for (i = 0; i < 32; i++) { + ret += val & 1; + val >>= 1; + } + return ret; } +void r600_gpu_init(struct radeon_device *rdev) +{ + u32 tiling_config; + u32 ramcfg; + u32 tmp; + int i, j; + u32 sq_config; + u32 sq_gpr_resource_mgmt_1 = 0; + u32 sq_gpr_resource_mgmt_2 = 0; + u32 sq_thread_resource_mgmt = 0; + u32 sq_stack_resource_mgmt_1 = 0; + u32 sq_stack_resource_mgmt_2 = 0; + + /* FIXME: implement */ + switch (rdev->family) { + case CHIP_R600: + rdev->config.r600.max_pipes = 4; + rdev->config.r600.max_tile_pipes = 8; + rdev->config.r600.max_simds = 4; + rdev->config.r600.max_backends = 4; + rdev->config.r600.max_gprs = 256; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 256; + rdev->config.r600.max_hw_contexts = 8; + rdev->config.r600.max_gs_threads = 16; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 2; + break; + case CHIP_RV630: + case CHIP_RV635: + rdev->config.r600.max_pipes = 2; + rdev->config.r600.max_tile_pipes = 2; + rdev->config.r600.max_simds = 3; + rdev->config.r600.max_backends = 1; + rdev->config.r600.max_gprs = 128; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 128; + rdev->config.r600.max_hw_contexts = 8; + rdev->config.r600.max_gs_threads = 4; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 2; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + rdev->config.r600.max_pipes = 1; + rdev->config.r600.max_tile_pipes = 1; + rdev->config.r600.max_simds = 2; + rdev->config.r600.max_backends = 1; + rdev->config.r600.max_gprs = 128; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 128; + rdev->config.r600.max_hw_contexts = 4; + rdev->config.r600.max_gs_threads = 4; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 1; + break; + case CHIP_RV670: + rdev->config.r600.max_pipes = 4; + rdev->config.r600.max_tile_pipes = 4; + rdev->config.r600.max_simds = 4; + rdev->config.r600.max_backends = 4; + rdev->config.r600.max_gprs = 192; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 256; + rdev->config.r600.max_hw_contexts = 8; + rdev->config.r600.max_gs_threads = 16; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 2; + break; + default: + break; + } + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + + WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); + + /* Setup tiling */ + tiling_config = 0; + ramcfg = RREG32(RAMCFG); + switch (rdev->config.r600.max_tile_pipes) { + case 1: + tiling_config |= PIPE_TILING(0); + break; + case 2: + tiling_config |= PIPE_TILING(1); + break; + case 4: + tiling_config |= PIPE_TILING(2); + break; + case 8: + tiling_config |= PIPE_TILING(3); + break; + default: + break; + } + tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); + tiling_config |= GROUP_SIZE(0); + tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT; + if (tmp > 3) { + tiling_config |= ROW_TILING(3); + tiling_config |= SAMPLE_SPLIT(3); + } else { + tiling_config |= ROW_TILING(tmp); + tiling_config |= SAMPLE_SPLIT(tmp); + } + tiling_config |= BANK_SWAPS(1); + tmp = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes, + rdev->config.r600.max_backends, + (0xff << rdev->config.r600.max_backends) & 0xff); + tiling_config |= BACKEND_MAP(tmp); + WREG32(GB_TILING_CONFIG, tiling_config); + WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); + WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); + + tmp = BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK); + WREG32(CC_RB_BACKEND_DISABLE, tmp); + + /* Setup pipes */ + tmp = INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK); + tmp |= INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK); + WREG32(CC_GC_SHADER_PIPE_CONFIG, tmp); + WREG32(GC_USER_SHADER_PIPE_CONFIG, tmp); + + tmp = R6XX_MAX_BACKENDS - r600_count_pipe_bits(tmp & INACTIVE_QD_PIPES_MASK); + WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); + WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK); + + /* Setup some CP states */ + WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | ROQ_IB2_START(0x2b))); + WREG32(CP_MEQ_THRESHOLDS, (MEQ_END(0x40) | ROQ_END(0x40))); + + WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | SYNC_GRADIENT | + SYNC_WALKER | SYNC_ALIGNER)); + /* Setup various GPU states */ + if (rdev->family == CHIP_RV670) + WREG32(ARB_GDEC_RD_CNTL, 0x00000021); + + tmp = RREG32(SX_DEBUG_1); + tmp |= SMX_EVENT_RELEASE; + if ((rdev->family > CHIP_R600)) + tmp |= ENABLE_NEW_SMX_ADDRESS; + WREG32(SX_DEBUG_1, tmp); + + if (((rdev->family) == CHIP_R600) || + ((rdev->family) == CHIP_RV630) || + ((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + WREG32(DB_DEBUG, PREZ_MUST_WAIT_FOR_POSTZ_DONE); + } else { + WREG32(DB_DEBUG, 0); + } + WREG32(DB_WATERMARKS, (DEPTH_FREE(4) | DEPTH_CACHELINE_FREE(16) | + DEPTH_FLUSH(16) | DEPTH_PENDING_FREE(4))); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + WREG32(VGT_NUM_INSTANCES, 0); + + WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0)); + WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(0)); + + tmp = RREG32(SQ_MS_FIFO_SIZES); + if (((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + tmp = (CACHE_FIFO_SIZE(0xa) | + FETCH_FIFO_HIWATER(0xa) | + DONE_FIFO_HIWATER(0xe0) | + ALU_UPDATE_FIFO_HIWATER(0x8)); + } else if (((rdev->family) == CHIP_R600) || + ((rdev->family) == CHIP_RV630)) { + tmp &= ~DONE_FIFO_HIWATER(0xff); + tmp |= DONE_FIFO_HIWATER(0x4); + } + WREG32(SQ_MS_FIFO_SIZES, tmp); + + /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT + * should be adjusted as needed by the 2D/3D drivers. This just sets default values + */ + sq_config = RREG32(SQ_CONFIG); + sq_config &= ~(PS_PRIO(3) | + VS_PRIO(3) | + GS_PRIO(3) | + ES_PRIO(3)); + sq_config |= (DX9_CONSTS | + VC_ENABLE | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + + if ((rdev->family) == CHIP_R600) { + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(124) | + NUM_VS_GPRS(124) | + NUM_CLAUSE_TEMP_GPRS(4)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(0) | + NUM_ES_GPRS(0)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(136) | + NUM_VS_THREADS(48) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(4)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(128) | + NUM_VS_STACK_ENTRIES(128)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(0) | + NUM_ES_STACK_ENTRIES(0)); + } else if (((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + /* no vertex cache */ + sq_config &= ~VC_ENABLE; + + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) | + NUM_VS_GPRS(44) | + NUM_CLAUSE_TEMP_GPRS(2)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) | + NUM_ES_GPRS(17)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(79) | + NUM_VS_THREADS(78) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(31)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) | + NUM_VS_STACK_ENTRIES(40)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) | + NUM_ES_STACK_ENTRIES(16)); + } else if (((rdev->family) == CHIP_RV630) || + ((rdev->family) == CHIP_RV635)) { + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) | + NUM_VS_GPRS(44) | + NUM_CLAUSE_TEMP_GPRS(2)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(18) | + NUM_ES_GPRS(18)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(79) | + NUM_VS_THREADS(78) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(31)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) | + NUM_VS_STACK_ENTRIES(40)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) | + NUM_ES_STACK_ENTRIES(16)); + } else if ((rdev->family) == CHIP_RV670) { + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) | + NUM_VS_GPRS(44) | + NUM_CLAUSE_TEMP_GPRS(2)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) | + NUM_ES_GPRS(17)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(79) | + NUM_VS_THREADS(78) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(31)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(64) | + NUM_VS_STACK_ENTRIES(64)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(64) | + NUM_ES_STACK_ENTRIES(64)); + } + + WREG32(SQ_CONFIG, sq_config); + WREG32(SQ_GPR_RESOURCE_MGMT_1, sq_gpr_resource_mgmt_1); + WREG32(SQ_GPR_RESOURCE_MGMT_2, sq_gpr_resource_mgmt_2); + WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); + WREG32(SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1); + WREG32(SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2); + + if (((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(TC_ONLY)); + } else { + WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC)); + } + + /* More default values. 2D/3D driver should adjust as needed */ + WREG32(PA_SC_AA_SAMPLE_LOCS_2S, (S0_X(0xc) | S0_Y(0x4) | + S1_X(0x4) | S1_Y(0xc))); + WREG32(PA_SC_AA_SAMPLE_LOCS_4S, (S0_X(0xe) | S0_Y(0xe) | + S1_X(0x2) | S1_Y(0x2) | + S2_X(0xa) | S2_Y(0x6) | + S3_X(0x6) | S3_Y(0xa))); + WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD0, (S0_X(0xe) | S0_Y(0xb) | + S1_X(0x4) | S1_Y(0xc) | + S2_X(0x1) | S2_Y(0x6) | + S3_X(0xa) | S3_Y(0xe))); + WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD1, (S4_X(0x6) | S4_Y(0x1) | + S5_X(0x0) | S5_Y(0x0) | + S6_X(0xb) | S6_Y(0x4) | + S7_X(0x7) | S7_Y(0x8))); + + WREG32(VGT_STRMOUT_EN, 0); + tmp = rdev->config.r600.max_pipes * 16; + switch (rdev->family) { + case CHIP_RV610: + case CHIP_RS780: + case CHIP_RV620: + tmp += 32; + break; + case CHIP_RV670: + tmp += 128; + break; + default: + break; + } + if (tmp > 256) { + tmp = 256; + } + WREG32(VGT_ES_PER_GS, 128); + WREG32(VGT_GS_PER_ES, tmp); + WREG32(VGT_GS_PER_VS, 2); + WREG32(VGT_GS_VERTEX_REUSE, 16); + + /* more default values. 2D/3D driver should adjust as needed */ + WREG32(PA_SC_LINE_STIPPLE_STATE, 0); + WREG32(VGT_STRMOUT_EN, 0); + WREG32(SX_MISC, 0); + WREG32(PA_SC_MODE_CNTL, 0); + WREG32(PA_SC_AA_CONFIG, 0); + WREG32(PA_SC_LINE_STIPPLE, 0); + WREG32(SPI_INPUT_Z, 0); + WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2)); + WREG32(CB_COLOR7_FRAG, 0); + + /* Clear render buffer base addresses */ + WREG32(CB_COLOR0_BASE, 0); + WREG32(CB_COLOR1_BASE, 0); + WREG32(CB_COLOR2_BASE, 0); + WREG32(CB_COLOR3_BASE, 0); + WREG32(CB_COLOR4_BASE, 0); + WREG32(CB_COLOR5_BASE, 0); + WREG32(CB_COLOR6_BASE, 0); + WREG32(CB_COLOR7_BASE, 0); + WREG32(CB_COLOR7_FRAG, 0); + + switch (rdev->family) { + case CHIP_RV610: + case CHIP_RS780: + case CHIP_RV620: + tmp = TC_L2_SIZE(8); + break; + case CHIP_RV630: + case CHIP_RV635: + tmp = TC_L2_SIZE(4); + break; + case CHIP_R600: + tmp = TC_L2_SIZE(0) | L2_DISABLE_LATE_HIT; + break; + default: + tmp = TC_L2_SIZE(0); + break; + } + WREG32(TC_CNTL, tmp); + + tmp = RREG32(HDP_HOST_PATH_CNTL); + WREG32(HDP_HOST_PATH_CNTL, tmp); + + tmp = RREG32(ARB_POP); + tmp |= ENABLE_TC128; + WREG32(ARB_POP, tmp); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA | + NUM_CLIP_SEQ(3))); + WREG32(PA_SC_ENHANCE, FORCE_EOV_MAX_CLK_CNT(4095)); +} + + /* * Indirect registers accessor */ -uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg) +u32 r600_pciep_rreg(struct radeon_device *rdev, u32 reg) +{ + u32 r; + + WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); + (void)RREG32(PCIE_PORT_INDEX); + r = RREG32(PCIE_PORT_DATA); + return r; +} + +void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); + (void)RREG32(PCIE_PORT_INDEX); + WREG32(PCIE_PORT_DATA, (v)); + (void)RREG32(PCIE_PORT_DATA); +} + + +/* + * CP & Ring + */ +void r600_cp_stop(struct radeon_device *rdev) +{ + WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); +} + +int r600_cp_init_microcode(struct radeon_device *rdev) +{ + struct platform_device *pdev; + const char *chip_name; + size_t pfp_req_size, me_req_size; + char fw_name[30]; + int err; + + DRM_DEBUG("\n"); + + pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0); + err = IS_ERR(pdev); + if (err) { + printk(KERN_ERR "radeon_cp: Failed to register firmware\n"); + return -EINVAL; + } + + switch (rdev->family) { + case CHIP_R600: chip_name = "R600"; break; + case CHIP_RV610: chip_name = "RV610"; break; + case CHIP_RV630: chip_name = "RV630"; break; + case CHIP_RV620: chip_name = "RV620"; break; + case CHIP_RV635: chip_name = "RV635"; break; + case CHIP_RV670: chip_name = "RV670"; break; + case CHIP_RS780: + case CHIP_RS880: chip_name = "RS780"; break; + case CHIP_RV770: chip_name = "RV770"; break; + case CHIP_RV730: + case CHIP_RV740: chip_name = "RV730"; break; + case CHIP_RV710: chip_name = "RV710"; break; + default: BUG(); + } + + if (rdev->family >= CHIP_RV770) { + pfp_req_size = R700_PFP_UCODE_SIZE * 4; + me_req_size = R700_PM4_UCODE_SIZE * 4; + } else { + pfp_req_size = PFP_UCODE_SIZE * 4; + me_req_size = PM4_UCODE_SIZE * 12; + } + + DRM_INFO("Loading %s CP Microcode\n", chip_name); + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name); + err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->pfp_fw->size != pfp_req_size) { + printk(KERN_ERR + "r600_cp: Bogus length %zu in firmware \"%s\"\n", + rdev->pfp_fw->size, fw_name); + err = -EINVAL; + goto out; + } + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name); + err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->me_fw->size != me_req_size) { + printk(KERN_ERR + "r600_cp: Bogus length %zu in firmware \"%s\"\n", + rdev->me_fw->size, fw_name); + err = -EINVAL; + } +out: + platform_device_unregister(pdev); + + if (err) { + if (err != -EINVAL) + printk(KERN_ERR + "r600_cp: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(rdev->pfp_fw); + rdev->pfp_fw = NULL; + release_firmware(rdev->me_fw); + rdev->me_fw = NULL; + } + return err; +} + +static int r600_cp_load_microcode(struct radeon_device *rdev) +{ + const __be32 *fw_data; + int i; + + if (!rdev->me_fw || !rdev->pfp_fw) + return -EINVAL; + + r600_cp_stop(rdev); + + WREG32(CP_RB_CNTL, RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3)); + + /* Reset cp */ + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + + WREG32(CP_ME_RAM_WADDR, 0); + + fw_data = (const __be32 *)rdev->me_fw->data; + WREG32(CP_ME_RAM_WADDR, 0); + for (i = 0; i < PM4_UCODE_SIZE * 3; i++) + WREG32(CP_ME_RAM_DATA, + be32_to_cpup(fw_data++)); + + fw_data = (const __be32 *)rdev->pfp_fw->data; + WREG32(CP_PFP_UCODE_ADDR, 0); + for (i = 0; i < PFP_UCODE_SIZE; i++) + WREG32(CP_PFP_UCODE_DATA, + be32_to_cpup(fw_data++)); + + WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(CP_ME_RAM_WADDR, 0); + WREG32(CP_ME_RAM_RADDR, 0); + return 0; +} + +int r600_cp_start(struct radeon_device *rdev) +{ + int r; + uint32_t cp_me; + + r = radeon_ring_lock(rdev, 7); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5)); + radeon_ring_write(rdev, 0x1); + if (rdev->family < CHIP_RV770) { + radeon_ring_write(rdev, 0x3); + radeon_ring_write(rdev, rdev->config.r600.max_hw_contexts - 1); + } else { + radeon_ring_write(rdev, 0x0); + radeon_ring_write(rdev, rdev->config.rv770.max_hw_contexts - 1); + } + radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_unlock_commit(rdev); + + cp_me = 0xff; + WREG32(R_0086D8_CP_ME_CNTL, cp_me); + return 0; +} + +int r600_cp_resume(struct radeon_device *rdev) +{ + u32 tmp; + u32 rb_bufsz; + int r; + + /* Reset cp */ + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + + /* Set ring buffer size */ + rb_bufsz = drm_order(rdev->cp.ring_size / 8); +#ifdef __BIG_ENDIAN + WREG32(CP_RB_CNTL, BUF_SWAP_32BIT | RB_NO_UPDATE | + (drm_order(4096/8) << 8) | rb_bufsz); +#else + WREG32(CP_RB_CNTL, RB_NO_UPDATE | (drm_order(4096/8) << 8) | rb_bufsz); +#endif + WREG32(CP_SEM_WAIT_TIMER, 0x4); + + /* Set the write pointer delay */ + WREG32(CP_RB_WPTR_DELAY, 0); + + /* Initialize the ring buffer's read and write pointers */ + tmp = RREG32(CP_RB_CNTL); + WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); + WREG32(CP_RB_RPTR_WR, 0); + WREG32(CP_RB_WPTR, 0); + WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF); + WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr)); + mdelay(1); + WREG32(CP_RB_CNTL, tmp); + + WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8); + WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); + + rdev->cp.rptr = RREG32(CP_RB_RPTR); + rdev->cp.wptr = RREG32(CP_RB_WPTR); + + r600_cp_start(rdev); + rdev->cp.ready = true; + r = radeon_ring_test(rdev); + if (r) { + rdev->cp.ready = false; + return r; + } + return 0; +} + +void r600_cp_commit(struct radeon_device *rdev) +{ + WREG32(CP_RB_WPTR, rdev->cp.wptr); + (void)RREG32(CP_RB_WPTR); +} + +void r600_ring_init(struct radeon_device *rdev, unsigned ring_size) +{ + u32 rb_bufsz; + + /* Align ring size */ + rb_bufsz = drm_order(ring_size / 8); + ring_size = (1 << (rb_bufsz + 1)) * 4; + rdev->cp.ring_size = ring_size; + rdev->cp.align_mask = 16 - 1; +} + + +/* + * GPU scratch registers helpers function. + */ +void r600_scratch_init(struct radeon_device *rdev) +{ + int i; + + rdev->scratch.num_reg = 7; + for (i = 0; i < rdev->scratch.num_reg; i++) { + rdev->scratch.free[i] = true; + rdev->scratch.reg[i] = SCRATCH_REG0 + (i * 4); + } +} + +int r600_ring_test(struct radeon_device *rdev) +{ + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = radeon_scratch_get(rdev, &scratch); + if (r) { + DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = radeon_ring_lock(rdev, 3); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + radeon_scratch_free(rdev, scratch); + return r; + } + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); + radeon_ring_write(rdev, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ring test succeeded in %d usecs\n", i); + } else { + DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + radeon_scratch_free(rdev, scratch); + return r; +} + +/* + * Writeback + */ +int r600_wb_init(struct radeon_device *rdev) +{ + int r; + + if (rdev->wb.wb_obj == NULL) { + r = radeon_object_create(rdev, NULL, 4096, + true, + RADEON_GEM_DOMAIN_GTT, + false, &rdev->wb.wb_obj); + if (r) { + DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r); + return r; + } + r = radeon_object_pin(rdev->wb.wb_obj, + RADEON_GEM_DOMAIN_GTT, + &rdev->wb.gpu_addr); + if (r) { + DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r); + return r; + } + r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb); + if (r) { + DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r); + return r; + } + } + WREG32(SCRATCH_ADDR, (rdev->wb.gpu_addr >> 8) & 0xFFFFFFFF); + WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC); + WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + 1024) & 0xFF); + WREG32(SCRATCH_UMSK, 0xff); + return 0; +} + +void r600_wb_fini(struct radeon_device *rdev) +{ + if (rdev->wb.wb_obj) { + radeon_object_kunmap(rdev->wb.wb_obj); + radeon_object_unpin(rdev->wb.wb_obj); + radeon_object_unref(&rdev->wb.wb_obj); + rdev->wb.wb = NULL; + rdev->wb.wb_obj = NULL; + } +} + + +/* + * CS + */ +void r600_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + /* Emit fence sequence & fire IRQ */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); + radeon_ring_write(rdev, fence->seq); +} + +int r600_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, + uint64_t dst_offset, + unsigned num_pages, + struct radeon_fence *fence) +{ + /* FIXME: implement */ + return 0; +} + +int r600_copy_blit(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_pages, struct radeon_fence *fence) +{ + r600_blit_prepare_copy(rdev, num_pages * 4096); + r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * 4096); + r600_blit_done_copy(rdev, fence); + return 0; +} + +int r600_irq_process(struct radeon_device *rdev) +{ + /* FIXME: implement */ + return 0; +} + +int r600_irq_set(struct radeon_device *rdev) +{ + /* FIXME: implement */ + return 0; +} + +int r600_set_surface_reg(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size) +{ + /* FIXME: implement */ + return 0; +} + +void r600_clear_surface_reg(struct radeon_device *rdev, int reg) +{ + /* FIXME: implement */ +} + + +bool r600_card_posted(struct radeon_device *rdev) +{ + uint32_t reg; + + /* first check CRTCs */ + reg = RREG32(D1CRTC_CONTROL) | + RREG32(D2CRTC_CONTROL); + if (reg & CRTC_EN) + return true; + + /* then check MEM_SIZE, in case the crtcs are off */ + if (RREG32(CONFIG_MEMSIZE)) + return true; + + return false; +} + +int r600_resume(struct radeon_device *rdev) +{ + int r; + + r600_gpu_reset(rdev); + r600_mc_resume(rdev); + r = r600_pcie_gart_enable(rdev); + if (r) + return r; + r600_gpu_init(rdev); + r = radeon_ring_init(rdev, rdev->cp.ring_size); + if (r) + return r; + r = r600_cp_load_microcode(rdev); + if (r) + return r; + r = r600_cp_resume(rdev); + if (r) + return r; + r = r600_wb_init(rdev); + if (r) + return r; + return 0; +} + +int r600_suspend(struct radeon_device *rdev) +{ + /* FIXME: we should wait for ring to be empty */ + r600_cp_stop(rdev); + return 0; +} + +/* Plan is to move initialization in that function and use + * helper function so that radeon_device_init pretty much + * do nothing more than calling asic specific function. This + * should also allow to remove a bunch of callback function + * like vram_info. + */ +int r600_init(struct radeon_device *rdev) { - uint32_t r; + int r; - WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff)); - (void)RREG32(R600_PCIE_PORT_INDEX); - r = RREG32(R600_PCIE_PORT_DATA); + rdev->new_init_path = true; + r = radeon_dummy_page_init(rdev); + if (r) + return r; + if (r600_debugfs_mc_info_init(rdev)) { + DRM_ERROR("Failed to register debugfs file for mc !\n"); + } + /* This don't do much */ + r = radeon_gem_init(rdev); + if (r) + return r; + /* Read BIOS */ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + /* Must be an ATOMBIOS */ + if (!rdev->is_atom_bios) + return -EINVAL; + r = radeon_atombios_init(rdev); + if (r) + return r; + /* Post card if necessary */ + if (!r600_card_posted(rdev) && rdev->bios) { + DRM_INFO("GPU not posted. posting now...\n"); + atom_asic_init(rdev->mode_info.atom_context); + } + /* Initialize scratch registers */ + r600_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + r = radeon_clocks_init(rdev); + if (r) + return r; + /* Fence driver */ + r = radeon_fence_driver_init(rdev); + if (r) + return r; + r = r600_mc_init(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + r600_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return r600_init(rdev); + } + return r; + } + /* Memory manager */ + r = radeon_object_init(rdev); + if (r) + return r; + rdev->cp.ring_obj = NULL; + r600_ring_init(rdev, 1024 * 1024); + + if (!rdev->me_fw || !rdev->pfp_fw) { + r = r600_cp_init_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load firmware!\n"); + return r; + } + } + + r = r600_resume(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + r600_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return r600_init(rdev); + } + return r; + } + r = radeon_ib_pool_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); + return r; + } + r = r600_blit_init(rdev); + if (r) { + DRM_ERROR("radeon: failled blitter (%d).\n", r); + return r; + } + r = radeon_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failled testing IB (%d).\n", r); + return r; + } + return 0; +} + +void r600_fini(struct radeon_device *rdev) +{ + /* Suspend operations */ + r600_suspend(rdev); + + r600_blit_fini(rdev); + radeon_ring_fini(rdev); + r600_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); + radeon_gem_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_clocks_fini(rdev); +#if __OS_HAS_AGP + if (rdev->flags & RADEON_IS_AGP) + radeon_agp_fini(rdev); +#endif + radeon_object_fini(rdev); + if (rdev->is_atom_bios) + radeon_atombios_fini(rdev); + else + radeon_combios_fini(rdev); + kfree(rdev->bios); + rdev->bios = NULL; + radeon_dummy_page_fini(rdev); +} + + +/* + * CS stuff + */ +void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + /* FIXME: implement */ + radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(rdev, ib->gpu_addr & 0xFFFFFFFC); + radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF); + radeon_ring_write(rdev, ib->length_dw); +} + +int r600_ib_test(struct radeon_device *rdev) +{ + struct radeon_ib *ib; + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = radeon_scratch_get(rdev, &scratch); + if (r) { + DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = radeon_ib_get(rdev, &ib); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + ib->ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1); + ib->ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + ib->ptr[2] = 0xDEADBEEF; + ib->ptr[3] = PACKET2(0); + ib->ptr[4] = PACKET2(0); + ib->ptr[5] = PACKET2(0); + ib->ptr[6] = PACKET2(0); + ib->ptr[7] = PACKET2(0); + ib->ptr[8] = PACKET2(0); + ib->ptr[9] = PACKET2(0); + ib->ptr[10] = PACKET2(0); + ib->ptr[11] = PACKET2(0); + ib->ptr[12] = PACKET2(0); + ib->ptr[13] = PACKET2(0); + ib->ptr[14] = PACKET2(0); + ib->ptr[15] = PACKET2(0); + ib->length_dw = 16; + r = radeon_ib_schedule(rdev, ib); + if (r) { + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + return r; + } + r = radeon_fence_wait(ib->fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + return r; + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ib test succeeded in %u usecs\n", i); + } else { + DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); return r; } -void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) + + + +/* + * Debugfs info + */ +#if defined(CONFIG_DEBUG_FS) + +static int r600_debugfs_cp_ring_info(struct seq_file *m, void *data) { - WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff)); - (void)RREG32(R600_PCIE_PORT_INDEX); - WREG32(R600_PCIE_PORT_DATA, (v)); - (void)RREG32(R600_PCIE_PORT_DATA); + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + uint32_t rdp, wdp; + unsigned count, i, j; + + radeon_ring_free_size(rdev); + rdp = RREG32(CP_RB_RPTR); + wdp = RREG32(CP_RB_WPTR); + count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask; + seq_printf(m, "CP_STAT 0x%08x\n", RREG32(CP_STAT)); + seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); + seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); + seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw); + seq_printf(m, "%u dwords in ring\n", count); + for (j = 0; j <= count; j++) { + i = (rdp + j) & rdev->cp.ptr_mask; + seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]); + } + return 0; +} + +static int r600_debugfs_mc_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + + DREG32_SYS(m, rdev, R_000E50_SRBM_STATUS); + DREG32_SYS(m, rdev, VM_L2_STATUS); + return 0; +} + +static struct drm_info_list r600_mc_info_list[] = { + {"r600_mc_info", r600_debugfs_mc_info, 0, NULL}, + {"r600_ring_info", r600_debugfs_cp_ring_info, 0, NULL}, +}; +#endif + +int r600_debugfs_mc_info_init(struct radeon_device *rdev) +{ +#if defined(CONFIG_DEBUG_FS) + return radeon_debugfs_add_files(rdev, r600_mc_info_list, ARRAY_SIZE(r600_mc_info_list)); +#else + return 0; +#endif } diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c new file mode 100644 index 000000000000..c51402e92493 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit.c @@ -0,0 +1,855 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + */ +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" + +#include "r600_blit_shaders.h" + +#define DI_PT_RECTLIST 0x11 +#define DI_INDEX_SIZE_16_BIT 0x0 +#define DI_SRC_SEL_AUTO_INDEX 0x2 + +#define FMT_8 0x1 +#define FMT_5_6_5 0x8 +#define FMT_8_8_8_8 0x1a +#define COLOR_8 0x1 +#define COLOR_5_6_5 0x8 +#define COLOR_8_8_8_8 0x1a + +static inline void +set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr) +{ + u32 cb_color_info; + int pitch, slice; + RING_LOCALS; + DRM_DEBUG("\n"); + + h = (h + 7) & ~7; + if (h < 8) + h = 8; + + cb_color_info = ((format << 2) | (1 << 27)); + pitch = (w / 8) - 1; + slice = ((w * h) / 64) - 1; + + if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) && + ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) { + BEGIN_RING(21 + 2); + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(gpu_addr >> 8); + OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0)); + OUT_RING(2 << 0); + } else { + BEGIN_RING(21); + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(gpu_addr >> 8); + } + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((pitch << 0) | (slice << 10)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(cb_color_info); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + ADVANCE_RING(); +} + +static inline void +cp_set_surface_sync(drm_radeon_private_t *dev_priv, + u32 sync_type, u32 size, u64 mc_addr) +{ + u32 cp_coher_size; + RING_LOCALS; + DRM_DEBUG("\n"); + + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + BEGIN_RING(5); + OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3)); + OUT_RING(sync_type); + OUT_RING(cp_coher_size); + OUT_RING((mc_addr >> 8)); + OUT_RING(10); /* poll interval */ + ADVANCE_RING(); +} + +static inline void +set_shaders(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + u64 gpu_addr; + int shader_size, i; + u32 *vs, *ps; + uint32_t sq_pgm_resources; + RING_LOCALS; + DRM_DEBUG("\n"); + + /* load shaders */ + vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset); + ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256); + + shader_size = r6xx_vs_size; + for (i = 0; i < shader_size; i++) + vs[i] = r6xx_vs[i]; + shader_size = r6xx_ps_size; + for (i = 0; i < shader_size; i++) + ps[i] = r6xx_ps[i]; + + dev_priv->blit_vb->used = 512; + + gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset; + + /* setup shader regs */ + sq_pgm_resources = (1 << 0); + + BEGIN_RING(9 + 12); + /* VS */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(gpu_addr >> 8); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(sq_pgm_resources); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + /* PS */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((gpu_addr + 256) >> 8); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(sq_pgm_resources | (1 << 28)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(2); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + ADVANCE_RING(); + + cp_set_surface_sync(dev_priv, + R600_SH_ACTION_ENA, 512, gpu_addr); +} + +static inline void +set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) +{ + uint32_t sq_vtx_constant_word2; + RING_LOCALS; + DRM_DEBUG("\n"); + + sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8)); + + BEGIN_RING(9); + OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + OUT_RING(0x460); + OUT_RING(gpu_addr & 0xffffffff); + OUT_RING(48 - 1); + OUT_RING(sq_vtx_constant_word2); + OUT_RING(1 << 0); + OUT_RING(0); + OUT_RING(0); + OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30); + ADVANCE_RING(); + + if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, 48, gpu_addr); + else + cp_set_surface_sync(dev_priv, + R600_VC_ACTION_ENA, 48, gpu_addr); +} + +static inline void +set_tex_resource(drm_radeon_private_t *dev_priv, + int format, int w, int h, int pitch, u64 gpu_addr) +{ + uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; + RING_LOCALS; + DRM_DEBUG("\n"); + + if (h < 1) + h = 1; + + sq_tex_resource_word0 = (1 << 0); + sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | + ((w - 1) << 19)); + + sq_tex_resource_word1 = (format << 26); + sq_tex_resource_word1 |= ((h - 1) << 0); + + sq_tex_resource_word4 = ((1 << 14) | + (0 << 16) | + (1 << 19) | + (2 << 22) | + (3 << 25)); + + BEGIN_RING(9); + OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + OUT_RING(0); + OUT_RING(sq_tex_resource_word0); + OUT_RING(sq_tex_resource_word1); + OUT_RING(gpu_addr >> 8); + OUT_RING(gpu_addr >> 8); + OUT_RING(sq_tex_resource_word4); + OUT_RING(0); + OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30); + ADVANCE_RING(); + +} + +static inline void +set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) +{ + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(12); + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); + OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((x1 << 0) | (y1 << 16)); + OUT_RING((x2 << 0) | (y2 << 16)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); + OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); + OUT_RING((x2 << 0) | (y2 << 16)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); + OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); + OUT_RING((x2 << 0) | (y2 << 16)); + ADVANCE_RING(); +} + +static inline void +draw_auto(drm_radeon_private_t *dev_priv) +{ + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(10); + OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2); + OUT_RING(DI_PT_RECTLIST); + + OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + OUT_RING(DI_INDEX_SIZE_16_BIT); + + OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + OUT_RING(1); + + OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + OUT_RING(3); + OUT_RING(DI_SRC_SEL_AUTO_INDEX); + + ADVANCE_RING(); + COMMIT_RING(); +} + +static inline void +set_default_state(drm_radeon_private_t *dev_priv) +{ + int default_state_dw, i; + u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; + u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; + int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; + int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; + int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; + RING_LOCALS; + + switch ((dev_priv->flags & RADEON_FAMILY_MASK)) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + default: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + } + + if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) + sq_config = 0; + else + sq_config = R600_VC_ENABLE; + + sq_config |= (R600_DX9_CONSTS | + R600_ALU_INST_PREFER_VECTOR | + R600_PS_PRIO(0) | + R600_VS_PRIO(1) | + R600_GS_PRIO(2) | + R600_ES_PRIO(3)); + + sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) | + R600_NUM_VS_GPRS(num_vs_gprs) | + R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); + sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) | + R600_NUM_ES_GPRS(num_es_gprs)); + sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) | + R600_NUM_VS_THREADS(num_vs_threads) | + R600_NUM_GS_THREADS(num_gs_threads) | + R600_NUM_ES_THREADS(num_es_threads)); + sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | + R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); + sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | + R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries)); + + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) { + default_state_dw = r7xx_default_size * 4; + BEGIN_RING(default_state_dw + 10); + for (i = 0; i < default_state_dw; i++) + OUT_RING(r7xx_default_state[i]); + } else { + default_state_dw = r6xx_default_size * 4; + BEGIN_RING(default_state_dw + 10); + for (i = 0; i < default_state_dw; i++) + OUT_RING(r6xx_default_state[i]); + } + OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); + OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); + /* SQ config */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6)); + OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2); + OUT_RING(sq_config); + OUT_RING(sq_gpr_resource_mgmt_1); + OUT_RING(sq_gpr_resource_mgmt_2); + OUT_RING(sq_thread_resource_mgmt); + OUT_RING(sq_stack_resource_mgmt_1); + OUT_RING(sq_stack_resource_mgmt_2); + ADVANCE_RING(); +} + +static inline uint32_t i2f(uint32_t input) +{ + u32 result, i, exponent, fraction; + + if ((input & 0x3fff) == 0) + result = 0; /* 0 is a special case */ + else { + exponent = 140; /* exponent biased by 127; */ + fraction = (input & 0x3fff) << 10; /* cheat and only + handle numbers below 2^^15 */ + for (i = 0; i < 14; i++) { + if (fraction & 0x800000) + break; + else { + fraction = fraction << 1; /* keep + shifting left until top bit = 1 */ + exponent = exponent - 1; + } + } + result = exponent << 23 | (fraction & 0x7fffff); /* mask + off top bit; assumed 1 */ + } + return result; +} + + +int r600_nomm_get_vb(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + dev_priv->blit_vb = radeon_freelist_get(dev); + if (!dev_priv->blit_vb) { + DRM_ERROR("Unable to allocate vertex buffer for blit\n"); + return -EAGAIN; + } + return 0; +} + +void r600_nomm_put_vb(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + + dev_priv->blit_vb->used = 0; + radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb); +} + +void *r600_nomm_get_vb_ptr(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + return (((char *)dev->agp_buffer_map->handle + + dev_priv->blit_vb->offset + dev_priv->blit_vb->used)); +} + +int +r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG("\n"); + + r600_nomm_get_vb(dev); + + dev_priv->blit_vb->file_priv = file_priv; + + set_default_state(dev_priv); + set_shaders(dev); + + return 0; +} + + +void +r600_done_blit_copy(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(5); + OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); + OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); + /* wait for 3D idle clean */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2); + OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN); + + ADVANCE_RING(); + COMMIT_RING(); + + r600_nomm_put_vb(dev); +} + +void +r600_blit_copy(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int size_bytes) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + int max_bytes; + u64 vb_addr; + u32 *vb; + + vb = r600_nomm_get_vb_ptr(dev); + + if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { + max_bytes = 8192; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = src_gpu_addr & 255; + int dst_x = dst_gpu_addr & 255; + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { + + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } + + vb[0] = i2f(dst_x); + vb[1] = 0; + vb[2] = i2f(src_x); + vb[3] = 0; + + vb[4] = i2f(dst_x); + vb[5] = i2f(h); + vb[6] = i2f(src_x); + vb[7] = i2f(h); + + vb[8] = i2f(dst_x + cur_size); + vb[9] = i2f(h); + vb[10] = i2f(src_x + cur_size); + vb[11] = i2f(h); + + /* src */ + set_tex_resource(dev_priv, FMT_8, + src_x + cur_size, h, src_x + cur_size, + src_gpu_addr); + + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst */ + set_render_target(dev_priv, COLOR_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors */ + set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h); + + /* Vertex buffer setup */ + vb_addr = dev_priv->gart_buffers_offset + + dev_priv->blit_vb->offset + + dev_priv->blit_vb->used; + set_vtx_resource(dev_priv, vb_addr); + + /* draw */ + draw_auto(dev_priv); + + cp_set_surface_sync(dev_priv, + R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + vb += 12; + dev_priv->blit_vb->used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } else { + max_bytes = 8192 * 4; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = (src_gpu_addr & 255); + int dst_x = (dst_gpu_addr & 255); + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } + + vb[0] = i2f(dst_x / 4); + vb[1] = 0; + vb[2] = i2f(src_x / 4); + vb[3] = 0; + + vb[4] = i2f(dst_x / 4); + vb[5] = i2f(h); + vb[6] = i2f(src_x / 4); + vb[7] = i2f(h); + + vb[8] = i2f((dst_x + cur_size) / 4); + vb[9] = i2f(h); + vb[10] = i2f((src_x + cur_size) / 4); + vb[11] = i2f(h); + + /* src */ + set_tex_resource(dev_priv, FMT_8_8_8_8, + (src_x + cur_size) / 4, + h, (src_x + cur_size) / 4, + src_gpu_addr); + + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst */ + set_render_target(dev_priv, COLOR_8_8_8_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors */ + set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h); + + /* Vertex buffer setup */ + vb_addr = dev_priv->gart_buffers_offset + + dev_priv->blit_vb->offset + + dev_priv->blit_vb->used; + set_vtx_resource(dev_priv, vb_addr); + + /* draw */ + draw_auto(dev_priv); + + cp_set_surface_sync(dev_priv, + R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + vb += 12; + dev_priv->blit_vb->used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } +} + +void +r600_blit_swap(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int sx, int sy, int dx, int dy, + int w, int h, int src_pitch, int dst_pitch, int cpp) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + int cb_format, tex_format; + u64 vb_addr; + u32 *vb; + + vb = (u32 *) ((char *)dev->agp_buffer_map->handle + + dev_priv->blit_vb->offset + dev_priv->blit_vb->used); + + if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { + + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } + + if (cpp == 4) { + cb_format = COLOR_8_8_8_8; + tex_format = FMT_8_8_8_8; + } else if (cpp == 2) { + cb_format = COLOR_5_6_5; + tex_format = FMT_5_6_5; + } else { + cb_format = COLOR_8; + tex_format = FMT_8; + } + + vb[0] = i2f(dx); + vb[1] = i2f(dy); + vb[2] = i2f(sx); + vb[3] = i2f(sy); + + vb[4] = i2f(dx); + vb[5] = i2f(dy + h); + vb[6] = i2f(sx); + vb[7] = i2f(sy + h); + + vb[8] = i2f(dx + w); + vb[9] = i2f(dy + h); + vb[10] = i2f(sx + w); + vb[11] = i2f(sy + h); + + /* src */ + set_tex_resource(dev_priv, tex_format, + src_pitch / cpp, + sy + h, src_pitch / cpp, + src_gpu_addr); + + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, (src_pitch * (sy + h)), src_gpu_addr); + + /* dst */ + set_render_target(dev_priv, cb_format, + dst_pitch / cpp, dy + h, + dst_gpu_addr); + + /* scissors */ + set_scissors(dev_priv, dx, dy, dx + w, dy + h); + + /* Vertex buffer setup */ + vb_addr = dev_priv->gart_buffers_offset + + dev_priv->blit_vb->offset + + dev_priv->blit_vb->used; + set_vtx_resource(dev_priv, vb_addr); + + /* draw */ + draw_auto(dev_priv); + + cp_set_surface_sync(dev_priv, + R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, + dst_pitch * (dy + h), dst_gpu_addr); + + dev_priv->blit_vb->used += 12 * 4; +} diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c new file mode 100644 index 000000000000..5755647e688a --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -0,0 +1,777 @@ +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon.h" + +#include "r600d.h" +#include "r600_blit_shaders.h" + +#define DI_PT_RECTLIST 0x11 +#define DI_INDEX_SIZE_16_BIT 0x0 +#define DI_SRC_SEL_AUTO_INDEX 0x2 + +#define FMT_8 0x1 +#define FMT_5_6_5 0x8 +#define FMT_8_8_8_8 0x1a +#define COLOR_8 0x1 +#define COLOR_5_6_5 0x8 +#define COLOR_8_8_8_8 0x1a + +/* emits 21 on rv770+, 23 on r600 */ +static void +set_render_target(struct radeon_device *rdev, int format, + int w, int h, u64 gpu_addr) +{ + u32 cb_color_info; + int pitch, slice; + + h = (h + 7) & ~7; + if (h < 8) + h = 8; + + cb_color_info = ((format << 2) | (1 << 27)); + pitch = (w / 8) - 1; + slice = ((w * h) / 64) - 1; + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) { + radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0)); + radeon_ring_write(rdev, 2 << 0); + } + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (pitch << 0) | (slice << 10)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, cb_color_info); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); +} + +/* emits 5dw */ +static void +cp_set_surface_sync(struct radeon_device *rdev, + u32 sync_type, u32 size, + u64 mc_addr) +{ + u32 cp_coher_size; + + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(rdev, sync_type); + radeon_ring_write(rdev, cp_coher_size); + radeon_ring_write(rdev, mc_addr >> 8); + radeon_ring_write(rdev, 10); /* poll interval */ +} + +/* emits 21dw + 1 surface sync = 26dw */ +static void +set_shaders(struct radeon_device *rdev) +{ + u64 gpu_addr; + u32 sq_pgm_resources; + + /* setup shader regs */ + sq_pgm_resources = (1 << 0); + + /* VS */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_pgm_resources); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + /* PS */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_pgm_resources | (1 << 28)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 2); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); +} + +/* emits 9 + 1 sync (5) = 14*/ +static void +set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) +{ + u32 sq_vtx_constant_word2; + + sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); + radeon_ring_write(rdev, 0x460); + radeon_ring_write(rdev, gpu_addr & 0xffffffff); + radeon_ring_write(rdev, 48 - 1); + radeon_ring_write(rdev, sq_vtx_constant_word2); + radeon_ring_write(rdev, 1 << 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30); + + if ((rdev->family == CHIP_RV610) || + (rdev->family == CHIP_RV620) || + (rdev->family == CHIP_RS780) || + (rdev->family == CHIP_RS880) || + (rdev->family == CHIP_RV710)) + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, 48, gpu_addr); + else + cp_set_surface_sync(rdev, + PACKET3_VC_ACTION_ENA, 48, gpu_addr); +} + +/* emits 9 */ +static void +set_tex_resource(struct radeon_device *rdev, + int format, int w, int h, int pitch, + u64 gpu_addr) +{ + uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; + + if (h < 1) + h = 1; + + sq_tex_resource_word0 = (1 << 0); + sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | + ((w - 1) << 19)); + + sq_tex_resource_word1 = (format << 26); + sq_tex_resource_word1 |= ((h - 1) << 0); + + sq_tex_resource_word4 = ((1 << 14) | + (0 << 16) | + (1 << 19) | + (2 << 22) | + (3 << 25)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_tex_resource_word0); + radeon_ring_write(rdev, sq_tex_resource_word1); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, sq_tex_resource_word4); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30); +} + +/* emits 12 */ +static void +set_scissors(struct radeon_device *rdev, int x1, int y1, + int x2, int y2) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); +} + +/* emits 10 */ +static void +draw_auto(struct radeon_device *rdev) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, DI_PT_RECTLIST); + + radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0)); + radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT); + + radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0)); + radeon_ring_write(rdev, 1); + + radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); + radeon_ring_write(rdev, 3); + radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX); + +} + +/* emits 14 */ +static void +set_default_state(struct radeon_device *rdev) +{ + u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; + u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; + int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; + int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; + int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; + u64 gpu_addr; + + switch (rdev->family) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + default: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + } + + if ((rdev->family == CHIP_RV610) || + (rdev->family == CHIP_RV620) || + (rdev->family == CHIP_RS780) || + (rdev->family == CHIP_RS780) || + (rdev->family == CHIP_RV710)) + sq_config = 0; + else + sq_config = VC_ENABLE; + + sq_config |= (DX9_CONSTS | + ALU_INST_PREFER_VECTOR | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | + NUM_VS_GPRS(num_vs_gprs) | + NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | + NUM_ES_GPRS(num_es_gprs)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | + NUM_VS_THREADS(num_vs_threads) | + NUM_GS_THREADS(num_gs_threads) | + NUM_ES_THREADS(num_es_threads)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | + NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | + NUM_ES_STACK_ENTRIES(num_es_stack_entries)); + + /* emit an IB pointing at default state */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC); + radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF); + radeon_ring_write(rdev, (rdev->r600_blit.state_len / 4)); + + radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0)); + radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT); + /* SQ config */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6)); + radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_config); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); + radeon_ring_write(rdev, sq_thread_resource_mgmt); + radeon_ring_write(rdev, sq_stack_resource_mgmt_1); + radeon_ring_write(rdev, sq_stack_resource_mgmt_2); +} + +static inline uint32_t i2f(uint32_t input) +{ + u32 result, i, exponent, fraction; + + if ((input & 0x3fff) == 0) + result = 0; /* 0 is a special case */ + else { + exponent = 140; /* exponent biased by 127; */ + fraction = (input & 0x3fff) << 10; /* cheat and only + handle numbers below 2^^15 */ + for (i = 0; i < 14; i++) { + if (fraction & 0x800000) + break; + else { + fraction = fraction << 1; /* keep + shifting left until top bit = 1 */ + exponent = exponent - 1; + } + } + result = exponent << 23 | (fraction & 0x7fffff); /* mask + off top bit; assumed 1 */ + } + return result; +} + +int r600_blit_init(struct radeon_device *rdev) +{ + u32 obj_size; + int r; + void *ptr; + + rdev->r600_blit.state_offset = 0; + + if (rdev->family >= CHIP_RV770) + rdev->r600_blit.state_len = r7xx_default_size * 4; + else + rdev->r600_blit.state_len = r6xx_default_size * 4; + + obj_size = rdev->r600_blit.state_len; + obj_size = ALIGN(obj_size, 256); + + rdev->r600_blit.vs_offset = obj_size; + obj_size += r6xx_vs_size * 4; + obj_size = ALIGN(obj_size, 256); + + rdev->r600_blit.ps_offset = obj_size; + obj_size += r6xx_ps_size * 4; + obj_size = ALIGN(obj_size, 256); + + r = radeon_object_create(rdev, NULL, obj_size, + true, RADEON_GEM_DOMAIN_VRAM, + false, &rdev->r600_blit.shader_obj); + if (r) { + DRM_ERROR("r600 failed to allocate shader\n"); + return r; + } + + r = radeon_object_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + if (r) { + DRM_ERROR("failed to pin blit object %d\n", r); + return r; + } + + DRM_DEBUG("r6xx blit allocated bo @ 0x%16llx %08x vs %08x ps %08x\n", + rdev->r600_blit.shader_gpu_addr, obj_size, + rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset); + + r = radeon_object_kmap(rdev->r600_blit.shader_obj, &ptr); + if (r) { + DRM_ERROR("failed to map blit object %d\n", r); + return r; + } + + if (rdev->family >= CHIP_RV770) + memcpy_toio(ptr + rdev->r600_blit.state_offset, r7xx_default_state, rdev->r600_blit.state_len); + else + memcpy_toio(ptr + rdev->r600_blit.state_offset, r6xx_default_state, rdev->r600_blit.state_len); + + memcpy(ptr + rdev->r600_blit.vs_offset, r6xx_vs, r6xx_vs_size * 4); + memcpy(ptr + rdev->r600_blit.ps_offset, r6xx_ps, r6xx_ps_size * 4); + + radeon_object_kunmap(rdev->r600_blit.shader_obj); + return 0; +} + +void r600_blit_fini(struct radeon_device *rdev) +{ + radeon_object_unpin(rdev->r600_blit.shader_obj); + radeon_object_unref(&rdev->r600_blit.shader_obj); +} + +int r600_vb_ib_get(struct radeon_device *rdev) +{ + int r; + r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); + if (r) { + DRM_ERROR("failed to get IB for vertex buffer\n"); + return r; + } + + rdev->r600_blit.vb_total = 64*1024; + rdev->r600_blit.vb_used = 0; + return 0; +} + +void r600_vb_ib_put(struct radeon_device *rdev) +{ + mutex_lock(&rdev->ib_pool.mutex); + radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); + list_add_tail(&rdev->r600_blit.vb_ib->list, &rdev->ib_pool.scheduled_ibs); + mutex_unlock(&rdev->ib_pool.mutex); + radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); +} + +int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) +{ + int r; + int ring_size; + const int max_size = 8192*8192; + + r = r600_vb_ib_get(rdev); + WARN_ON(r); + + /* loops of emits 64 + fence emit possible */ + ring_size = ((size_bytes + max_size) / max_size) * 78; + /* set default + shaders */ + ring_size += 40; /* shaders + def state */ + ring_size += 3; /* fence emit for VB IB */ + ring_size += 5; /* done copy */ + ring_size += 3; /* fence emit for done copy */ + r = radeon_ring_lock(rdev, ring_size); + WARN_ON(r); + + set_default_state(rdev); /* 14 */ + set_shaders(rdev); /* 26 */ + return 0; +} + +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) +{ + int r; + + radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0)); + radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT); + /* wait for 3D idle clean */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit); + + if (rdev->r600_blit.vb_ib) + r600_vb_ib_put(rdev); + + if (fence) + r = radeon_fence_emit(rdev, fence); + + radeon_ring_unlock_commit(rdev); +} + +void r600_kms_blit_copy(struct radeon_device *rdev, + u64 src_gpu_addr, u64 dst_gpu_addr, + int size_bytes) +{ + int max_bytes; + u64 vb_gpu_addr; + u32 *vb; + + DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, + size_bytes, rdev->r600_blit.vb_used); + vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); + if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { + max_bytes = 8192; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = src_gpu_addr & 255; + int dst_x = dst_gpu_addr & 255; + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); + +#if 0 + r600_vb_ib_put(rdev); + + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); +#endif + } + + vb[0] = i2f(dst_x); + vb[1] = 0; + vb[2] = i2f(src_x); + vb[3] = 0; + + vb[4] = i2f(dst_x); + vb[5] = i2f(h); + vb[6] = i2f(src_x); + vb[7] = i2f(h); + + vb[8] = i2f(dst_x + cur_size); + vb[9] = i2f(h); + vb[10] = i2f(src_x + cur_size); + vb[11] = i2f(h); + + /* src 9 */ + set_tex_resource(rdev, FMT_8, + src_x + cur_size, h, src_x + cur_size, + src_gpu_addr); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst 23 */ + set_render_target(rdev, COLOR_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors 12 */ + set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); + + /* 14 */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + vb += 12; + rdev->r600_blit.vb_used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } else { + max_bytes = 8192 * 4; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = (src_gpu_addr & 255); + int dst_x = (dst_gpu_addr & 255); + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); + } +#if 0 + if ((rdev->blit_vb->used + 48) > rdev->blit_vb->total) { + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!rdev->blit_vb) + return; + + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } +#endif + + vb[0] = i2f(dst_x / 4); + vb[1] = 0; + vb[2] = i2f(src_x / 4); + vb[3] = 0; + + vb[4] = i2f(dst_x / 4); + vb[5] = i2f(h); + vb[6] = i2f(src_x / 4); + vb[7] = i2f(h); + + vb[8] = i2f((dst_x + cur_size) / 4); + vb[9] = i2f(h); + vb[10] = i2f((src_x + cur_size) / 4); + vb[11] = i2f(h); + + /* src 9 */ + set_tex_resource(rdev, FMT_8_8_8_8, + (src_x + cur_size) / 4, + h, (src_x + cur_size) / 4, + src_gpu_addr); + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst 23 */ + set_render_target(rdev, COLOR_8_8_8_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors 12 */ + set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); + + /* Vertex buffer setup 14 */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + /* 78 ring dwords per loop */ + vb += 12; + rdev->r600_blit.vb_used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } +} + diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c new file mode 100644 index 000000000000..d745e815c2e8 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit_shaders.c @@ -0,0 +1,1072 @@ + +#include <linux/types.h> +#include <linux/kernel.h> + +const u32 r6xx_default_state[] = +{ + 0xc0002400, + 0x00000000, + 0xc0012800, + 0x80000000, + 0x80000000, + 0xc0004600, + 0x00000016, + 0xc0016800, + 0x00000010, + 0x00028000, + 0xc0016800, + 0x00000010, + 0x00008000, + 0xc0016800, + 0x00000542, + 0x07000003, + 0xc0016800, + 0x000005c5, + 0x00000000, + 0xc0016800, + 0x00000363, + 0x00000000, + 0xc0016800, + 0x0000060c, + 0x82000000, + 0xc0016800, + 0x0000060e, + 0x01020204, + 0xc0016f00, + 0x00000000, + 0x00000000, + 0xc0016f00, + 0x00000001, + 0x00000000, + 0xc0096900, + 0x0000022a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x00000004, + 0x00000000, + 0xc0016900, + 0x0000000a, + 0x00000000, + 0xc0016900, + 0x0000000b, + 0x00000000, + 0xc0016900, + 0x0000010c, + 0x00000000, + 0xc0016900, + 0x0000010d, + 0x00000000, + 0xc0016900, + 0x00000200, + 0x00000000, + 0xc0016900, + 0x00000343, + 0x00000060, + 0xc0016900, + 0x00000344, + 0x00000040, + 0xc0016900, + 0x00000351, + 0x0000aa00, + 0xc0016900, + 0x00000104, + 0x00000000, + 0xc0016900, + 0x0000010e, + 0x00000000, + 0xc0046900, + 0x00000105, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0036900, + 0x00000109, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0046900, + 0x0000030c, + 0x01000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0046900, + 0x00000048, + 0x3f800000, + 0x00000000, + 0x3f800000, + 0x3f800000, + 0xc0016900, + 0x0000008e, + 0x0000000f, + 0xc0016900, + 0x00000080, + 0x00000000, + 0xc0016900, + 0x00000083, + 0x0000ffff, + 0xc0016900, + 0x00000084, + 0x00000000, + 0xc0016900, + 0x00000085, + 0x20002000, + 0xc0016900, + 0x00000086, + 0x00000000, + 0xc0016900, + 0x00000087, + 0x20002000, + 0xc0016900, + 0x00000088, + 0x00000000, + 0xc0016900, + 0x00000089, + 0x20002000, + 0xc0016900, + 0x0000008a, + 0x00000000, + 0xc0016900, + 0x0000008b, + 0x20002000, + 0xc0016900, + 0x0000008c, + 0x00000000, + 0xc0016900, + 0x00000094, + 0x80000000, + 0xc0016900, + 0x00000095, + 0x20002000, + 0xc0026900, + 0x000000b4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000096, + 0x80000000, + 0xc0016900, + 0x00000097, + 0x20002000, + 0xc0026900, + 0x000000b6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000098, + 0x80000000, + 0xc0016900, + 0x00000099, + 0x20002000, + 0xc0026900, + 0x000000b8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009a, + 0x80000000, + 0xc0016900, + 0x0000009b, + 0x20002000, + 0xc0026900, + 0x000000ba, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009c, + 0x80000000, + 0xc0016900, + 0x0000009d, + 0x20002000, + 0xc0026900, + 0x000000bc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009e, + 0x80000000, + 0xc0016900, + 0x0000009f, + 0x20002000, + 0xc0026900, + 0x000000be, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a0, + 0x80000000, + 0xc0016900, + 0x000000a1, + 0x20002000, + 0xc0026900, + 0x000000c0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a2, + 0x80000000, + 0xc0016900, + 0x000000a3, + 0x20002000, + 0xc0026900, + 0x000000c2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a4, + 0x80000000, + 0xc0016900, + 0x000000a5, + 0x20002000, + 0xc0026900, + 0x000000c4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a6, + 0x80000000, + 0xc0016900, + 0x000000a7, + 0x20002000, + 0xc0026900, + 0x000000c6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a8, + 0x80000000, + 0xc0016900, + 0x000000a9, + 0x20002000, + 0xc0026900, + 0x000000c8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000aa, + 0x80000000, + 0xc0016900, + 0x000000ab, + 0x20002000, + 0xc0026900, + 0x000000ca, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ac, + 0x80000000, + 0xc0016900, + 0x000000ad, + 0x20002000, + 0xc0026900, + 0x000000cc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ae, + 0x80000000, + 0xc0016900, + 0x000000af, + 0x20002000, + 0xc0026900, + 0x000000ce, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b0, + 0x80000000, + 0xc0016900, + 0x000000b1, + 0x20002000, + 0xc0026900, + 0x000000d0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b2, + 0x80000000, + 0xc0016900, + 0x000000b3, + 0x20002000, + 0xc0026900, + 0x000000d2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000293, + 0x00004010, + 0xc0016900, + 0x00000300, + 0x00000000, + 0xc0016900, + 0x00000301, + 0x00000000, + 0xc0016900, + 0x00000312, + 0xffffffff, + 0xc0016900, + 0x00000307, + 0x00000000, + 0xc0016900, + 0x00000308, + 0x00000000, + 0xc0016900, + 0x00000283, + 0x00000000, + 0xc0016900, + 0x00000292, + 0x00000000, + 0xc0066900, + 0x0000010f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000000, + 0xc0016900, + 0x00000207, + 0x00000000, + 0xc0016900, + 0x00000208, + 0x00000000, + 0xc0046900, + 0x00000303, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0xc0016900, + 0x00000205, + 0x00000004, + 0xc0016900, + 0x00000280, + 0x00000000, + 0xc0016900, + 0x00000281, + 0x00000000, + 0xc0016900, + 0x0000037e, + 0x00000000, + 0xc0016900, + 0x00000382, + 0x00000000, + 0xc0016900, + 0x00000380, + 0x00000000, + 0xc0016900, + 0x00000383, + 0x00000000, + 0xc0016900, + 0x00000381, + 0x00000000, + 0xc0016900, + 0x00000282, + 0x00000008, + 0xc0016900, + 0x00000302, + 0x0000002d, + 0xc0016900, + 0x0000037f, + 0x00000000, + 0xc0016900, + 0x000001b2, + 0x00000000, + 0xc0016900, + 0x000001b6, + 0x00000000, + 0xc0016900, + 0x000001b7, + 0x00000000, + 0xc0016900, + 0x000001b8, + 0x00000000, + 0xc0016900, + 0x000001b9, + 0x00000000, + 0xc0016900, + 0x00000225, + 0x00000000, + 0xc0016900, + 0x00000229, + 0x00000000, + 0xc0016900, + 0x00000237, + 0x00000000, + 0xc0016900, + 0x00000100, + 0x00000800, + 0xc0016900, + 0x00000101, + 0x00000000, + 0xc0016900, + 0x00000102, + 0x00000000, + 0xc0016900, + 0x000002a8, + 0x00000000, + 0xc0016900, + 0x000002a9, + 0x00000000, + 0xc0016900, + 0x00000103, + 0x00000000, + 0xc0016900, + 0x00000284, + 0x00000000, + 0xc0016900, + 0x00000290, + 0x00000000, + 0xc0016900, + 0x00000285, + 0x00000000, + 0xc0016900, + 0x00000286, + 0x00000000, + 0xc0016900, + 0x00000287, + 0x00000000, + 0xc0016900, + 0x00000288, + 0x00000000, + 0xc0016900, + 0x00000289, + 0x00000000, + 0xc0016900, + 0x0000028a, + 0x00000000, + 0xc0016900, + 0x0000028b, + 0x00000000, + 0xc0016900, + 0x0000028c, + 0x00000000, + 0xc0016900, + 0x0000028d, + 0x00000000, + 0xc0016900, + 0x0000028e, + 0x00000000, + 0xc0016900, + 0x0000028f, + 0x00000000, + 0xc0016900, + 0x000002a1, + 0x00000000, + 0xc0016900, + 0x000002a5, + 0x00000000, + 0xc0016900, + 0x000002ac, + 0x00000000, + 0xc0016900, + 0x000002ad, + 0x00000000, + 0xc0016900, + 0x000002ae, + 0x00000000, + 0xc0016900, + 0x000002c8, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000100, + 0xc0016900, + 0x00000204, + 0x00010000, + 0xc0036e00, + 0x00000000, + 0x00000012, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x0000008f, + 0x0000000f, + 0xc0016900, + 0x000001e8, + 0x00000001, + 0xc0016900, + 0x00000202, + 0x00cc0000, + 0xc0016900, + 0x00000205, + 0x00000244, + 0xc0016900, + 0x00000203, + 0x00000210, + 0xc0016900, + 0x000001b1, + 0x00000000, + 0xc0016900, + 0x00000185, + 0x00000000, + 0xc0016900, + 0x000001b3, + 0x00000001, + 0xc0016900, + 0x000001b4, + 0x00000000, + 0xc0016900, + 0x00000191, + 0x00000b00, + 0xc0016900, + 0x000001b5, + 0x00000000, +}; + +const u32 r7xx_default_state[] = +{ + 0xc0012800, + 0x80000000, + 0x80000000, + 0xc0004600, + 0x00000016, + 0xc0016800, + 0x00000010, + 0x00028000, + 0xc0016800, + 0x00000010, + 0x00008000, + 0xc0016800, + 0x00000542, + 0x07000002, + 0xc0016800, + 0x000005c5, + 0x00000000, + 0xc0016800, + 0x00000363, + 0x00004000, + 0xc0016800, + 0x0000060c, + 0x00000000, + 0xc0016800, + 0x0000060e, + 0x00420204, + 0xc0016f00, + 0x00000000, + 0x00000000, + 0xc0016f00, + 0x00000001, + 0x00000000, + 0xc0096900, + 0x0000022a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x00000004, + 0x00000000, + 0xc0016900, + 0x0000000a, + 0x00000000, + 0xc0016900, + 0x0000000b, + 0x00000000, + 0xc0016900, + 0x0000010c, + 0x00000000, + 0xc0016900, + 0x0000010d, + 0x00000000, + 0xc0016900, + 0x00000200, + 0x00000000, + 0xc0016900, + 0x00000343, + 0x00000060, + 0xc0016900, + 0x00000344, + 0x00000000, + 0xc0016900, + 0x00000351, + 0x0000aa00, + 0xc0016900, + 0x00000104, + 0x00000000, + 0xc0016900, + 0x0000010e, + 0x00000000, + 0xc0046900, + 0x00000105, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0046900, + 0x0000030c, + 0x01000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x0000008e, + 0x0000000f, + 0xc0016900, + 0x00000080, + 0x00000000, + 0xc0016900, + 0x00000083, + 0x0000ffff, + 0xc0016900, + 0x00000084, + 0x00000000, + 0xc0016900, + 0x00000085, + 0x20002000, + 0xc0016900, + 0x00000086, + 0x00000000, + 0xc0016900, + 0x00000087, + 0x20002000, + 0xc0016900, + 0x00000088, + 0x00000000, + 0xc0016900, + 0x00000089, + 0x20002000, + 0xc0016900, + 0x0000008a, + 0x00000000, + 0xc0016900, + 0x0000008b, + 0x20002000, + 0xc0016900, + 0x0000008c, + 0xaaaaaaaa, + 0xc0016900, + 0x00000094, + 0x80000000, + 0xc0016900, + 0x00000095, + 0x20002000, + 0xc0026900, + 0x000000b4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000096, + 0x80000000, + 0xc0016900, + 0x00000097, + 0x20002000, + 0xc0026900, + 0x000000b6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000098, + 0x80000000, + 0xc0016900, + 0x00000099, + 0x20002000, + 0xc0026900, + 0x000000b8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009a, + 0x80000000, + 0xc0016900, + 0x0000009b, + 0x20002000, + 0xc0026900, + 0x000000ba, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009c, + 0x80000000, + 0xc0016900, + 0x0000009d, + 0x20002000, + 0xc0026900, + 0x000000bc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009e, + 0x80000000, + 0xc0016900, + 0x0000009f, + 0x20002000, + 0xc0026900, + 0x000000be, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a0, + 0x80000000, + 0xc0016900, + 0x000000a1, + 0x20002000, + 0xc0026900, + 0x000000c0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a2, + 0x80000000, + 0xc0016900, + 0x000000a3, + 0x20002000, + 0xc0026900, + 0x000000c2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a4, + 0x80000000, + 0xc0016900, + 0x000000a5, + 0x20002000, + 0xc0026900, + 0x000000c4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a6, + 0x80000000, + 0xc0016900, + 0x000000a7, + 0x20002000, + 0xc0026900, + 0x000000c6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a8, + 0x80000000, + 0xc0016900, + 0x000000a9, + 0x20002000, + 0xc0026900, + 0x000000c8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000aa, + 0x80000000, + 0xc0016900, + 0x000000ab, + 0x20002000, + 0xc0026900, + 0x000000ca, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ac, + 0x80000000, + 0xc0016900, + 0x000000ad, + 0x20002000, + 0xc0026900, + 0x000000cc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ae, + 0x80000000, + 0xc0016900, + 0x000000af, + 0x20002000, + 0xc0026900, + 0x000000ce, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b0, + 0x80000000, + 0xc0016900, + 0x000000b1, + 0x20002000, + 0xc0026900, + 0x000000d0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b2, + 0x80000000, + 0xc0016900, + 0x000000b3, + 0x20002000, + 0xc0026900, + 0x000000d2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000293, + 0x00514000, + 0xc0016900, + 0x00000300, + 0x00000000, + 0xc0016900, + 0x00000301, + 0x00000000, + 0xc0016900, + 0x00000312, + 0xffffffff, + 0xc0016900, + 0x00000307, + 0x00000000, + 0xc0016900, + 0x00000308, + 0x00000000, + 0xc0016900, + 0x00000283, + 0x00000000, + 0xc0016900, + 0x00000292, + 0x00000000, + 0xc0066900, + 0x0000010f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000000, + 0xc0016900, + 0x00000207, + 0x00000000, + 0xc0016900, + 0x00000208, + 0x00000000, + 0xc0046900, + 0x00000303, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0xc0016900, + 0x00000205, + 0x00000004, + 0xc0016900, + 0x00000280, + 0x00000000, + 0xc0016900, + 0x00000281, + 0x00000000, + 0xc0016900, + 0x0000037e, + 0x00000000, + 0xc0016900, + 0x00000382, + 0x00000000, + 0xc0016900, + 0x00000380, + 0x00000000, + 0xc0016900, + 0x00000383, + 0x00000000, + 0xc0016900, + 0x00000381, + 0x00000000, + 0xc0016900, + 0x00000282, + 0x00000008, + 0xc0016900, + 0x00000302, + 0x0000002d, + 0xc0016900, + 0x0000037f, + 0x00000000, + 0xc0016900, + 0x000001b2, + 0x00000001, + 0xc0016900, + 0x000001b6, + 0x00000000, + 0xc0016900, + 0x000001b7, + 0x00000000, + 0xc0016900, + 0x000001b8, + 0x00000000, + 0xc0016900, + 0x000001b9, + 0x00000000, + 0xc0016900, + 0x00000225, + 0x00000000, + 0xc0016900, + 0x00000229, + 0x00000000, + 0xc0016900, + 0x00000237, + 0x00000000, + 0xc0016900, + 0x00000100, + 0x00000800, + 0xc0016900, + 0x00000101, + 0x00000000, + 0xc0016900, + 0x00000102, + 0x00000000, + 0xc0016900, + 0x000002a8, + 0x00000000, + 0xc0016900, + 0x000002a9, + 0x00000000, + 0xc0016900, + 0x00000103, + 0x00000000, + 0xc0016900, + 0x00000284, + 0x00000000, + 0xc0016900, + 0x00000290, + 0x00000000, + 0xc0016900, + 0x00000285, + 0x00000000, + 0xc0016900, + 0x00000286, + 0x00000000, + 0xc0016900, + 0x00000287, + 0x00000000, + 0xc0016900, + 0x00000288, + 0x00000000, + 0xc0016900, + 0x00000289, + 0x00000000, + 0xc0016900, + 0x0000028a, + 0x00000000, + 0xc0016900, + 0x0000028b, + 0x00000000, + 0xc0016900, + 0x0000028c, + 0x00000000, + 0xc0016900, + 0x0000028d, + 0x00000000, + 0xc0016900, + 0x0000028e, + 0x00000000, + 0xc0016900, + 0x0000028f, + 0x00000000, + 0xc0016900, + 0x000002a1, + 0x00000000, + 0xc0016900, + 0x000002a5, + 0x00000000, + 0xc0016900, + 0x000002ac, + 0x00000000, + 0xc0016900, + 0x000002ad, + 0x00000000, + 0xc0016900, + 0x000002ae, + 0x00000000, + 0xc0016900, + 0x000002c8, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000100, + 0xc0016900, + 0x00000204, + 0x00010000, + 0xc0036e00, + 0x00000000, + 0x00000012, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x0000008f, + 0x0000000f, + 0xc0016900, + 0x000001e8, + 0x00000001, + 0xc0016900, + 0x00000202, + 0x00cc0000, + 0xc0016900, + 0x00000205, + 0x00000244, + 0xc0016900, + 0x00000203, + 0x00000210, + 0xc0016900, + 0x000001b1, + 0x00000000, + 0xc0016900, + 0x00000185, + 0x00000000, + 0xc0016900, + 0x000001b3, + 0x00000001, + 0xc0016900, + 0x000001b4, + 0x00000000, + 0xc0016900, + 0x00000191, + 0x00000b00, + 0xc0016900, + 0x000001b5, + 0x00000000, +}; + +/* same for r6xx/r7xx */ +const u32 r6xx_vs[] = +{ + 0x00000004, + 0x81000000, + 0x0000203c, + 0x94000b08, + 0x00004000, + 0x14200b1a, + 0x00000000, + 0x00000000, + 0x3c000000, + 0x68cd1000, + 0x00080000, + 0x00000000, +}; + +const u32 r6xx_ps[] = +{ + 0x00000002, + 0x80800000, + 0x00000000, + 0x94200688, + 0x00000010, + 0x000d1000, + 0xb0800000, + 0x00000000, +}; + +const u32 r6xx_ps_size = ARRAY_SIZE(r6xx_ps); +const u32 r6xx_vs_size = ARRAY_SIZE(r6xx_vs); +const u32 r6xx_default_size = ARRAY_SIZE(r6xx_default_state); +const u32 r7xx_default_size = ARRAY_SIZE(r7xx_default_state); diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h new file mode 100644 index 000000000000..fdc3b378cbb0 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit_shaders.h @@ -0,0 +1,14 @@ + +#ifndef R600_BLIT_SHADERS_H +#define R600_BLIT_SHADERS_H + +extern const u32 r6xx_ps[]; +extern const u32 r6xx_vs[]; +extern const u32 r7xx_default_state[]; +extern const u32 r6xx_default_state[]; + + +extern const u32 r6xx_ps_size, r6xx_vs_size; +extern const u32 r6xx_default_size, r7xx_default_size; + +#endif diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 8327912de964..6d5a711c2e91 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -58,6 +58,12 @@ MODULE_FIRMWARE("radeon/RV730_me.bin"); MODULE_FIRMWARE("radeon/RV710_pfp.bin"); MODULE_FIRMWARE("radeon/RV710_me.bin"); + +int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp, + unsigned family, u32 *ib, int *l); +void r600_cs_legacy_init(void); + + # define ATI_PCIGART_PAGE_SIZE 4096 /**< PCI GART page size */ # define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1)) @@ -1857,6 +1863,8 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, DRM_DEBUG("\n"); + mutex_init(&dev_priv->cs_mutex); + r600_cs_legacy_init(); /* if we require new memory map but we don't have it fail */ if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) { DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n"); @@ -1888,7 +1896,7 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, /* Enable vblank on CRTC1 for older X servers */ dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1; - + dev_priv->do_boxes = 0; dev_priv->cp_mode = init->cp_mode; /* We don't support anything other than bus-mastering ring mode, @@ -1974,11 +1982,11 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, } else #endif { - dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset; + dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset; dev_priv->ring_rptr->handle = - (void *)dev_priv->ring_rptr->offset; + (void *)(unsigned long)dev_priv->ring_rptr->offset; dev->agp_buffer_map->handle = - (void *)dev->agp_buffer_map->offset; + (void *)(unsigned long)dev->agp_buffer_map->offset; DRM_DEBUG("dev_priv->cp_ring->handle %p\n", dev_priv->cp_ring->handle); @@ -2282,3 +2290,239 @@ int r600_cp_dispatch_indirect(struct drm_device *dev, return 0; } + +void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_master *master = file_priv->master; + struct drm_radeon_master_private *master_priv = master->driver_priv; + drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv; + int nbox = sarea_priv->nbox; + struct drm_clip_rect *pbox = sarea_priv->boxes; + int i, cpp, src_pitch, dst_pitch; + uint64_t src, dst; + RING_LOCALS; + DRM_DEBUG("\n"); + + if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888) + cpp = 4; + else + cpp = 2; + + if (sarea_priv->pfCurrentPage == 0) { + src_pitch = dev_priv->back_pitch; + dst_pitch = dev_priv->front_pitch; + src = dev_priv->back_offset + dev_priv->fb_location; + dst = dev_priv->front_offset + dev_priv->fb_location; + } else { + src_pitch = dev_priv->front_pitch; + dst_pitch = dev_priv->back_pitch; + src = dev_priv->front_offset + dev_priv->fb_location; + dst = dev_priv->back_offset + dev_priv->fb_location; + } + + if (r600_prepare_blit_copy(dev, file_priv)) { + DRM_ERROR("unable to allocate vertex buffer for swap buffer\n"); + return; + } + for (i = 0; i < nbox; i++) { + int x = pbox[i].x1; + int y = pbox[i].y1; + int w = pbox[i].x2 - x; + int h = pbox[i].y2 - y; + + DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h); + + r600_blit_swap(dev, + src, dst, + x, y, x, y, w, h, + src_pitch, dst_pitch, cpp); + } + r600_done_blit_copy(dev); + + /* Increment the frame counter. The client-side 3D driver must + * throttle the framerate by waiting for this value before + * performing the swapbuffer ioctl. + */ + sarea_priv->last_frame++; + + BEGIN_RING(3); + R600_FRAME_AGE(sarea_priv->last_frame); + ADVANCE_RING(); +} + +int r600_cp_dispatch_texture(struct drm_device *dev, + struct drm_file *file_priv, + drm_radeon_texture_t *tex, + drm_radeon_tex_image_t *image) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_buf *buf; + u32 *buffer; + const u8 __user *data; + int size, pass_size; + u64 src_offset, dst_offset; + + if (!radeon_check_offset(dev_priv, tex->offset)) { + DRM_ERROR("Invalid destination offset\n"); + return -EINVAL; + } + + /* this might fail for zero-sized uploads - are those illegal? */ + if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) { + DRM_ERROR("Invalid final destination offset\n"); + return -EINVAL; + } + + size = tex->height * tex->pitch; + + if (size == 0) + return 0; + + dst_offset = tex->offset; + + if (r600_prepare_blit_copy(dev, file_priv)) { + DRM_ERROR("unable to allocate vertex buffer for swap buffer\n"); + return -EAGAIN; + } + do { + data = (const u8 __user *)image->data; + pass_size = size; + + buf = radeon_freelist_get(dev); + if (!buf) { + DRM_DEBUG("EAGAIN\n"); + if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image))) + return -EFAULT; + return -EAGAIN; + } + + if (pass_size > buf->total) + pass_size = buf->total; + + /* Dispatch the indirect buffer. + */ + buffer = + (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset); + + if (DRM_COPY_FROM_USER(buffer, data, pass_size)) { + DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size); + return -EFAULT; + } + + buf->file_priv = file_priv; + buf->used = pass_size; + src_offset = dev_priv->gart_buffers_offset + buf->offset; + + r600_blit_copy(dev, src_offset, dst_offset, pass_size); + + radeon_cp_discard_buffer(dev, file_priv->master, buf); + + /* Update the input parameters for next time */ + image->data = (const u8 __user *)image->data + pass_size; + dst_offset += pass_size; + size -= pass_size; + } while (size > 0); + r600_done_blit_copy(dev); + + return 0; +} + +/* + * Legacy cs ioctl + */ +static u32 radeon_cs_id_get(struct drm_radeon_private *radeon) +{ + /* FIXME: check if wrap affect last reported wrap & sequence */ + radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF; + if (!radeon->cs_id_scnt) { + /* increment wrap counter */ + radeon->cs_id_wcnt += 0x01000000; + /* valid sequence counter start at 1 */ + radeon->cs_id_scnt = 1; + } + return (radeon->cs_id_scnt | radeon->cs_id_wcnt); +} + +static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id) +{ + RING_LOCALS; + + *id = radeon_cs_id_get(dev_priv); + + /* SCRATCH 2 */ + BEGIN_RING(3); + R600_CLEAR_AGE(*id); + ADVANCE_RING(); + COMMIT_RING(); +} + +static int r600_ib_get(struct drm_device *dev, + struct drm_file *fpriv, + struct drm_buf **buffer) +{ + struct drm_buf *buf; + + *buffer = NULL; + buf = radeon_freelist_get(dev); + if (!buf) { + return -EBUSY; + } + buf->file_priv = fpriv; + *buffer = buf; + return 0; +} + +static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf, + struct drm_file *fpriv, int l, int r) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + + if (buf) { + if (!r) + r600_cp_dispatch_indirect(dev, buf, 0, l * 4); + radeon_cp_discard_buffer(dev, fpriv->master, buf); + COMMIT_RING(); + } +} + +int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv) +{ + struct drm_radeon_private *dev_priv = dev->dev_private; + struct drm_radeon_cs *cs = data; + struct drm_buf *buf; + unsigned family; + int l, r = 0; + u32 *ib, cs_id = 0; + + if (dev_priv == NULL) { + DRM_ERROR("called with no initialization\n"); + return -EINVAL; + } + family = dev_priv->flags & RADEON_FAMILY_MASK; + if (family < CHIP_R600) { + DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n"); + return -EINVAL; + } + mutex_lock(&dev_priv->cs_mutex); + /* get ib */ + r = r600_ib_get(dev, fpriv, &buf); + if (r) { + DRM_ERROR("ib_get failed\n"); + goto out; + } + ib = dev->agp_buffer_map->handle + buf->offset; + /* now parse command stream */ + r = r600_cs_legacy(dev, data, fpriv, family, ib, &l); + if (r) { + goto out; + } + +out: + r600_ib_free(dev, buf, fpriv, l, r); + /* emit cs id sequence */ + r600_cs_id_emit(dev_priv, &cs_id); + cs->cs_id = cs_id; + mutex_unlock(&dev_priv->cs_mutex); + return r; +} diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c new file mode 100644 index 000000000000..39bf6349351b --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -0,0 +1,658 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#include "drmP.h" +#include "radeon.h" +#include "radeon_share.h" +#include "r600d.h" +#include "avivod.h" + +static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc); +static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc); +typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**); +static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm; + +/** + * r600_cs_packet_parse() - parse cp packet and point ib index to next packet + * @parser: parser structure holding parsing context. + * @pkt: where to store packet informations + * + * Assume that chunk_ib_index is properly set. Will return -EINVAL + * if packet is bigger than remaining ib size. or if packets is unknown. + **/ +int r600_cs_packet_parse(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx) +{ + struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; + uint32_t header; + + if (idx >= ib_chunk->length_dw) { + DRM_ERROR("Can not parse packet at %d after CS end %d !\n", + idx, ib_chunk->length_dw); + return -EINVAL; + } + header = ib_chunk->kdata[idx]; + pkt->idx = idx; + pkt->type = CP_PACKET_GET_TYPE(header); + pkt->count = CP_PACKET_GET_COUNT(header); + pkt->one_reg_wr = 0; + switch (pkt->type) { + case PACKET_TYPE0: + pkt->reg = CP_PACKET0_GET_REG(header); + break; + case PACKET_TYPE3: + pkt->opcode = CP_PACKET3_GET_OPCODE(header); + break; + case PACKET_TYPE2: + pkt->count = -1; + break; + default: + DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx); + return -EINVAL; + } + if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) { + DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n", + pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw); + return -EINVAL; + } + return 0; +} + +/** + * r600_cs_packet_next_reloc_mm() - parse next packet which should be reloc packet3 + * @parser: parser structure holding parsing context. + * @data: pointer to relocation data + * @offset_start: starting offset + * @offset_mask: offset mask (to align start offset on) + * @reloc: reloc informations + * + * Check next packet is relocation packet3, do bo validation and compute + * GPU offset using the provided start. + **/ +static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc) +{ + struct radeon_cs_chunk *ib_chunk; + struct radeon_cs_chunk *relocs_chunk; + struct radeon_cs_packet p3reloc; + unsigned idx; + int r; + + if (p->chunk_relocs_idx == -1) { + DRM_ERROR("No relocation chunk !\n"); + return -EINVAL; + } + *cs_reloc = NULL; + ib_chunk = &p->chunks[p->chunk_ib_idx]; + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; + r = r600_cs_packet_parse(p, &p3reloc, p->idx); + if (r) { + return r; + } + p->idx += p3reloc.count + 2; + if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) { + DRM_ERROR("No packet3 for relocation for packet at %d.\n", + p3reloc.idx); + return -EINVAL; + } + idx = ib_chunk->kdata[p3reloc.idx + 1]; + if (idx >= relocs_chunk->length_dw) { + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", + idx, relocs_chunk->length_dw); + return -EINVAL; + } + /* FIXME: we assume reloc size is 4 dwords */ + *cs_reloc = p->relocs_ptr[(idx / 4)]; + return 0; +} + +/** + * r600_cs_packet_next_reloc_nomm() - parse next packet which should be reloc packet3 + * @parser: parser structure holding parsing context. + * @data: pointer to relocation data + * @offset_start: starting offset + * @offset_mask: offset mask (to align start offset on) + * @reloc: reloc informations + * + * Check next packet is relocation packet3, do bo validation and compute + * GPU offset using the provided start. + **/ +static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc) +{ + struct radeon_cs_chunk *ib_chunk; + struct radeon_cs_chunk *relocs_chunk; + struct radeon_cs_packet p3reloc; + unsigned idx; + int r; + + if (p->chunk_relocs_idx == -1) { + DRM_ERROR("No relocation chunk !\n"); + return -EINVAL; + } + *cs_reloc = NULL; + ib_chunk = &p->chunks[p->chunk_ib_idx]; + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; + r = r600_cs_packet_parse(p, &p3reloc, p->idx); + if (r) { + return r; + } + p->idx += p3reloc.count + 2; + if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) { + DRM_ERROR("No packet3 for relocation for packet at %d.\n", + p3reloc.idx); + return -EINVAL; + } + idx = ib_chunk->kdata[p3reloc.idx + 1]; + if (idx >= relocs_chunk->length_dw) { + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", + idx, relocs_chunk->length_dw); + return -EINVAL; + } + *cs_reloc = &p->relocs[0]; + (*cs_reloc)->lobj.gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32; + (*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0]; + return 0; +} + +static int r600_packet0_check(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, unsigned reg) +{ + switch (reg) { + case AVIVO_D1MODE_VLINE_START_END: + case AVIVO_D2MODE_VLINE_START_END: + break; + default: + printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", + reg, idx); + return -EINVAL; + } + return 0; +} + +static int r600_cs_parse_packet0(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt) +{ + unsigned reg, i; + unsigned idx; + int r; + + idx = pkt->idx + 1; + reg = pkt->reg; + for (i = 0; i <= pkt->count; i++, idx++, reg += 4) { + r = r600_packet0_check(p, pkt, idx, reg); + if (r) { + return r; + } + } + return 0; +} + +static int r600_packet3_check(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt) +{ + struct radeon_cs_chunk *ib_chunk; + struct radeon_cs_reloc *reloc; + volatile u32 *ib; + unsigned idx; + unsigned i; + unsigned start_reg, end_reg, reg; + int r; + + ib = p->ib->ptr; + ib_chunk = &p->chunks[p->chunk_ib_idx]; + idx = pkt->idx + 1; + switch (pkt->opcode) { + case PACKET3_START_3D_CMDBUF: + if (p->family >= CHIP_RV770 || pkt->count) { + DRM_ERROR("bad START_3D\n"); + return -EINVAL; + } + break; + case PACKET3_CONTEXT_CONTROL: + if (pkt->count != 1) { + DRM_ERROR("bad CONTEXT_CONTROL\n"); + return -EINVAL; + } + break; + case PACKET3_INDEX_TYPE: + case PACKET3_NUM_INSTANCES: + if (pkt->count) { + DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n"); + return -EINVAL; + } + break; + case PACKET3_DRAW_INDEX: + if (pkt->count != 3) { + DRM_ERROR("bad DRAW_INDEX\n"); + return -EINVAL; + } + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad DRAW_INDEX\n"); + return -EINVAL; + } + ib[idx+0] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+1] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + break; + case PACKET3_DRAW_INDEX_AUTO: + if (pkt->count != 1) { + DRM_ERROR("bad DRAW_INDEX_AUTO\n"); + return -EINVAL; + } + break; + case PACKET3_DRAW_INDEX_IMMD_BE: + case PACKET3_DRAW_INDEX_IMMD: + if (pkt->count < 2) { + DRM_ERROR("bad DRAW_INDEX_IMMD\n"); + return -EINVAL; + } + break; + case PACKET3_WAIT_REG_MEM: + if (pkt->count != 5) { + DRM_ERROR("bad WAIT_REG_MEM\n"); + return -EINVAL; + } + /* bit 4 is reg (0) or mem (1) */ + if (ib_chunk->kdata[idx+0] & 0x10) { + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad WAIT_REG_MEM\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + } + break; + case PACKET3_SURFACE_SYNC: + if (pkt->count != 3) { + DRM_ERROR("bad SURFACE_SYNC\n"); + return -EINVAL; + } + /* 0xffffffff/0x0 is flush all cache flag */ + if (ib_chunk->kdata[idx+1] != 0xffffffff || + ib_chunk->kdata[idx+2] != 0) { + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SURFACE_SYNC\n"); + return -EINVAL; + } + ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + } + break; + case PACKET3_EVENT_WRITE: + if (pkt->count != 2 && pkt->count != 0) { + DRM_ERROR("bad EVENT_WRITE\n"); + return -EINVAL; + } + if (pkt->count) { + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad EVENT_WRITE\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + } + break; + case PACKET3_EVENT_WRITE_EOP: + if (pkt->count != 4) { + DRM_ERROR("bad EVENT_WRITE_EOP\n"); + return -EINVAL; + } + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad EVENT_WRITE\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + break; + case PACKET3_SET_CONFIG_REG: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONFIG_REG_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) || + (start_reg >= PACKET3_SET_CONFIG_REG_END) || + (end_reg >= PACKET3_SET_CONFIG_REG_END)) { + DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + return -EINVAL; + } + for (i = 0; i < pkt->count; i++) { + reg = start_reg + (4 * i); + switch (reg) { + case CP_COHER_BASE: + /* use PACKET3_SURFACE_SYNC */ + return -EINVAL; + default: + break; + } + } + break; + case PACKET3_SET_CONTEXT_REG: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONTEXT_REG_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) || + (start_reg >= PACKET3_SET_CONTEXT_REG_END) || + (end_reg >= PACKET3_SET_CONTEXT_REG_END)) { + DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n"); + return -EINVAL; + } + for (i = 0; i < pkt->count; i++) { + reg = start_reg + (4 * i); + switch (reg) { + case DB_DEPTH_BASE: + case CB_COLOR0_BASE: + case CB_COLOR1_BASE: + case CB_COLOR2_BASE: + case CB_COLOR3_BASE: + case CB_COLOR4_BASE: + case CB_COLOR5_BASE: + case CB_COLOR6_BASE: + case CB_COLOR7_BASE: + case SQ_PGM_START_FS: + case SQ_PGM_START_ES: + case SQ_PGM_START_VS: + case SQ_PGM_START_GS: + case SQ_PGM_START_PS: + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + break; + case VGT_DMA_BASE: + case VGT_DMA_BASE_HI: + /* These should be handled by DRAW_INDEX packet 3 */ + case VGT_STRMOUT_BASE_OFFSET_0: + case VGT_STRMOUT_BASE_OFFSET_1: + case VGT_STRMOUT_BASE_OFFSET_2: + case VGT_STRMOUT_BASE_OFFSET_3: + case VGT_STRMOUT_BASE_OFFSET_HI_0: + case VGT_STRMOUT_BASE_OFFSET_HI_1: + case VGT_STRMOUT_BASE_OFFSET_HI_2: + case VGT_STRMOUT_BASE_OFFSET_HI_3: + case VGT_STRMOUT_BUFFER_BASE_0: + case VGT_STRMOUT_BUFFER_BASE_1: + case VGT_STRMOUT_BUFFER_BASE_2: + case VGT_STRMOUT_BUFFER_BASE_3: + case VGT_STRMOUT_BUFFER_OFFSET_0: + case VGT_STRMOUT_BUFFER_OFFSET_1: + case VGT_STRMOUT_BUFFER_OFFSET_2: + case VGT_STRMOUT_BUFFER_OFFSET_3: + /* These should be handled by STRMOUT_BUFFER packet 3 */ + DRM_ERROR("bad context reg: 0x%08x\n", reg); + return -EINVAL; + default: + break; + } + } + break; + case PACKET3_SET_RESOURCE: + if (pkt->count % 7) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + start_reg = (ib[idx+0] << 2) + PACKET3_SET_RESOURCE_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) || + (start_reg >= PACKET3_SET_RESOURCE_END) || + (end_reg >= PACKET3_SET_RESOURCE_END)) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + for (i = 0; i < (pkt->count / 7); i++) { + switch (G__SQ_VTX_CONSTANT_TYPE(ib[idx+(i*7)+6+1])) { + case SQ_TEX_VTX_VALID_TEXTURE: + /* tex base */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + ib[idx+1+(i*7)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + /* tex mip base */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + ib[idx+1+(i*7)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + break; + case SQ_TEX_VTX_VALID_BUFFER: + /* vtx base */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + ib[idx+1+(i*7)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff); + ib[idx+1+(i*7)+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + break; + case SQ_TEX_VTX_INVALID_TEXTURE: + case SQ_TEX_VTX_INVALID_BUFFER: + default: + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + } + break; + case PACKET3_SET_ALU_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_ALU_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) || + (start_reg >= PACKET3_SET_ALU_CONST_END) || + (end_reg >= PACKET3_SET_ALU_CONST_END)) { + DRM_ERROR("bad SET_ALU_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_BOOL_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_BOOL_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) || + (start_reg >= PACKET3_SET_BOOL_CONST_END) || + (end_reg >= PACKET3_SET_BOOL_CONST_END)) { + DRM_ERROR("bad SET_BOOL_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_LOOP_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_LOOP_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) || + (start_reg >= PACKET3_SET_LOOP_CONST_END) || + (end_reg >= PACKET3_SET_LOOP_CONST_END)) { + DRM_ERROR("bad SET_LOOP_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_CTL_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_CTL_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) || + (start_reg >= PACKET3_SET_CTL_CONST_END) || + (end_reg >= PACKET3_SET_CTL_CONST_END)) { + DRM_ERROR("bad SET_CTL_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_SAMPLER: + if (pkt->count % 3) { + DRM_ERROR("bad SET_SAMPLER\n"); + return -EINVAL; + } + start_reg = (ib[idx+0] << 2) + PACKET3_SET_SAMPLER_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) || + (start_reg >= PACKET3_SET_SAMPLER_END) || + (end_reg >= PACKET3_SET_SAMPLER_END)) { + DRM_ERROR("bad SET_SAMPLER\n"); + return -EINVAL; + } + break; + case PACKET3_SURFACE_BASE_UPDATE: + if (p->family >= CHIP_RV770 || p->family == CHIP_R600) { + DRM_ERROR("bad SURFACE_BASE_UPDATE\n"); + return -EINVAL; + } + if (pkt->count) { + DRM_ERROR("bad SURFACE_BASE_UPDATE\n"); + return -EINVAL; + } + break; + case PACKET3_NOP: + break; + default: + DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + return -EINVAL; + } + return 0; +} + +int r600_cs_parse(struct radeon_cs_parser *p) +{ + struct radeon_cs_packet pkt; + int r; + + do { + r = r600_cs_packet_parse(p, &pkt, p->idx); + if (r) { + return r; + } + p->idx += pkt.count + 2; + switch (pkt.type) { + case PACKET_TYPE0: + r = r600_cs_parse_packet0(p, &pkt); + break; + case PACKET_TYPE2: + break; + case PACKET_TYPE3: + r = r600_packet3_check(p, &pkt); + break; + default: + DRM_ERROR("Unknown packet type %d !\n", pkt.type); + return -EINVAL; + } + if (r) { + return r; + } + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); +#if 0 + for (r = 0; r < p->ib->length_dw; r++) { + printk(KERN_INFO "%05d 0x%08X\n", r, p->ib->ptr[r]); + mdelay(1); + } +#endif + return 0; +} + +static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p) +{ + if (p->chunk_relocs_idx == -1) { + return 0; + } + p->relocs = kcalloc(1, sizeof(struct radeon_cs_reloc), GFP_KERNEL); + if (p->relocs == NULL) { + return -ENOMEM; + } + return 0; +} + +/** + * cs_parser_fini() - clean parser states + * @parser: parser structure holding parsing context. + * @error: error number + * + * If error is set than unvalidate buffer, otherwise just free memory + * used by parsing context. + **/ +static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error) +{ + unsigned i; + + kfree(parser->relocs); + for (i = 0; i < parser->nchunks; i++) { + kfree(parser->chunks[i].kdata); + } + kfree(parser->chunks); + kfree(parser->chunks_array); +} + +int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp, + unsigned family, u32 *ib, int *l) +{ + struct radeon_cs_parser parser; + struct radeon_cs_chunk *ib_chunk; + struct radeon_ib fake_ib; + int r; + + /* initialize parser */ + memset(&parser, 0, sizeof(struct radeon_cs_parser)); + parser.filp = filp; + parser.rdev = NULL; + parser.family = family; + parser.ib = &fake_ib; + fake_ib.ptr = ib; + r = radeon_cs_parser_init(&parser, data); + if (r) { + DRM_ERROR("Failed to initialize parser !\n"); + r600_cs_parser_fini(&parser, r); + return r; + } + r = r600_cs_parser_relocs_legacy(&parser); + if (r) { + DRM_ERROR("Failed to parse relocation !\n"); + r600_cs_parser_fini(&parser, r); + return r; + } + /* Copy the packet into the IB, the parser will read from the + * input memory (cached) and write to the IB (which can be + * uncached). */ + ib_chunk = &parser.chunks[parser.chunk_ib_idx]; + parser.ib->length_dw = ib_chunk->length_dw; + memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4); + *l = parser.ib->length_dw; + r = r600_cs_parse(&parser); + if (r) { + DRM_ERROR("Invalid command stream !\n"); + r600_cs_parser_fini(&parser, r); + return r; + } + r600_cs_parser_fini(&parser, r); + return r; +} + +void r600_cs_legacy_init(void) +{ + r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm; +} diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h new file mode 100644 index 000000000000..723295f59281 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600d.h @@ -0,0 +1,661 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef R600D_H +#define R600D_H + +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) + +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) + +#define R6XX_MAX_SH_GPRS 256 +#define R6XX_MAX_TEMP_GPRS 16 +#define R6XX_MAX_SH_THREADS 256 +#define R6XX_MAX_SH_STACK_ENTRIES 4096 +#define R6XX_MAX_BACKENDS 8 +#define R6XX_MAX_BACKENDS_MASK 0xff +#define R6XX_MAX_SIMDS 8 +#define R6XX_MAX_SIMDS_MASK 0xff +#define R6XX_MAX_PIPES 8 +#define R6XX_MAX_PIPES_MASK 0xff + +/* PTE flags */ +#define PTE_VALID (1 << 0) +#define PTE_SYSTEM (1 << 1) +#define PTE_SNOOPED (1 << 2) +#define PTE_READABLE (1 << 5) +#define PTE_WRITEABLE (1 << 6) + +/* Registers */ +#define ARB_POP 0x2418 +#define ENABLE_TC128 (1 << 30) +#define ARB_GDEC_RD_CNTL 0x246C + +#define CC_GC_SHADER_PIPE_CONFIG 0x8950 +#define CC_RB_BACKEND_DISABLE 0x98F4 +#define BACKEND_DISABLE(x) ((x) << 16) + +#define CB_COLOR0_BASE 0x28040 +#define CB_COLOR1_BASE 0x28044 +#define CB_COLOR2_BASE 0x28048 +#define CB_COLOR3_BASE 0x2804C +#define CB_COLOR4_BASE 0x28050 +#define CB_COLOR5_BASE 0x28054 +#define CB_COLOR6_BASE 0x28058 +#define CB_COLOR7_BASE 0x2805C +#define CB_COLOR7_FRAG 0x280FC + +#define CB_COLOR0_SIZE 0x28060 +#define CB_COLOR0_VIEW 0x28080 +#define CB_COLOR0_INFO 0x280a0 +#define CB_COLOR0_TILE 0x280c0 +#define CB_COLOR0_FRAG 0x280e0 +#define CB_COLOR0_MASK 0x28100 + +#define CONFIG_MEMSIZE 0x5428 +#define CP_STAT 0x8680 +#define CP_COHER_BASE 0x85F8 +#define CP_DEBUG 0xC1FC +#define R_0086D8_CP_ME_CNTL 0x86D8 +#define S_0086D8_CP_ME_HALT(x) (((x) & 1)<<28) +#define C_0086D8_CP_ME_HALT(x) ((x) & 0xEFFFFFFF) +#define CP_ME_RAM_DATA 0xC160 +#define CP_ME_RAM_RADDR 0xC158 +#define CP_ME_RAM_WADDR 0xC15C +#define CP_MEQ_THRESHOLDS 0x8764 +#define MEQ_END(x) ((x) << 16) +#define ROQ_END(x) ((x) << 24) +#define CP_PERFMON_CNTL 0x87FC +#define CP_PFP_UCODE_ADDR 0xC150 +#define CP_PFP_UCODE_DATA 0xC154 +#define CP_QUEUE_THRESHOLDS 0x8760 +#define ROQ_IB1_START(x) ((x) << 0) +#define ROQ_IB2_START(x) ((x) << 8) +#define CP_RB_BASE 0xC100 +#define CP_RB_CNTL 0xC104 +#define RB_BUFSZ(x) ((x)<<0) +#define RB_BLKSZ(x) ((x)<<8) +#define RB_NO_UPDATE (1<<27) +#define RB_RPTR_WR_ENA (1<<31) +#define BUF_SWAP_32BIT (2 << 16) +#define CP_RB_RPTR 0x8700 +#define CP_RB_RPTR_ADDR 0xC10C +#define CP_RB_RPTR_ADDR_HI 0xC110 +#define CP_RB_RPTR_WR 0xC108 +#define CP_RB_WPTR 0xC114 +#define CP_RB_WPTR_ADDR 0xC118 +#define CP_RB_WPTR_ADDR_HI 0xC11C +#define CP_RB_WPTR_DELAY 0x8704 +#define CP_ROQ_IB1_STAT 0x8784 +#define CP_ROQ_IB2_STAT 0x8788 +#define CP_SEM_WAIT_TIMER 0x85BC + +#define DB_DEBUG 0x9830 +#define PREZ_MUST_WAIT_FOR_POSTZ_DONE (1 << 31) +#define DB_DEPTH_BASE 0x2800C +#define DB_WATERMARKS 0x9838 +#define DEPTH_FREE(x) ((x) << 0) +#define DEPTH_FLUSH(x) ((x) << 5) +#define DEPTH_PENDING_FREE(x) ((x) << 15) +#define DEPTH_CACHELINE_FREE(x) ((x) << 20) + +#define DCP_TILING_CONFIG 0x6CA0 +#define PIPE_TILING(x) ((x) << 1) +#define BANK_TILING(x) ((x) << 4) +#define GROUP_SIZE(x) ((x) << 6) +#define ROW_TILING(x) ((x) << 8) +#define BANK_SWAPS(x) ((x) << 11) +#define SAMPLE_SPLIT(x) ((x) << 14) +#define BACKEND_MAP(x) ((x) << 16) + +#define GB_TILING_CONFIG 0x98F0 + +#define GC_USER_SHADER_PIPE_CONFIG 0x8954 +#define INACTIVE_QD_PIPES(x) ((x) << 8) +#define INACTIVE_QD_PIPES_MASK 0x0000FF00 +#define INACTIVE_SIMDS(x) ((x) << 16) +#define INACTIVE_SIMDS_MASK 0x00FF0000 + +#define SQ_CONFIG 0x8c00 +# define VC_ENABLE (1 << 0) +# define EXPORT_SRC_C (1 << 1) +# define DX9_CONSTS (1 << 2) +# define ALU_INST_PREFER_VECTOR (1 << 3) +# define DX10_CLAMP (1 << 4) +# define CLAUSE_SEQ_PRIO(x) ((x) << 8) +# define PS_PRIO(x) ((x) << 24) +# define VS_PRIO(x) ((x) << 26) +# define GS_PRIO(x) ((x) << 28) +# define ES_PRIO(x) ((x) << 30) +#define SQ_GPR_RESOURCE_MGMT_1 0x8c04 +# define NUM_PS_GPRS(x) ((x) << 0) +# define NUM_VS_GPRS(x) ((x) << 16) +# define NUM_CLAUSE_TEMP_GPRS(x) ((x) << 28) +#define SQ_GPR_RESOURCE_MGMT_2 0x8c08 +# define NUM_GS_GPRS(x) ((x) << 0) +# define NUM_ES_GPRS(x) ((x) << 16) +#define SQ_THREAD_RESOURCE_MGMT 0x8c0c +# define NUM_PS_THREADS(x) ((x) << 0) +# define NUM_VS_THREADS(x) ((x) << 8) +# define NUM_GS_THREADS(x) ((x) << 16) +# define NUM_ES_THREADS(x) ((x) << 24) +#define SQ_STACK_RESOURCE_MGMT_1 0x8c10 +# define NUM_PS_STACK_ENTRIES(x) ((x) << 0) +# define NUM_VS_STACK_ENTRIES(x) ((x) << 16) +#define SQ_STACK_RESOURCE_MGMT_2 0x8c14 +# define NUM_GS_STACK_ENTRIES(x) ((x) << 0) +# define NUM_ES_STACK_ENTRIES(x) ((x) << 16) + +#define GRBM_CNTL 0x8000 +# define GRBM_READ_TIMEOUT(x) ((x) << 0) +#define GRBM_STATUS 0x8010 +#define CMDFIFO_AVAIL_MASK 0x0000001F +#define GUI_ACTIVE (1<<31) +#define GRBM_STATUS2 0x8014 +#define GRBM_SOFT_RESET 0x8020 +#define SOFT_RESET_CP (1<<0) + +#define HDP_HOST_PATH_CNTL 0x2C00 +#define HDP_NONSURFACE_BASE 0x2C04 +#define HDP_NONSURFACE_INFO 0x2C08 +#define HDP_NONSURFACE_SIZE 0x2C0C +#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 +#define HDP_TILING_CONFIG 0x2F3C + +#define MC_VM_AGP_TOP 0x2184 +#define MC_VM_AGP_BOT 0x2188 +#define MC_VM_AGP_BASE 0x218C +#define MC_VM_FB_LOCATION 0x2180 +#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C +#define ENABLE_L1_TLB (1 << 0) +#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1) +#define ENABLE_L1_STRICT_ORDERING (1 << 2) +#define SYSTEM_ACCESS_MODE_MASK 0x000000C0 +#define SYSTEM_ACCESS_MODE_SHIFT 6 +#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 6) +#define SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 6) +#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 6) +#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 6) +#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 8) +#define SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE (1 << 8) +#define ENABLE_SEMAPHORE_MODE (1 << 10) +#define ENABLE_WAIT_L2_QUERY (1 << 11) +#define EFFECTIVE_L1_TLB_SIZE(x) (((x) & 7) << 12) +#define EFFECTIVE_L1_TLB_SIZE_MASK 0x00007000 +#define EFFECTIVE_L1_TLB_SIZE_SHIFT 12 +#define EFFECTIVE_L1_QUEUE_SIZE(x) (((x) & 7) << 15) +#define EFFECTIVE_L1_QUEUE_SIZE_MASK 0x00038000 +#define EFFECTIVE_L1_QUEUE_SIZE_SHIFT 15 +#define MC_VM_L1_TLB_MCD_RD_B_CNTL 0x21A0 +#define MC_VM_L1_TLB_MCB_RD_GFX_CNTL 0x21FC +#define MC_VM_L1_TLB_MCB_RD_HDP_CNTL 0x2204 +#define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL 0x2208 +#define MC_VM_L1_TLB_MCB_RD_SEM_CNTL 0x220C +#define MC_VM_L1_TLB_MCB_RD_SYS_CNTL 0x2200 +#define MC_VM_L1_TLB_MCD_WR_A_CNTL 0x21A4 +#define MC_VM_L1_TLB_MCD_WR_B_CNTL 0x21A8 +#define MC_VM_L1_TLB_MCB_WR_GFX_CNTL 0x2210 +#define MC_VM_L1_TLB_MCB_WR_HDP_CNTL 0x2218 +#define MC_VM_L1_TLB_MCB_WR_PDMA_CNTL 0x221C +#define MC_VM_L1_TLB_MCB_WR_SEM_CNTL 0x2220 +#define MC_VM_L1_TLB_MCB_WR_SYS_CNTL 0x2214 +#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2190 +#define LOGICAL_PAGE_NUMBER_MASK 0x000FFFFF +#define LOGICAL_PAGE_NUMBER_SHIFT 0 +#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2194 +#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198 + +#define PA_CL_ENHANCE 0x8A14 +#define CLIP_VTX_REORDER_ENA (1 << 0) +#define NUM_CLIP_SEQ(x) ((x) << 1) +#define PA_SC_AA_CONFIG 0x28C04 +#define PA_SC_AA_SAMPLE_LOCS_2S 0x8B40 +#define PA_SC_AA_SAMPLE_LOCS_4S 0x8B44 +#define PA_SC_AA_SAMPLE_LOCS_8S_WD0 0x8B48 +#define PA_SC_AA_SAMPLE_LOCS_8S_WD1 0x8B4C +#define S0_X(x) ((x) << 0) +#define S0_Y(x) ((x) << 4) +#define S1_X(x) ((x) << 8) +#define S1_Y(x) ((x) << 12) +#define S2_X(x) ((x) << 16) +#define S2_Y(x) ((x) << 20) +#define S3_X(x) ((x) << 24) +#define S3_Y(x) ((x) << 28) +#define S4_X(x) ((x) << 0) +#define S4_Y(x) ((x) << 4) +#define S5_X(x) ((x) << 8) +#define S5_Y(x) ((x) << 12) +#define S6_X(x) ((x) << 16) +#define S6_Y(x) ((x) << 20) +#define S7_X(x) ((x) << 24) +#define S7_Y(x) ((x) << 28) +#define PA_SC_CLIPRECT_RULE 0x2820c +#define PA_SC_ENHANCE 0x8BF0 +#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0) +#define FORCE_EOV_MAX_TILE_CNT(x) ((x) << 12) +#define PA_SC_LINE_STIPPLE 0x28A0C +#define PA_SC_LINE_STIPPLE_STATE 0x8B10 +#define PA_SC_MODE_CNTL 0x28A4C +#define PA_SC_MULTI_CHIP_CNTL 0x8B20 + +#define PA_SC_SCREEN_SCISSOR_TL 0x28030 +#define PA_SC_GENERIC_SCISSOR_TL 0x28240 +#define PA_SC_WINDOW_SCISSOR_TL 0x28204 + +#define PCIE_PORT_INDEX 0x0038 +#define PCIE_PORT_DATA 0x003C + +#define RAMCFG 0x2408 +#define NOOFBANK_SHIFT 0 +#define NOOFBANK_MASK 0x00000001 +#define NOOFRANK_SHIFT 1 +#define NOOFRANK_MASK 0x00000002 +#define NOOFROWS_SHIFT 2 +#define NOOFROWS_MASK 0x0000001C +#define NOOFCOLS_SHIFT 5 +#define NOOFCOLS_MASK 0x00000060 +#define CHANSIZE_SHIFT 7 +#define CHANSIZE_MASK 0x00000080 +#define BURSTLENGTH_SHIFT 8 +#define BURSTLENGTH_MASK 0x00000100 +#define CHANSIZE_OVERRIDE (1 << 10) + +#define SCRATCH_REG0 0x8500 +#define SCRATCH_REG1 0x8504 +#define SCRATCH_REG2 0x8508 +#define SCRATCH_REG3 0x850C +#define SCRATCH_REG4 0x8510 +#define SCRATCH_REG5 0x8514 +#define SCRATCH_REG6 0x8518 +#define SCRATCH_REG7 0x851C +#define SCRATCH_UMSK 0x8540 +#define SCRATCH_ADDR 0x8544 + +#define SPI_CONFIG_CNTL 0x9100 +#define GPR_WRITE_PRIORITY(x) ((x) << 0) +#define DISABLE_INTERP_1 (1 << 5) +#define SPI_CONFIG_CNTL_1 0x913C +#define VTX_DONE_DELAY(x) ((x) << 0) +#define INTERP_ONE_PRIM_PER_ROW (1 << 4) +#define SPI_INPUT_Z 0x286D8 +#define SPI_PS_IN_CONTROL_0 0x286CC +#define NUM_INTERP(x) ((x)<<0) +#define POSITION_ENA (1<<8) +#define POSITION_CENTROID (1<<9) +#define POSITION_ADDR(x) ((x)<<10) +#define PARAM_GEN(x) ((x)<<15) +#define PARAM_GEN_ADDR(x) ((x)<<19) +#define BARYC_SAMPLE_CNTL(x) ((x)<<26) +#define PERSP_GRADIENT_ENA (1<<28) +#define LINEAR_GRADIENT_ENA (1<<29) +#define POSITION_SAMPLE (1<<30) +#define BARYC_AT_SAMPLE_ENA (1<<31) +#define SPI_PS_IN_CONTROL_1 0x286D0 +#define GEN_INDEX_PIX (1<<0) +#define GEN_INDEX_PIX_ADDR(x) ((x)<<1) +#define FRONT_FACE_ENA (1<<8) +#define FRONT_FACE_CHAN(x) ((x)<<9) +#define FRONT_FACE_ALL_BITS (1<<11) +#define FRONT_FACE_ADDR(x) ((x)<<12) +#define FOG_ADDR(x) ((x)<<17) +#define FIXED_PT_POSITION_ENA (1<<24) +#define FIXED_PT_POSITION_ADDR(x) ((x)<<25) + +#define SQ_MS_FIFO_SIZES 0x8CF0 +#define CACHE_FIFO_SIZE(x) ((x) << 0) +#define FETCH_FIFO_HIWATER(x) ((x) << 8) +#define DONE_FIFO_HIWATER(x) ((x) << 16) +#define ALU_UPDATE_FIFO_HIWATER(x) ((x) << 24) +#define SQ_PGM_START_ES 0x28880 +#define SQ_PGM_START_FS 0x28894 +#define SQ_PGM_START_GS 0x2886C +#define SQ_PGM_START_PS 0x28840 +#define SQ_PGM_RESOURCES_PS 0x28850 +#define SQ_PGM_EXPORTS_PS 0x28854 +#define SQ_PGM_CF_OFFSET_PS 0x288cc +#define SQ_PGM_START_VS 0x28858 +#define SQ_PGM_RESOURCES_VS 0x28868 +#define SQ_PGM_CF_OFFSET_VS 0x288d0 +#define SQ_VTX_CONSTANT_WORD6_0 0x38018 +#define S__SQ_VTX_CONSTANT_TYPE(x) (((x) & 3) << 30) +#define G__SQ_VTX_CONSTANT_TYPE(x) (((x) >> 30) & 3) +#define SQ_TEX_VTX_INVALID_TEXTURE 0x0 +#define SQ_TEX_VTX_INVALID_BUFFER 0x1 +#define SQ_TEX_VTX_VALID_TEXTURE 0x2 +#define SQ_TEX_VTX_VALID_BUFFER 0x3 + + +#define SX_MISC 0x28350 +#define SX_DEBUG_1 0x9054 +#define SMX_EVENT_RELEASE (1 << 0) +#define ENABLE_NEW_SMX_ADDRESS (1 << 16) + +#define TA_CNTL_AUX 0x9508 +#define DISABLE_CUBE_WRAP (1 << 0) +#define DISABLE_CUBE_ANISO (1 << 1) +#define SYNC_GRADIENT (1 << 24) +#define SYNC_WALKER (1 << 25) +#define SYNC_ALIGNER (1 << 26) +#define BILINEAR_PRECISION_6_BIT (0 << 31) +#define BILINEAR_PRECISION_8_BIT (1 << 31) + +#define TC_CNTL 0x9608 +#define TC_L2_SIZE(x) ((x)<<5) +#define L2_DISABLE_LATE_HIT (1<<9) + + +#define VGT_CACHE_INVALIDATION 0x88C4 +#define CACHE_INVALIDATION(x) ((x)<<0) +#define VC_ONLY 0 +#define TC_ONLY 1 +#define VC_AND_TC 2 +#define VGT_DMA_BASE 0x287E8 +#define VGT_DMA_BASE_HI 0x287E4 +#define VGT_ES_PER_GS 0x88CC +#define VGT_GS_PER_ES 0x88C8 +#define VGT_GS_PER_VS 0x88E8 +#define VGT_GS_VERTEX_REUSE 0x88D4 +#define VGT_PRIMITIVE_TYPE 0x8958 +#define VGT_NUM_INSTANCES 0x8974 +#define VGT_OUT_DEALLOC_CNTL 0x28C5C +#define DEALLOC_DIST_MASK 0x0000007F +#define VGT_STRMOUT_BASE_OFFSET_0 0x28B10 +#define VGT_STRMOUT_BASE_OFFSET_1 0x28B14 +#define VGT_STRMOUT_BASE_OFFSET_2 0x28B18 +#define VGT_STRMOUT_BASE_OFFSET_3 0x28B1c +#define VGT_STRMOUT_BASE_OFFSET_HI_0 0x28B44 +#define VGT_STRMOUT_BASE_OFFSET_HI_1 0x28B48 +#define VGT_STRMOUT_BASE_OFFSET_HI_2 0x28B4c +#define VGT_STRMOUT_BASE_OFFSET_HI_3 0x28B50 +#define VGT_STRMOUT_BUFFER_BASE_0 0x28AD8 +#define VGT_STRMOUT_BUFFER_BASE_1 0x28AE8 +#define VGT_STRMOUT_BUFFER_BASE_2 0x28AF8 +#define VGT_STRMOUT_BUFFER_BASE_3 0x28B08 +#define VGT_STRMOUT_BUFFER_OFFSET_0 0x28ADC +#define VGT_STRMOUT_BUFFER_OFFSET_1 0x28AEC +#define VGT_STRMOUT_BUFFER_OFFSET_2 0x28AFC +#define VGT_STRMOUT_BUFFER_OFFSET_3 0x28B0C +#define VGT_STRMOUT_EN 0x28AB0 +#define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58 +#define VTX_REUSE_DEPTH_MASK 0x000000FF +#define VGT_EVENT_INITIATOR 0x28a90 +# define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0) + +#define VM_CONTEXT0_CNTL 0x1410 +#define ENABLE_CONTEXT (1 << 0) +#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) +#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) +#define VM_CONTEXT0_INVALIDATION_LOW_ADDR 0x1490 +#define VM_CONTEXT0_INVALIDATION_HIGH_ADDR 0x14B0 +#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x1574 +#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x1594 +#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x15B4 +#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1554 +#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 +#define REQUEST_TYPE(x) (((x) & 0xf) << 0) +#define RESPONSE_TYPE_MASK 0x000000F0 +#define RESPONSE_TYPE_SHIFT 4 +#define VM_L2_CNTL 0x1400 +#define ENABLE_L2_CACHE (1 << 0) +#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) +#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) +#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 13) +#define VM_L2_CNTL2 0x1404 +#define INVALIDATE_ALL_L1_TLBS (1 << 0) +#define INVALIDATE_L2_CACHE (1 << 1) +#define VM_L2_CNTL3 0x1408 +#define BANK_SELECT_0(x) (((x) & 0x1f) << 0) +#define BANK_SELECT_1(x) (((x) & 0x1f) << 5) +#define L2_CACHE_UPDATE_MODE(x) (((x) & 3) << 10) +#define VM_L2_STATUS 0x140C +#define L2_BUSY (1 << 0) + +#define WAIT_UNTIL 0x8040 +#define WAIT_2D_IDLE_bit (1 << 14) +#define WAIT_3D_IDLE_bit (1 << 15) +#define WAIT_2D_IDLECLEAN_bit (1 << 16) +#define WAIT_3D_IDLECLEAN_bit (1 << 17) + + + +/* + * PM4 + */ +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) +#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ + (((reg) >> 2) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + +/* Packet 3 types */ +#define PACKET3_NOP 0x10 +#define PACKET3_INDIRECT_BUFFER_END 0x17 +#define PACKET3_SET_PREDICATION 0x20 +#define PACKET3_REG_RMW 0x21 +#define PACKET3_COND_EXEC 0x22 +#define PACKET3_PRED_EXEC 0x23 +#define PACKET3_START_3D_CMDBUF 0x24 +#define PACKET3_DRAW_INDEX_2 0x27 +#define PACKET3_CONTEXT_CONTROL 0x28 +#define PACKET3_DRAW_INDEX_IMMD_BE 0x29 +#define PACKET3_INDEX_TYPE 0x2A +#define PACKET3_DRAW_INDEX 0x2B +#define PACKET3_DRAW_INDEX_AUTO 0x2D +#define PACKET3_DRAW_INDEX_IMMD 0x2E +#define PACKET3_NUM_INSTANCES 0x2F +#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 +#define PACKET3_INDIRECT_BUFFER_MP 0x38 +#define PACKET3_MEM_SEMAPHORE 0x39 +#define PACKET3_MPEG_INDEX 0x3A +#define PACKET3_WAIT_REG_MEM 0x3C +#define PACKET3_MEM_WRITE 0x3D +#define PACKET3_INDIRECT_BUFFER 0x32 +#define PACKET3_CP_INTERRUPT 0x40 +#define PACKET3_SURFACE_SYNC 0x43 +# define PACKET3_CB0_DEST_BASE_ENA (1 << 6) +# define PACKET3_TC_ACTION_ENA (1 << 23) +# define PACKET3_VC_ACTION_ENA (1 << 24) +# define PACKET3_CB_ACTION_ENA (1 << 25) +# define PACKET3_DB_ACTION_ENA (1 << 26) +# define PACKET3_SH_ACTION_ENA (1 << 27) +# define PACKET3_SMX_ACTION_ENA (1 << 28) +#define PACKET3_ME_INITIALIZE 0x44 +#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) +#define PACKET3_COND_WRITE 0x45 +#define PACKET3_EVENT_WRITE 0x46 +#define PACKET3_EVENT_WRITE_EOP 0x47 +#define PACKET3_ONE_REG_WRITE 0x57 +#define PACKET3_SET_CONFIG_REG 0x68 +#define PACKET3_SET_CONFIG_REG_OFFSET 0x00008000 +#define PACKET3_SET_CONFIG_REG_END 0x0000ac00 +#define PACKET3_SET_CONTEXT_REG 0x69 +#define PACKET3_SET_CONTEXT_REG_OFFSET 0x00028000 +#define PACKET3_SET_CONTEXT_REG_END 0x00029000 +#define PACKET3_SET_ALU_CONST 0x6A +#define PACKET3_SET_ALU_CONST_OFFSET 0x00030000 +#define PACKET3_SET_ALU_CONST_END 0x00032000 +#define PACKET3_SET_BOOL_CONST 0x6B +#define PACKET3_SET_BOOL_CONST_OFFSET 0x0003e380 +#define PACKET3_SET_BOOL_CONST_END 0x00040000 +#define PACKET3_SET_LOOP_CONST 0x6C +#define PACKET3_SET_LOOP_CONST_OFFSET 0x0003e200 +#define PACKET3_SET_LOOP_CONST_END 0x0003e380 +#define PACKET3_SET_RESOURCE 0x6D +#define PACKET3_SET_RESOURCE_OFFSET 0x00038000 +#define PACKET3_SET_RESOURCE_END 0x0003c000 +#define PACKET3_SET_SAMPLER 0x6E +#define PACKET3_SET_SAMPLER_OFFSET 0x0003c000 +#define PACKET3_SET_SAMPLER_END 0x0003cff0 +#define PACKET3_SET_CTL_CONST 0x6F +#define PACKET3_SET_CTL_CONST_OFFSET 0x0003cff0 +#define PACKET3_SET_CTL_CONST_END 0x0003e200 +#define PACKET3_SURFACE_BASE_UPDATE 0x73 + + +#define R_008020_GRBM_SOFT_RESET 0x8020 +#define S_008020_SOFT_RESET_CP(x) (((x) & 1) << 0) +#define S_008020_SOFT_RESET_CB(x) (((x) & 1) << 1) +#define S_008020_SOFT_RESET_CR(x) (((x) & 1) << 2) +#define S_008020_SOFT_RESET_DB(x) (((x) & 1) << 3) +#define S_008020_SOFT_RESET_PA(x) (((x) & 1) << 5) +#define S_008020_SOFT_RESET_SC(x) (((x) & 1) << 6) +#define S_008020_SOFT_RESET_SMX(x) (((x) & 1) << 7) +#define S_008020_SOFT_RESET_SPI(x) (((x) & 1) << 8) +#define S_008020_SOFT_RESET_SH(x) (((x) & 1) << 9) +#define S_008020_SOFT_RESET_SX(x) (((x) & 1) << 10) +#define S_008020_SOFT_RESET_TC(x) (((x) & 1) << 11) +#define S_008020_SOFT_RESET_TA(x) (((x) & 1) << 12) +#define S_008020_SOFT_RESET_VC(x) (((x) & 1) << 13) +#define S_008020_SOFT_RESET_VGT(x) (((x) & 1) << 14) +#define R_008010_GRBM_STATUS 0x8010 +#define S_008010_CMDFIFO_AVAIL(x) (((x) & 0x1F) << 0) +#define S_008010_CP_RQ_PENDING(x) (((x) & 1) << 6) +#define S_008010_CF_RQ_PENDING(x) (((x) & 1) << 7) +#define S_008010_PF_RQ_PENDING(x) (((x) & 1) << 8) +#define S_008010_GRBM_EE_BUSY(x) (((x) & 1) << 10) +#define S_008010_VC_BUSY(x) (((x) & 1) << 11) +#define S_008010_DB03_CLEAN(x) (((x) & 1) << 12) +#define S_008010_CB03_CLEAN(x) (((x) & 1) << 13) +#define S_008010_VGT_BUSY_NO_DMA(x) (((x) & 1) << 16) +#define S_008010_VGT_BUSY(x) (((x) & 1) << 17) +#define S_008010_TA03_BUSY(x) (((x) & 1) << 18) +#define S_008010_TC_BUSY(x) (((x) & 1) << 19) +#define S_008010_SX_BUSY(x) (((x) & 1) << 20) +#define S_008010_SH_BUSY(x) (((x) & 1) << 21) +#define S_008010_SPI03_BUSY(x) (((x) & 1) << 22) +#define S_008010_SMX_BUSY(x) (((x) & 1) << 23) +#define S_008010_SC_BUSY(x) (((x) & 1) << 24) +#define S_008010_PA_BUSY(x) (((x) & 1) << 25) +#define S_008010_DB03_BUSY(x) (((x) & 1) << 26) +#define S_008010_CR_BUSY(x) (((x) & 1) << 27) +#define S_008010_CP_COHERENCY_BUSY(x) (((x) & 1) << 28) +#define S_008010_CP_BUSY(x) (((x) & 1) << 29) +#define S_008010_CB03_BUSY(x) (((x) & 1) << 30) +#define S_008010_GUI_ACTIVE(x) (((x) & 1) << 31) +#define G_008010_CMDFIFO_AVAIL(x) (((x) >> 0) & 0x1F) +#define G_008010_CP_RQ_PENDING(x) (((x) >> 6) & 1) +#define G_008010_CF_RQ_PENDING(x) (((x) >> 7) & 1) +#define G_008010_PF_RQ_PENDING(x) (((x) >> 8) & 1) +#define G_008010_GRBM_EE_BUSY(x) (((x) >> 10) & 1) +#define G_008010_VC_BUSY(x) (((x) >> 11) & 1) +#define G_008010_DB03_CLEAN(x) (((x) >> 12) & 1) +#define G_008010_CB03_CLEAN(x) (((x) >> 13) & 1) +#define G_008010_VGT_BUSY_NO_DMA(x) (((x) >> 16) & 1) +#define G_008010_VGT_BUSY(x) (((x) >> 17) & 1) +#define G_008010_TA03_BUSY(x) (((x) >> 18) & 1) +#define G_008010_TC_BUSY(x) (((x) >> 19) & 1) +#define G_008010_SX_BUSY(x) (((x) >> 20) & 1) +#define G_008010_SH_BUSY(x) (((x) >> 21) & 1) +#define G_008010_SPI03_BUSY(x) (((x) >> 22) & 1) +#define G_008010_SMX_BUSY(x) (((x) >> 23) & 1) +#define G_008010_SC_BUSY(x) (((x) >> 24) & 1) +#define G_008010_PA_BUSY(x) (((x) >> 25) & 1) +#define G_008010_DB03_BUSY(x) (((x) >> 26) & 1) +#define G_008010_CR_BUSY(x) (((x) >> 27) & 1) +#define G_008010_CP_COHERENCY_BUSY(x) (((x) >> 28) & 1) +#define G_008010_CP_BUSY(x) (((x) >> 29) & 1) +#define G_008010_CB03_BUSY(x) (((x) >> 30) & 1) +#define G_008010_GUI_ACTIVE(x) (((x) >> 31) & 1) +#define R_008014_GRBM_STATUS2 0x8014 +#define S_008014_CR_CLEAN(x) (((x) & 1) << 0) +#define S_008014_SMX_CLEAN(x) (((x) & 1) << 1) +#define S_008014_SPI0_BUSY(x) (((x) & 1) << 8) +#define S_008014_SPI1_BUSY(x) (((x) & 1) << 9) +#define S_008014_SPI2_BUSY(x) (((x) & 1) << 10) +#define S_008014_SPI3_BUSY(x) (((x) & 1) << 11) +#define S_008014_TA0_BUSY(x) (((x) & 1) << 12) +#define S_008014_TA1_BUSY(x) (((x) & 1) << 13) +#define S_008014_TA2_BUSY(x) (((x) & 1) << 14) +#define S_008014_TA3_BUSY(x) (((x) & 1) << 15) +#define S_008014_DB0_BUSY(x) (((x) & 1) << 16) +#define S_008014_DB1_BUSY(x) (((x) & 1) << 17) +#define S_008014_DB2_BUSY(x) (((x) & 1) << 18) +#define S_008014_DB3_BUSY(x) (((x) & 1) << 19) +#define S_008014_CB0_BUSY(x) (((x) & 1) << 20) +#define S_008014_CB1_BUSY(x) (((x) & 1) << 21) +#define S_008014_CB2_BUSY(x) (((x) & 1) << 22) +#define S_008014_CB3_BUSY(x) (((x) & 1) << 23) +#define G_008014_CR_CLEAN(x) (((x) >> 0) & 1) +#define G_008014_SMX_CLEAN(x) (((x) >> 1) & 1) +#define G_008014_SPI0_BUSY(x) (((x) >> 8) & 1) +#define G_008014_SPI1_BUSY(x) (((x) >> 9) & 1) +#define G_008014_SPI2_BUSY(x) (((x) >> 10) & 1) +#define G_008014_SPI3_BUSY(x) (((x) >> 11) & 1) +#define G_008014_TA0_BUSY(x) (((x) >> 12) & 1) +#define G_008014_TA1_BUSY(x) (((x) >> 13) & 1) +#define G_008014_TA2_BUSY(x) (((x) >> 14) & 1) +#define G_008014_TA3_BUSY(x) (((x) >> 15) & 1) +#define G_008014_DB0_BUSY(x) (((x) >> 16) & 1) +#define G_008014_DB1_BUSY(x) (((x) >> 17) & 1) +#define G_008014_DB2_BUSY(x) (((x) >> 18) & 1) +#define G_008014_DB3_BUSY(x) (((x) >> 19) & 1) +#define G_008014_CB0_BUSY(x) (((x) >> 20) & 1) +#define G_008014_CB1_BUSY(x) (((x) >> 21) & 1) +#define G_008014_CB2_BUSY(x) (((x) >> 22) & 1) +#define G_008014_CB3_BUSY(x) (((x) >> 23) & 1) +#define R_000E50_SRBM_STATUS 0x0E50 +#define G_000E50_RLC_RQ_PENDING(x) (((x) >> 3) & 1) +#define G_000E50_RCU_RQ_PENDING(x) (((x) >> 4) & 1) +#define G_000E50_GRBM_RQ_PENDING(x) (((x) >> 5) & 1) +#define G_000E50_HI_RQ_PENDING(x) (((x) >> 6) & 1) +#define G_000E50_IO_EXTERN_SIGNAL(x) (((x) >> 7) & 1) +#define G_000E50_VMC_BUSY(x) (((x) >> 8) & 1) +#define G_000E50_MCB_BUSY(x) (((x) >> 9) & 1) +#define G_000E50_MCDZ_BUSY(x) (((x) >> 10) & 1) +#define G_000E50_MCDY_BUSY(x) (((x) >> 11) & 1) +#define G_000E50_MCDX_BUSY(x) (((x) >> 12) & 1) +#define G_000E50_MCDW_BUSY(x) (((x) >> 13) & 1) +#define G_000E50_SEM_BUSY(x) (((x) >> 14) & 1) +#define G_000E50_RLC_BUSY(x) (((x) >> 15) & 1) +#define R_000E60_SRBM_SOFT_RESET 0x0E60 +#define S_000E60_SOFT_RESET_BIF(x) (((x) & 1) << 1) +#define S_000E60_SOFT_RESET_CG(x) (((x) & 1) << 2) +#define S_000E60_SOFT_RESET_CMC(x) (((x) & 1) << 3) +#define S_000E60_SOFT_RESET_CSC(x) (((x) & 1) << 4) +#define S_000E60_SOFT_RESET_DC(x) (((x) & 1) << 5) +#define S_000E60_SOFT_RESET_GRBM(x) (((x) & 1) << 8) +#define S_000E60_SOFT_RESET_HDP(x) (((x) & 1) << 9) +#define S_000E60_SOFT_RESET_IH(x) (((x) & 1) << 10) +#define S_000E60_SOFT_RESET_MC(x) (((x) & 1) << 11) +#define S_000E60_SOFT_RESET_RLC(x) (((x) & 1) << 13) +#define S_000E60_SOFT_RESET_ROM(x) (((x) & 1) << 14) +#define S_000E60_SOFT_RESET_SEM(x) (((x) & 1) << 15) +#define S_000E60_SOFT_RESET_TSC(x) (((x) & 1) << 16) +#define S_000E60_SOFT_RESET_VMC(x) (((x) & 1) << 17) + +#endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e47f2fc294ce..3299733ac300 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -50,8 +50,8 @@ #include <linux/kref.h> #include "radeon_mode.h" +#include "radeon_share.h" #include "radeon_reg.h" -#include "r300.h" /* * Modules parameters. @@ -112,10 +112,11 @@ enum radeon_family { CHIP_RV635, CHIP_RV670, CHIP_RS780, + CHIP_RS880, CHIP_RV770, CHIP_RV730, CHIP_RV710, - CHIP_RS880, + CHIP_RV740, CHIP_LAST, }; @@ -152,10 +153,21 @@ struct radeon_device; */ bool radeon_get_bios(struct radeon_device *rdev); + /* - * Clocks + * Dummy page */ +struct radeon_dummy_page { + struct page *page; + dma_addr_t addr; +}; +int radeon_dummy_page_init(struct radeon_device *rdev); +void radeon_dummy_page_fini(struct radeon_device *rdev); + +/* + * Clocks + */ struct radeon_clock { struct radeon_pll p1pll; struct radeon_pll p2pll; @@ -166,6 +178,7 @@ struct radeon_clock { uint32_t default_sclk; }; + /* * Fences. */ @@ -332,14 +345,18 @@ struct radeon_mc { resource_size_t aper_size; resource_size_t aper_base; resource_size_t agp_base; - unsigned gtt_location; - unsigned gtt_size; - unsigned vram_location; /* for some chips with <= 32MB we need to lie * about vram size near mc fb location */ - unsigned mc_vram_size; + u64 mc_vram_size; + u64 gtt_location; + u64 gtt_size; + u64 gtt_start; + u64 gtt_end; + u64 vram_location; + u64 vram_start; + u64 vram_end; unsigned vram_width; - unsigned real_vram_size; + u64 real_vram_size; int vram_mtrr; bool vram_is_ddr; }; @@ -411,6 +428,16 @@ struct radeon_cp { bool ready; }; +struct r600_blit { + struct radeon_object *shader_obj; + u64 shader_gpu_addr; + u32 vs_offset, ps_offset; + u32 state_offset; + u32 state_len; + u32 vb_used, vb_total; + struct radeon_ib *vb_ib; +}; + int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib); @@ -463,6 +490,7 @@ struct radeon_cs_parser { int chunk_relocs_idx; struct radeon_ib *ib; void *track; + unsigned family; }; struct radeon_cs_packet { @@ -559,6 +587,9 @@ int r100_debugfs_cp_init(struct radeon_device *rdev); */ struct radeon_asic { int (*init)(struct radeon_device *rdev); + void (*fini)(struct radeon_device *rdev); + int (*resume)(struct radeon_device *rdev); + int (*suspend)(struct radeon_device *rdev); void (*errata)(struct radeon_device *rdev); void (*vram_info)(struct radeon_device *rdev); int (*gpu_reset)(struct radeon_device *rdev); @@ -573,7 +604,11 @@ struct radeon_asic { int (*cp_init)(struct radeon_device *rdev, unsigned ring_size); void (*cp_fini)(struct radeon_device *rdev); void (*cp_disable)(struct radeon_device *rdev); + void (*cp_commit)(struct radeon_device *rdev); void (*ring_start)(struct radeon_device *rdev); + int (*ring_test)(struct radeon_device *rdev); + void (*ring_ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); + int (*ib_test)(struct radeon_device *rdev); int (*irq_set)(struct radeon_device *rdev); int (*irq_process)(struct radeon_device *rdev); u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc); @@ -613,6 +648,8 @@ struct r100_asic { union radeon_asic_config { struct r300_asic r300; struct r100_asic r100; + struct r600_asic r600; + struct rv770_asic rv770; }; @@ -698,12 +735,16 @@ struct radeon_device { struct radeon_pm pm; struct mutex cs_mutex; struct radeon_wb wb; + struct radeon_dummy_page dummy_page; bool gpu_lockup; bool shutdown; bool suspend; bool need_dma32; + bool new_init_path; struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES]; - const struct firmware *fw; /* firmware */ + const struct firmware *me_fw; /* all family ME firmware */ + const struct firmware *pfp_fw; /* r6/700 PFP firmware */ + struct r600_blit r600_blit; }; int radeon_device_init(struct radeon_device *rdev, @@ -713,6 +754,13 @@ int radeon_device_init(struct radeon_device *rdev, void radeon_device_fini(struct radeon_device *rdev); int radeon_gpu_wait_for_idle(struct radeon_device *rdev); +/* r600 blit */ +int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes); +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence); +void r600_kms_blit_copy(struct radeon_device *rdev, + u64 src_gpu_addr, u64 dst_gpu_addr, + int size_bytes); + static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) { if (reg < 0x10000) @@ -740,6 +788,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32 #define RREG8(reg) readb(((void __iomem *)rdev->rmmio) + (reg)) #define WREG8(reg, v) writeb(v, ((void __iomem *)rdev->rmmio) + (reg)) #define RREG32(reg) r100_mm_rreg(rdev, (reg)) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg))) #define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v)) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) @@ -763,6 +812,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32 tmp_ |= ((val) & ~(mask)); \ WREG32_PLL(reg, tmp_); \ } while (0) +#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg))) /* * Indirect registers accessor @@ -827,51 +877,6 @@ void radeon_atombios_fini(struct radeon_device *rdev); /* * RING helpers. */ -#define CP_PACKET0 0x00000000 -#define PACKET0_BASE_INDEX_SHIFT 0 -#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) -#define PACKET0_COUNT_SHIFT 16 -#define PACKET0_COUNT_MASK (0x3fff << 16) -#define CP_PACKET1 0x40000000 -#define CP_PACKET2 0x80000000 -#define PACKET2_PAD_SHIFT 0 -#define PACKET2_PAD_MASK (0x3fffffff << 0) -#define CP_PACKET3 0xC0000000 -#define PACKET3_IT_OPCODE_SHIFT 8 -#define PACKET3_IT_OPCODE_MASK (0xff << 8) -#define PACKET3_COUNT_SHIFT 16 -#define PACKET3_COUNT_MASK (0x3fff << 16) -/* PACKET3 op code */ -#define PACKET3_NOP 0x10 -#define PACKET3_3D_DRAW_VBUF 0x28 -#define PACKET3_3D_DRAW_IMMD 0x29 -#define PACKET3_3D_DRAW_INDX 0x2A -#define PACKET3_3D_LOAD_VBPNTR 0x2F -#define PACKET3_INDX_BUFFER 0x33 -#define PACKET3_3D_DRAW_VBUF_2 0x34 -#define PACKET3_3D_DRAW_IMMD_2 0x35 -#define PACKET3_3D_DRAW_INDX_2 0x36 -#define PACKET3_BITBLT_MULTI 0x9B - -#define PACKET0(reg, n) (CP_PACKET0 | \ - REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ - REG_SET(PACKET0_COUNT, (n))) -#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) -#define PACKET3(op, n) (CP_PACKET3 | \ - REG_SET(PACKET3_IT_OPCODE, (op)) | \ - REG_SET(PACKET3_COUNT, (n))) - -#define PACKET_TYPE0 0 -#define PACKET_TYPE1 1 -#define PACKET_TYPE2 2 -#define PACKET_TYPE3 3 - -#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) -#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) -#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) -#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) -#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) - static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) { #if DRM_DEBUG_CODE @@ -890,6 +895,9 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) * ASICs macro. */ #define radeon_init(rdev) (rdev)->asic->init((rdev)) +#define radeon_fini(rdev) (rdev)->asic->fini((rdev)) +#define radeon_resume(rdev) (rdev)->asic->resume((rdev)) +#define radeon_suspend(rdev) (rdev)->asic->suspend((rdev)) #define radeon_cs_parse(p) rdev->asic->cs_parse((p)) #define radeon_errata(rdev) (rdev)->asic->errata((rdev)) #define radeon_vram_info(rdev) (rdev)->asic->vram_info((rdev)) @@ -905,7 +913,11 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) #define radeon_cp_init(rdev,rsize) (rdev)->asic->cp_init((rdev), (rsize)) #define radeon_cp_fini(rdev) (rdev)->asic->cp_fini((rdev)) #define radeon_cp_disable(rdev) (rdev)->asic->cp_disable((rdev)) +#define radeon_cp_commit(rdev) (rdev)->asic->cp_commit((rdev)) #define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev)) +#define radeon_ring_test(rdev) (rdev)->asic->ring_test((rdev)) +#define radeon_ring_ib_execute(rdev, ib) (rdev)->asic->ring_ib_execute((rdev), (ib)) +#define radeon_ib_test(rdev) (rdev)->asic->ib_test((rdev)) #define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev)) #define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev)) #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index c9cbd8ae1f95..e87bb915a6de 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -60,6 +60,7 @@ int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); int r100_cp_init(struct radeon_device *rdev, unsigned ring_size); void r100_cp_fini(struct radeon_device *rdev); void r100_cp_disable(struct radeon_device *rdev); +void r100_cp_commit(struct radeon_device *rdev); void r100_ring_start(struct radeon_device *rdev); int r100_irq_set(struct radeon_device *rdev); int r100_irq_process(struct radeon_device *rdev); @@ -78,6 +79,9 @@ int r100_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t offset, uint32_t obj_size); int r100_clear_surface_reg(struct radeon_device *rdev, int reg); void r100_bandwidth_update(struct radeon_device *rdev); +void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +int r100_ib_test(struct radeon_device *rdev); +int r100_ring_test(struct radeon_device *rdev); static struct radeon_asic r100_asic = { .init = &r100_init, @@ -95,7 +99,11 @@ static struct radeon_asic r100_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r100_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -156,7 +164,11 @@ static struct radeon_asic r300_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -197,7 +209,11 @@ static struct radeon_asic r420_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -245,7 +261,11 @@ static struct radeon_asic rs400_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -298,7 +318,11 @@ static struct radeon_asic rs600_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -341,7 +365,11 @@ static struct radeon_asic rs690_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -391,7 +419,11 @@ static struct radeon_asic rv515_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &rv515_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -434,7 +466,11 @@ static struct radeon_asic r520_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &rv515_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -453,9 +489,127 @@ static struct radeon_asic r520_asic = { }; /* - * r600,rv610,rv630,rv620,rv635,rv670,rs780,rv770,rv730,rv710 + * r600,rv610,rv630,rv620,rv635,rv670,rs780,rs880 */ +int r600_init(struct radeon_device *rdev); +void r600_fini(struct radeon_device *rdev); +int r600_suspend(struct radeon_device *rdev); +int r600_resume(struct radeon_device *rdev); +int r600_wb_init(struct radeon_device *rdev); +void r600_wb_fini(struct radeon_device *rdev); +void r600_cp_commit(struct radeon_device *rdev); +void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg); void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); +int r600_cs_parse(struct radeon_cs_parser *p); +void r600_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +int r600_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, + uint64_t dst_offset, + unsigned num_pages, + struct radeon_fence *fence); +int r600_irq_process(struct radeon_device *rdev); +int r600_irq_set(struct radeon_device *rdev); +int r600_gpu_reset(struct radeon_device *rdev); +int r600_set_surface_reg(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size); +int r600_clear_surface_reg(struct radeon_device *rdev, int reg); +void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +int r600_ib_test(struct radeon_device *rdev); +int r600_ring_test(struct radeon_device *rdev); +int r600_copy_blit(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_pages, struct radeon_fence *fence); + +static struct radeon_asic r600_asic = { + .errata = NULL, + .init = &r600_init, + .fini = &r600_fini, + .suspend = &r600_suspend, + .resume = &r600_resume, + .cp_commit = &r600_cp_commit, + .vram_info = NULL, + .gpu_reset = &r600_gpu_reset, + .mc_init = NULL, + .mc_fini = NULL, + .wb_init = &r600_wb_init, + .wb_fini = &r600_wb_fini, + .gart_enable = NULL, + .gart_disable = NULL, + .gart_tlb_flush = &r600_pcie_gart_tlb_flush, + .gart_set_page = &rs600_gart_set_page, + .cp_init = NULL, + .cp_fini = NULL, + .cp_disable = NULL, + .ring_start = NULL, + .ring_test = &r600_ring_test, + .ring_ib_execute = &r600_ring_ib_execute, + .ib_test = &r600_ib_test, + .irq_set = &r600_irq_set, + .irq_process = &r600_irq_process, + .fence_ring_emit = &r600_fence_ring_emit, + .cs_parse = &r600_cs_parse, + .copy_blit = &r600_copy_blit, + .copy_dma = &r600_copy_blit, + .copy = NULL, + .set_engine_clock = &radeon_atom_set_engine_clock, + .set_memory_clock = &radeon_atom_set_memory_clock, + .set_pcie_lanes = NULL, + .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r600_set_surface_reg, + .clear_surface_reg = r600_clear_surface_reg, + .bandwidth_update = &r520_bandwidth_update, +}; + +/* + * rv770,rv730,rv710,rv740 + */ +int rv770_init(struct radeon_device *rdev); +void rv770_fini(struct radeon_device *rdev); +int rv770_suspend(struct radeon_device *rdev); +int rv770_resume(struct radeon_device *rdev); +int rv770_gpu_reset(struct radeon_device *rdev); + +static struct radeon_asic rv770_asic = { + .errata = NULL, + .init = &rv770_init, + .fini = &rv770_fini, + .suspend = &rv770_suspend, + .resume = &rv770_resume, + .cp_commit = &r600_cp_commit, + .vram_info = NULL, + .gpu_reset = &rv770_gpu_reset, + .mc_init = NULL, + .mc_fini = NULL, + .wb_init = &r600_wb_init, + .wb_fini = &r600_wb_fini, + .gart_enable = NULL, + .gart_disable = NULL, + .gart_tlb_flush = &r600_pcie_gart_tlb_flush, + .gart_set_page = &rs600_gart_set_page, + .cp_init = NULL, + .cp_fini = NULL, + .cp_disable = NULL, + .ring_start = NULL, + .ring_test = &r600_ring_test, + .ring_ib_execute = &r600_ring_ib_execute, + .ib_test = &r600_ib_test, + .irq_set = &r600_irq_set, + .irq_process = &r600_irq_process, + .fence_ring_emit = &r600_fence_ring_emit, + .cs_parse = &r600_cs_parse, + .copy_blit = &r600_copy_blit, + .copy_dma = &r600_copy_blit, + .copy = NULL, + .set_engine_clock = &radeon_atom_set_engine_clock, + .set_memory_clock = &radeon_atom_set_memory_clock, + .set_pcie_lanes = NULL, + .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r600_set_surface_reg, + .clear_surface_reg = r600_clear_surface_reg, + .bandwidth_update = &r520_bandwidth_update, +}; #endif diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index bba9b4bd8f5c..a8fb392c9cd6 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -370,10 +370,6 @@ bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev) && record-> ucRecordType <= ATOM_MAX_OBJECT_RECORD_NUMBER) { - DRM_ERROR - ("record type %d\n", - record-> - ucRecordType); switch (record-> ucRecordType) { case ATOM_I2C_RECORD_TYPE: diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c index a37cbce53181..152eef13197a 100644 --- a/drivers/gpu/drm/radeon/radeon_clocks.c +++ b/drivers/gpu/drm/radeon/radeon_clocks.c @@ -102,10 +102,12 @@ void radeon_get_clock_info(struct drm_device *dev) p1pll->reference_div = 12; if (p2pll->reference_div < 2) p2pll->reference_div = 12; - if (spll->reference_div < 2) - spll->reference_div = - RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) & - RADEON_M_SPLL_REF_DIV_MASK; + if (rdev->family < CHIP_RS600) { + if (spll->reference_div < 2) + spll->reference_div = + RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) & + RADEON_M_SPLL_REF_DIV_MASK; + } if (mpll->reference_div < 2) mpll->reference_div = spll->reference_div; } else { diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 7693f7c67bd3..f2469c511789 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -37,7 +37,7 @@ /* * Clear GPU surface registers. */ -static void radeon_surface_init(struct radeon_device *rdev) +void radeon_surface_init(struct radeon_device *rdev) { /* FIXME: check this out */ if (rdev->family < CHIP_R600) { @@ -56,7 +56,7 @@ static void radeon_surface_init(struct radeon_device *rdev) /* * GPU scratch registers helpers function. */ -static void radeon_scratch_init(struct radeon_device *rdev) +void radeon_scratch_init(struct radeon_device *rdev) { int i; @@ -156,16 +156,14 @@ int radeon_mc_setup(struct radeon_device *rdev) tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1); rdev->mc.gtt_location = tmp; } - DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20); + DRM_INFO("radeon: VRAM %uM\n", (unsigned)(rdev->mc.mc_vram_size >> 20)); DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n", - rdev->mc.vram_location, - rdev->mc.vram_location + rdev->mc.mc_vram_size - 1); - if (rdev->mc.real_vram_size != rdev->mc.mc_vram_size) - DRM_INFO("radeon: VRAM less than aperture workaround enabled\n"); - DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20); + (unsigned)rdev->mc.vram_location, + (unsigned)(rdev->mc.vram_location + rdev->mc.mc_vram_size - 1)); + DRM_INFO("radeon: GTT %uM\n", (unsigned)(rdev->mc.gtt_size >> 20)); DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n", - rdev->mc.gtt_location, - rdev->mc.gtt_location + rdev->mc.gtt_size - 1); + (unsigned)rdev->mc.gtt_location, + (unsigned)(rdev->mc.gtt_location + rdev->mc.gtt_size - 1)); return 0; } @@ -205,6 +203,31 @@ static bool radeon_card_posted(struct radeon_device *rdev) } +int radeon_dummy_page_init(struct radeon_device *rdev) +{ + rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); + if (rdev->dummy_page.page == NULL) + return -ENOMEM; + rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page, + 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (!rdev->dummy_page.addr) { + __free_page(rdev->dummy_page.page); + rdev->dummy_page.page = NULL; + return -ENOMEM; + } + return 0; +} + +void radeon_dummy_page_fini(struct radeon_device *rdev) +{ + if (rdev->dummy_page.page == NULL) + return; + pci_unmap_page(rdev->pdev, rdev->dummy_page.addr, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + __free_page(rdev->dummy_page.page); + rdev->dummy_page.page = NULL; +} + /* * Registers accessors functions. @@ -323,9 +346,15 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_RV635: case CHIP_RV670: case CHIP_RS780: + case CHIP_RS880: + rdev->asic = &r600_asic; + break; case CHIP_RV770: case CHIP_RV730: case CHIP_RV710: + case CHIP_RV740: + rdev->asic = &rv770_asic; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -448,7 +477,7 @@ int radeon_device_init(struct radeon_device *rdev, struct pci_dev *pdev, uint32_t flags) { - int r, ret; + int r, ret = 0; int dma_bits; DRM_INFO("radeon: Initializing kernel modesetting.\n"); @@ -487,10 +516,6 @@ int radeon_device_init(struct radeon_device *rdev, if (r) { return r; } - r = radeon_init(rdev); - if (r) { - return r; - } /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. @@ -521,111 +546,118 @@ int radeon_device_init(struct radeon_device *rdev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)rdev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)rdev->rmmio_size); - /* Setup errata flags */ - radeon_errata(rdev); - /* Initialize scratch registers */ - radeon_scratch_init(rdev); - /* Initialize surface registers */ - radeon_surface_init(rdev); - - /* TODO: disable VGA need to use VGA request */ - /* BIOS*/ - if (!radeon_get_bios(rdev)) { - if (ASIC_IS_AVIVO(rdev)) - return -EINVAL; - } - if (rdev->is_atom_bios) { - r = radeon_atombios_init(rdev); + rdev->new_init_path = false; + r = radeon_init(rdev); + if (r) { + return r; + } + if (!rdev->new_init_path) { + /* Setup errata flags */ + radeon_errata(rdev); + /* Initialize scratch registers */ + radeon_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + + /* TODO: disable VGA need to use VGA request */ + /* BIOS*/ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + if (rdev->is_atom_bios) { + r = radeon_atombios_init(rdev); + if (r) { + return r; + } + } else { + r = radeon_combios_init(rdev); + if (r) { + return r; + } + } + /* Reset gpu before posting otherwise ATOM will enter infinite loop */ + if (radeon_gpu_reset(rdev)) { + /* FIXME: what do we want to do here ? */ + } + /* check if cards are posted or not */ + if (!radeon_card_posted(rdev) && rdev->bios) { + DRM_INFO("GPU not posted. posting now...\n"); + if (rdev->is_atom_bios) { + atom_asic_init(rdev->mode_info.atom_context); + } else { + radeon_combios_asic_init(rdev->ddev); + } + } + /* Initialize clocks */ + r = radeon_clocks_init(rdev); if (r) { return r; } - } else { - r = radeon_combios_init(rdev); + /* Get vram informations */ + radeon_vram_info(rdev); + + /* Add an MTRR for the VRAM */ + rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size, + MTRR_TYPE_WRCOMB, 1); + DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n", + (unsigned)(rdev->mc.mc_vram_size >> 20), + (unsigned)(rdev->mc.aper_size >> 20)); + DRM_INFO("RAM width %dbits %cDR\n", + rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S'); + /* Initialize memory controller (also test AGP) */ + r = radeon_mc_init(rdev); if (r) { return r; } - } - /* Reset gpu before posting otherwise ATOM will enter infinite loop */ - if (radeon_gpu_reset(rdev)) { - /* FIXME: what do we want to do here ? */ - } - /* check if cards are posted or not */ - if (!radeon_card_posted(rdev) && rdev->bios) { - DRM_INFO("GPU not posted. posting now...\n"); - if (rdev->is_atom_bios) { - atom_asic_init(rdev->mode_info.atom_context); - } else { - radeon_combios_asic_init(rdev->ddev); - } - } - /* Initialize clocks */ - r = radeon_clocks_init(rdev); - if (r) { - return r; - } - /* Get vram informations */ - radeon_vram_info(rdev); - - /* Add an MTRR for the VRAM */ - rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size, - MTRR_TYPE_WRCOMB, 1); - DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n", - rdev->mc.real_vram_size >> 20, - (unsigned)rdev->mc.aper_size >> 20); - DRM_INFO("RAM width %dbits %cDR\n", - rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S'); - /* Initialize memory controller (also test AGP) */ - r = radeon_mc_init(rdev); - if (r) { - return r; - } - /* Fence driver */ - r = radeon_fence_driver_init(rdev); - if (r) { - return r; - } - r = radeon_irq_kms_init(rdev); - if (r) { - return r; - } - /* Memory manager */ - r = radeon_object_init(rdev); - if (r) { - return r; - } - /* Initialize GART (initialize after TTM so we can allocate - * memory through TTM but finalize after TTM) */ - r = radeon_gart_enable(rdev); - if (!r) { - r = radeon_gem_init(rdev); - } - - /* 1M ring buffer */ - if (!r) { - r = radeon_cp_init(rdev, 1024 * 1024); - } - if (!r) { - r = radeon_wb_init(rdev); + /* Fence driver */ + r = radeon_fence_driver_init(rdev); if (r) { - DRM_ERROR("radeon: failled initializing WB (%d).\n", r); return r; } - } - if (!r) { - r = radeon_ib_pool_init(rdev); + r = radeon_irq_kms_init(rdev); if (r) { - DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); return r; } - } - if (!r) { - r = radeon_ib_test(rdev); + /* Memory manager */ + r = radeon_object_init(rdev); if (r) { - DRM_ERROR("radeon: failled testing IB (%d).\n", r); return r; } + /* Initialize GART (initialize after TTM so we can allocate + * memory through TTM but finalize after TTM) */ + r = radeon_gart_enable(rdev); + if (!r) { + r = radeon_gem_init(rdev); + } + + /* 1M ring buffer */ + if (!r) { + r = radeon_cp_init(rdev, 1024 * 1024); + } + if (!r) { + r = radeon_wb_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing WB (%d).\n", r); + return r; + } + } + if (!r) { + r = radeon_ib_pool_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); + return r; + } + } + if (!r) { + r = radeon_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failled testing IB (%d).\n", r); + return r; + } + } + ret = r; } - ret = r; r = radeon_modeset_init(rdev); if (r) { return r; @@ -651,26 +683,29 @@ void radeon_device_fini(struct radeon_device *rdev) rdev->shutdown = true; /* Order matter so becarefull if you rearrange anythings */ radeon_modeset_fini(rdev); - radeon_ib_pool_fini(rdev); - radeon_cp_fini(rdev); - radeon_wb_fini(rdev); - radeon_gem_fini(rdev); - radeon_object_fini(rdev); - /* mc_fini must be after object_fini */ - radeon_mc_fini(rdev); + if (!rdev->new_init_path) { + radeon_ib_pool_fini(rdev); + radeon_cp_fini(rdev); + radeon_wb_fini(rdev); + radeon_gem_fini(rdev); + radeon_mc_fini(rdev); #if __OS_HAS_AGP - radeon_agp_fini(rdev); + radeon_agp_fini(rdev); #endif - radeon_irq_kms_fini(rdev); - radeon_fence_driver_fini(rdev); - radeon_clocks_fini(rdev); - if (rdev->is_atom_bios) { - radeon_atombios_fini(rdev); + radeon_irq_kms_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_clocks_fini(rdev); + radeon_object_fini(rdev); + if (rdev->is_atom_bios) { + radeon_atombios_fini(rdev); + } else { + radeon_combios_fini(rdev); + } + kfree(rdev->bios); + rdev->bios = NULL; } else { - radeon_combios_fini(rdev); + radeon_fini(rdev); } - kfree(rdev->bios); - rdev->bios = NULL; iounmap(rdev->rmmio); rdev->rmmio = NULL; } @@ -708,9 +743,12 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) /* wait for gpu to finish processing current batch */ radeon_fence_wait_last(rdev); - radeon_cp_disable(rdev); - radeon_gart_disable(rdev); - + if (!rdev->new_init_path) { + radeon_cp_disable(rdev); + radeon_gart_disable(rdev); + } else { + radeon_suspend(rdev); + } /* evict remaining vram memory */ radeon_object_evict_vram(rdev); @@ -746,33 +784,37 @@ int radeon_resume_kms(struct drm_device *dev) if (radeon_gpu_reset(rdev)) { /* FIXME: what do we want to do here ? */ } - /* post card */ - if (rdev->is_atom_bios) { - atom_asic_init(rdev->mode_info.atom_context); + if (!rdev->new_init_path) { + /* post card */ + if (rdev->is_atom_bios) { + atom_asic_init(rdev->mode_info.atom_context); + } else { + radeon_combios_asic_init(rdev->ddev); + } + /* Initialize clocks */ + r = radeon_clocks_init(rdev); + if (r) { + release_console_sem(); + return r; + } + /* Enable IRQ */ + rdev->irq.sw_int = true; + radeon_irq_set(rdev); + /* Initialize GPU Memory Controller */ + r = radeon_mc_init(rdev); + if (r) { + goto out; + } + r = radeon_gart_enable(rdev); + if (r) { + goto out; + } + r = radeon_cp_init(rdev, rdev->cp.ring_size); + if (r) { + goto out; + } } else { - radeon_combios_asic_init(rdev->ddev); - } - /* Initialize clocks */ - r = radeon_clocks_init(rdev); - if (r) { - release_console_sem(); - return r; - } - /* Enable IRQ */ - rdev->irq.sw_int = true; - radeon_irq_set(rdev); - /* Initialize GPU Memory Controller */ - r = radeon_mc_init(rdev); - if (r) { - goto out; - } - r = radeon_gart_enable(rdev); - if (r) { - goto out; - } - r = radeon_cp_init(rdev, rdev->cp.ring_size); - if (r) { - goto out; + radeon_resume(rdev); } out: fb_set_suspend(rdev->fbdev_info, 0); diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 40294a07976f..c7b185924f6c 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -356,6 +356,12 @@ typedef struct drm_radeon_private { int r700_sc_hiz_tile_fifo_size; int r700_sc_earlyz_tile_fifo_fize; + struct mutex cs_mutex; + u32 cs_id_scnt; + u32 cs_id_wcnt; + /* r6xx/r7xx drm blit vertex buffer */ + struct drm_buf *blit_vb; + /* firmware */ const struct firmware *me_fw, *pfp_fw; } drm_radeon_private_t; @@ -396,6 +402,9 @@ static __inline__ int radeon_check_offset(drm_radeon_private_t *dev_priv, (off >= gart_start && off <= gart_end)); } +/* radeon_state.c */ +extern void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf); + /* radeon_cp.c */ extern int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int radeon_cp_start(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -487,6 +496,22 @@ extern int r600_cp_dispatch_indirect(struct drm_device *dev, struct drm_buf *buf, int start, int end); extern int r600_page_table_init(struct drm_device *dev); extern void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info); +extern int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv); +extern void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv); +extern int r600_cp_dispatch_texture(struct drm_device *dev, + struct drm_file *file_priv, + drm_radeon_texture_t *tex, + drm_radeon_tex_image_t *image); +/* r600_blit.c */ +extern int r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv); +extern void r600_done_blit_copy(struct drm_device *dev); +extern void r600_blit_copy(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int size_bytes); +extern void r600_blit_swap(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int sx, int sy, int dx, int dy, + int w, int h, int src_pitch, int dst_pitch, int cpp); /* Flags for stats.boxes */ @@ -1114,13 +1139,71 @@ extern u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index); # define RADEON_CNTL_BITBLT_MULTI 0x00009B00 # define RADEON_CNTL_SET_SCISSORS 0xC0001E00 -# define R600_IT_INDIRECT_BUFFER 0x00003200 -# define R600_IT_ME_INITIALIZE 0x00004400 +# define R600_IT_INDIRECT_BUFFER_END 0x00001700 +# define R600_IT_SET_PREDICATION 0x00002000 +# define R600_IT_REG_RMW 0x00002100 +# define R600_IT_COND_EXEC 0x00002200 +# define R600_IT_PRED_EXEC 0x00002300 +# define R600_IT_START_3D_CMDBUF 0x00002400 +# define R600_IT_DRAW_INDEX_2 0x00002700 +# define R600_IT_CONTEXT_CONTROL 0x00002800 +# define R600_IT_DRAW_INDEX_IMMD_BE 0x00002900 +# define R600_IT_INDEX_TYPE 0x00002A00 +# define R600_IT_DRAW_INDEX 0x00002B00 +# define R600_IT_DRAW_INDEX_AUTO 0x00002D00 +# define R600_IT_DRAW_INDEX_IMMD 0x00002E00 +# define R600_IT_NUM_INSTANCES 0x00002F00 +# define R600_IT_STRMOUT_BUFFER_UPDATE 0x00003400 +# define R600_IT_INDIRECT_BUFFER_MP 0x00003800 +# define R600_IT_MEM_SEMAPHORE 0x00003900 +# define R600_IT_MPEG_INDEX 0x00003A00 +# define R600_IT_WAIT_REG_MEM 0x00003C00 +# define R600_IT_MEM_WRITE 0x00003D00 +# define R600_IT_INDIRECT_BUFFER 0x00003200 +# define R600_IT_CP_INTERRUPT 0x00004000 +# define R600_IT_SURFACE_SYNC 0x00004300 +# define R600_CB0_DEST_BASE_ENA (1 << 6) +# define R600_TC_ACTION_ENA (1 << 23) +# define R600_VC_ACTION_ENA (1 << 24) +# define R600_CB_ACTION_ENA (1 << 25) +# define R600_DB_ACTION_ENA (1 << 26) +# define R600_SH_ACTION_ENA (1 << 27) +# define R600_SMX_ACTION_ENA (1 << 28) +# define R600_IT_ME_INITIALIZE 0x00004400 # define R600_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) -# define R600_IT_EVENT_WRITE 0x00004600 -# define R600_IT_SET_CONFIG_REG 0x00006800 -# define R600_SET_CONFIG_REG_OFFSET 0x00008000 -# define R600_SET_CONFIG_REG_END 0x0000ac00 +# define R600_IT_COND_WRITE 0x00004500 +# define R600_IT_EVENT_WRITE 0x00004600 +# define R600_IT_EVENT_WRITE_EOP 0x00004700 +# define R600_IT_ONE_REG_WRITE 0x00005700 +# define R600_IT_SET_CONFIG_REG 0x00006800 +# define R600_SET_CONFIG_REG_OFFSET 0x00008000 +# define R600_SET_CONFIG_REG_END 0x0000ac00 +# define R600_IT_SET_CONTEXT_REG 0x00006900 +# define R600_SET_CONTEXT_REG_OFFSET 0x00028000 +# define R600_SET_CONTEXT_REG_END 0x00029000 +# define R600_IT_SET_ALU_CONST 0x00006A00 +# define R600_SET_ALU_CONST_OFFSET 0x00030000 +# define R600_SET_ALU_CONST_END 0x00032000 +# define R600_IT_SET_BOOL_CONST 0x00006B00 +# define R600_SET_BOOL_CONST_OFFSET 0x0003e380 +# define R600_SET_BOOL_CONST_END 0x00040000 +# define R600_IT_SET_LOOP_CONST 0x00006C00 +# define R600_SET_LOOP_CONST_OFFSET 0x0003e200 +# define R600_SET_LOOP_CONST_END 0x0003e380 +# define R600_IT_SET_RESOURCE 0x00006D00 +# define R600_SET_RESOURCE_OFFSET 0x00038000 +# define R600_SET_RESOURCE_END 0x0003c000 +# define R600_SQ_TEX_VTX_INVALID_TEXTURE 0x0 +# define R600_SQ_TEX_VTX_INVALID_BUFFER 0x1 +# define R600_SQ_TEX_VTX_VALID_TEXTURE 0x2 +# define R600_SQ_TEX_VTX_VALID_BUFFER 0x3 +# define R600_IT_SET_SAMPLER 0x00006E00 +# define R600_SET_SAMPLER_OFFSET 0x0003c000 +# define R600_SET_SAMPLER_END 0x0003cff0 +# define R600_IT_SET_CTL_CONST 0x00006F00 +# define R600_SET_CTL_CONST_OFFSET 0x0003cff0 +# define R600_SET_CTL_CONST_END 0x0003e200 +# define R600_IT_SURFACE_BASE_UPDATE 0x00007300 #define RADEON_CP_PACKET_MASK 0xC0000000 #define RADEON_CP_PACKET_COUNT_MASK 0x3fff0000 @@ -1598,6 +1681,52 @@ extern u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index); #define R600_CB_COLOR7_BASE 0x2805c #define R600_CB_COLOR7_FRAG 0x280fc +#define R600_CB_COLOR0_SIZE 0x28060 +#define R600_CB_COLOR0_VIEW 0x28080 +#define R600_CB_COLOR0_INFO 0x280a0 +#define R600_CB_COLOR0_TILE 0x280c0 +#define R600_CB_COLOR0_FRAG 0x280e0 +#define R600_CB_COLOR0_MASK 0x28100 + +#define AVIVO_D1MODE_VLINE_START_END 0x6538 +#define AVIVO_D2MODE_VLINE_START_END 0x6d38 +#define R600_CP_COHER_BASE 0x85f8 +#define R600_DB_DEPTH_BASE 0x2800c +#define R600_SQ_PGM_START_FS 0x28894 +#define R600_SQ_PGM_START_ES 0x28880 +#define R600_SQ_PGM_START_VS 0x28858 +#define R600_SQ_PGM_RESOURCES_VS 0x28868 +#define R600_SQ_PGM_CF_OFFSET_VS 0x288d0 +#define R600_SQ_PGM_START_GS 0x2886c +#define R600_SQ_PGM_START_PS 0x28840 +#define R600_SQ_PGM_RESOURCES_PS 0x28850 +#define R600_SQ_PGM_EXPORTS_PS 0x28854 +#define R600_SQ_PGM_CF_OFFSET_PS 0x288cc +#define R600_VGT_DMA_BASE 0x287e8 +#define R600_VGT_DMA_BASE_HI 0x287e4 +#define R600_VGT_STRMOUT_BASE_OFFSET_0 0x28b10 +#define R600_VGT_STRMOUT_BASE_OFFSET_1 0x28b14 +#define R600_VGT_STRMOUT_BASE_OFFSET_2 0x28b18 +#define R600_VGT_STRMOUT_BASE_OFFSET_3 0x28b1c +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_0 0x28b44 +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_1 0x28b48 +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_2 0x28b4c +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_3 0x28b50 +#define R600_VGT_STRMOUT_BUFFER_BASE_0 0x28ad8 +#define R600_VGT_STRMOUT_BUFFER_BASE_1 0x28ae8 +#define R600_VGT_STRMOUT_BUFFER_BASE_2 0x28af8 +#define R600_VGT_STRMOUT_BUFFER_BASE_3 0x28b08 +#define R600_VGT_STRMOUT_BUFFER_OFFSET_0 0x28adc +#define R600_VGT_STRMOUT_BUFFER_OFFSET_1 0x28aec +#define R600_VGT_STRMOUT_BUFFER_OFFSET_2 0x28afc +#define R600_VGT_STRMOUT_BUFFER_OFFSET_3 0x28b0c + +#define R600_VGT_PRIMITIVE_TYPE 0x8958 + +#define R600_PA_SC_SCREEN_SCISSOR_TL 0x28030 +#define R600_PA_SC_GENERIC_SCISSOR_TL 0x28240 +#define R600_PA_SC_WINDOW_SCISSOR_TL 0x28204 + #define R600_TC_CNTL 0x9608 # define R600_TC_L2_SIZE(x) ((x) << 5) # define R600_L2_DISABLE_LATE_HIT (1 << 9) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index b4e48dd2e859..506dd4dd3a24 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -53,9 +53,9 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) * away */ WREG32(rdev->fence_drv.scratch_reg, fence->seq); - } else { + } else radeon_fence_ring_emit(rdev, fence); - } + fence->emited = true; fence->timeout = jiffies + ((2000 * HZ) / 1000); list_del(&fence->list); @@ -168,7 +168,47 @@ bool radeon_fence_signaled(struct radeon_fence *fence) return signaled; } -int radeon_fence_wait(struct radeon_fence *fence, bool interruptible) +int r600_fence_wait(struct radeon_fence *fence, bool intr, bool lazy) +{ + struct radeon_device *rdev; + unsigned long cur_jiffies; + unsigned long timeout; + int ret = 0; + + cur_jiffies = jiffies; + timeout = HZ / 100; + + if (time_after(fence->timeout, cur_jiffies)) { + timeout = fence->timeout - cur_jiffies; + } + + rdev = fence->rdev; + + __set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); + + while (1) { + if (radeon_fence_signaled(fence)) + break; + + if (time_after_eq(jiffies, timeout)) { + ret = -EBUSY; + break; + } + + if (lazy) + schedule_timeout(1); + + if (intr && signal_pending(current)) { + ret = -ERESTART; + break; + } + } + __set_current_state(TASK_RUNNING); + return ret; +} + + +int radeon_fence_wait(struct radeon_fence *fence, bool intr) { struct radeon_device *rdev; unsigned long cur_jiffies; @@ -176,7 +216,6 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible) bool expired = false; int r; - if (fence == NULL) { WARN(1, "Querying an invalid fence : %p !\n", fence); return 0; @@ -185,13 +224,18 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible) if (radeon_fence_signaled(fence)) { return 0; } + + if (rdev->family >= CHIP_R600) + return r600_fence_wait(fence, intr, 0); + retry: cur_jiffies = jiffies; timeout = HZ / 100; if (time_after(fence->timeout, cur_jiffies)) { timeout = fence->timeout - cur_jiffies; } - if (interruptible) { + + if (intr) { r = wait_event_interruptible_timeout(rdev->fence_drv.queue, radeon_fence_signaled(fence), timeout); if (unlikely(r == -ERESTARTSYS)) { diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h index 28be2f1165cb..21da871a793c 100644 --- a/drivers/gpu/drm/radeon/radeon_reg.h +++ b/drivers/gpu/drm/radeon/radeon_reg.h @@ -3255,6 +3255,24 @@ #define RADEON_CP_RB_WPTR 0x0714 #define RADEON_CP_RB_RPTR_WR 0x071c +#define RADEON_SCRATCH_UMSK 0x0770 +#define RADEON_SCRATCH_ADDR 0x0774 + +#define R600_CP_RB_BASE 0xc100 +#define R600_CP_RB_CNTL 0xc104 +# define R600_RB_BUFSZ(x) ((x) << 0) +# define R600_RB_BLKSZ(x) ((x) << 8) +# define R600_RB_NO_UPDATE (1 << 27) +# define R600_RB_RPTR_WR_ENA (1 << 31) +#define R600_CP_RB_RPTR_WR 0xc108 +#define R600_CP_RB_RPTR_ADDR 0xc10c +#define R600_CP_RB_RPTR_ADDR_HI 0xc110 +#define R600_CP_RB_WPTR 0xc114 +#define R600_CP_RB_WPTR_ADDR 0xc118 +#define R600_CP_RB_WPTR_ADDR_HI 0xc11c +#define R600_CP_RB_RPTR 0x8700 +#define R600_CP_RB_WPTR_DELAY 0x8704 + #define RADEON_CP_IB_BASE 0x0738 #define RADEON_CP_IB_BUFSZ 0x073c diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 60d159308b88..aa9837a6aa75 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -110,7 +110,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib) return; } list_del(&tmp->list); - INIT_LIST_HEAD(&tmp->list); if (tmp->fence) { radeon_fence_unref(&tmp->fence); } @@ -119,19 +118,11 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib) mutex_unlock(&rdev->ib_pool.mutex); } -static void radeon_ib_align(struct radeon_device *rdev, struct radeon_ib *ib) -{ - while ((ib->length_dw & rdev->cp.align_mask)) { - ib->ptr[ib->length_dw++] = PACKET2(0); - } -} - int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) { int r = 0; mutex_lock(&rdev->ib_pool.mutex); - radeon_ib_align(rdev, ib); if (!ib->length_dw || !rdev->cp.ready) { /* TODO: Nothings in the ib we should report. */ mutex_unlock(&rdev->ib_pool.mutex); @@ -145,9 +136,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) mutex_unlock(&rdev->ib_pool.mutex); return r; } - radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1)); - radeon_ring_write(rdev, ib->gpu_addr); - radeon_ring_write(rdev, ib->length_dw); + radeon_ring_ib_execute(rdev, ib); radeon_fence_emit(rdev, ib->fence); radeon_ring_unlock_commit(rdev); list_add_tail(&ib->list, &rdev->ib_pool.scheduled_ibs); @@ -215,69 +204,16 @@ void radeon_ib_pool_fini(struct radeon_device *rdev) mutex_unlock(&rdev->ib_pool.mutex); } -int radeon_ib_test(struct radeon_device *rdev) -{ - struct radeon_ib *ib; - uint32_t scratch; - uint32_t tmp = 0; - unsigned i; - int r; - - r = radeon_scratch_get(rdev, &scratch); - if (r) { - DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - r = radeon_ib_get(rdev, &ib); - if (r) { - return r; - } - ib->ptr[0] = PACKET0(scratch, 0); - ib->ptr[1] = 0xDEADBEEF; - ib->ptr[2] = PACKET2(0); - ib->ptr[3] = PACKET2(0); - ib->ptr[4] = PACKET2(0); - ib->ptr[5] = PACKET2(0); - ib->ptr[6] = PACKET2(0); - ib->ptr[7] = PACKET2(0); - ib->length_dw = 8; - r = radeon_ib_schedule(rdev, ib); - if (r) { - radeon_scratch_free(rdev, scratch); - radeon_ib_free(rdev, &ib); - return r; - } - r = radeon_fence_wait(ib->fence, false); - if (r) { - return r; - } - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - break; - } - DRM_UDELAY(1); - } - if (i < rdev->usec_timeout) { - DRM_INFO("ib test succeeded in %u usecs\n", i); - } else { - DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; - } - radeon_scratch_free(rdev, scratch); - radeon_ib_free(rdev, &ib); - return r; -} - /* * Ring. */ void radeon_ring_free_size(struct radeon_device *rdev) { - rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); + if (rdev->family >= CHIP_R600) + rdev->cp.rptr = RREG32(R600_CP_RB_RPTR); + else + rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); /* This works because ring_size is a power of 2 */ rdev->cp.ring_free_dw = (rdev->cp.rptr + (rdev->cp.ring_size / 4)); rdev->cp.ring_free_dw -= rdev->cp.wptr; @@ -320,11 +256,10 @@ void radeon_ring_unlock_commit(struct radeon_device *rdev) count_dw_pad = (rdev->cp.align_mask + 1) - (rdev->cp.wptr & rdev->cp.align_mask); for (i = 0; i < count_dw_pad; i++) { - radeon_ring_write(rdev, PACKET2(0)); + radeon_ring_write(rdev, 2 << 30); } DRM_MEMORYBARRIER(); - WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); - (void)RREG32(RADEON_CP_RB_WPTR); + radeon_cp_commit(rdev); mutex_unlock(&rdev->cp.mutex); } @@ -334,46 +269,6 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev) mutex_unlock(&rdev->cp.mutex); } -int radeon_ring_test(struct radeon_device *rdev) -{ - uint32_t scratch; - uint32_t tmp = 0; - unsigned i; - int r; - - r = radeon_scratch_get(rdev, &scratch); - if (r) { - DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - r = radeon_ring_lock(rdev, 2); - if (r) { - DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); - radeon_scratch_free(rdev, scratch); - return r; - } - radeon_ring_write(rdev, PACKET0(scratch, 0)); - radeon_ring_write(rdev, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev); - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - break; - } - DRM_UDELAY(1); - } - if (i < rdev->usec_timeout) { - DRM_INFO("ring test succeeded in %d usecs\n", i); - } else { - DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; - } - radeon_scratch_free(rdev, scratch); - return r; -} - int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size) { int r; diff --git a/drivers/gpu/drm/radeon/radeon_share.h b/drivers/gpu/drm/radeon/radeon_share.h index 63a773578f17..5f9e358ab506 100644 --- a/drivers/gpu/drm/radeon/radeon_share.h +++ b/drivers/gpu/drm/radeon/radeon_share.h @@ -28,12 +28,89 @@ #ifndef __RADEON_SHARE_H__ #define __RADEON_SHARE_H__ +/* Common */ +struct radeon_device; +struct radeon_cs_parser; +int radeon_clocks_init(struct radeon_device *rdev); +void radeon_clocks_fini(struct radeon_device *rdev); +void radeon_scratch_init(struct radeon_device *rdev); +void radeon_surface_init(struct radeon_device *rdev); +int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data); + + +/* R100, RV100, RS100, RV200, RS200, R200, RV250, RS300, RV280 */ void r100_vram_init_sizes(struct radeon_device *rdev); + +/* R300, R350, RV350, RV380 */ +struct r300_asic { + const unsigned *reg_safe_bm; + unsigned reg_safe_bm_size; +}; + + +/* RS690, RS740 */ void rs690_line_buffer_adjust(struct radeon_device *rdev, struct drm_display_mode *mode1, struct drm_display_mode *mode2); + +/* RV515 */ void rv515_bandwidth_avivo_update(struct radeon_device *rdev); + +/* R600, RV610, RV630, RV620, RV635, RV670, RS780, RS880 */ +bool r600_card_posted(struct radeon_device *rdev); +void r600_cp_stop(struct radeon_device *rdev); +void r600_ring_init(struct radeon_device *rdev, unsigned ring_size); +int r600_cp_resume(struct radeon_device *rdev); +int r600_count_pipe_bits(uint32_t val); +int r600_gart_clear_page(struct radeon_device *rdev, int i); +int r600_mc_wait_for_idle(struct radeon_device *rdev); +void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); +int r600_ib_test(struct radeon_device *rdev); +int r600_ring_test(struct radeon_device *rdev); +int r600_wb_init(struct radeon_device *rdev); +void r600_wb_fini(struct radeon_device *rdev); +void r600_scratch_init(struct radeon_device *rdev); +int r600_blit_init(struct radeon_device *rdev); +void r600_blit_fini(struct radeon_device *rdev); +int r600_cp_init_microcode(struct radeon_device *rdev); +struct r600_asic { + unsigned max_pipes; + unsigned max_tile_pipes; + unsigned max_simds; + unsigned max_backends; + unsigned max_gprs; + unsigned max_threads; + unsigned max_stack_entries; + unsigned max_hw_contexts; + unsigned max_gs_threads; + unsigned sx_max_export_size; + unsigned sx_max_export_pos_size; + unsigned sx_max_export_smx_size; + unsigned sq_num_cf_insts; +}; + +/* RV770, RV7300, RV710 */ +struct rv770_asic { + unsigned max_pipes; + unsigned max_tile_pipes; + unsigned max_simds; + unsigned max_backends; + unsigned max_gprs; + unsigned max_threads; + unsigned max_stack_entries; + unsigned max_hw_contexts; + unsigned max_gs_threads; + unsigned sx_max_export_size; + unsigned sx_max_export_pos_size; + unsigned sx_max_export_smx_size; + unsigned sq_num_cf_insts; + unsigned sx_num_of_sets; + unsigned sc_prim_fifo_size; + unsigned sc_hiz_tile_fifo_size; + unsigned sc_earlyz_tile_fifo_fize; +}; + #endif diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 2882f40d5ec5..aad0c6fafcf4 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -1546,7 +1546,7 @@ static void radeon_cp_dispatch_vertex(struct drm_device * dev, } while (i < nbox); } -static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf) +void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf) { drm_radeon_private_t *dev_priv = dev->dev_private; struct drm_radeon_master_private *master_priv = master->driver_priv; @@ -2213,7 +2213,10 @@ static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *f if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS) sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS; - radeon_cp_dispatch_swap(dev, file_priv->master); + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) + r600_cp_dispatch_swap(dev, file_priv); + else + radeon_cp_dispatch_swap(dev, file_priv->master); sarea_priv->ctx_owner = 0; COMMIT_RING(); @@ -2412,7 +2415,10 @@ static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file RING_SPACE_TEST_WITH_RETURN(dev_priv); VB_AGE_TEST_WITH_RETURN(dev_priv); - ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image); + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) + ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image); + else + ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image); return ret; } @@ -2495,8 +2501,9 @@ static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_fil radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end); } - if (indirect->discard) + if (indirect->discard) { radeon_cp_discard_buffer(dev, file_priv->master, buf); + } COMMIT_RING(); return 0; @@ -3227,7 +3234,8 @@ struct drm_ioctl_desc radeon_ioctls[] = { DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH) + DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH) }; int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index dc7a44274ea8..acd889c94549 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -376,9 +376,8 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, radeon_move_null(bo, new_mem); return 0; } - if (!rdev->cp.ready) { + if (!rdev->cp.ready || rdev->asic->copy == NULL) { /* use memcpy */ - DRM_ERROR("CP is not ready use memcpy.\n"); goto memcpy; } @@ -495,7 +494,7 @@ int radeon_ttm_init(struct radeon_device *rdev) return r; } DRM_INFO("radeon: %uM of VRAM memory ready\n", - rdev->mc.real_vram_size / (1024 * 1024)); + (unsigned)rdev->mc.real_vram_size / (1024 * 1024)); r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT, 0, ((rdev->mc.gtt_size) >> PAGE_SHIFT)); if (r) { @@ -503,7 +502,7 @@ int radeon_ttm_init(struct radeon_device *rdev) return r; } DRM_INFO("radeon: %uM of GTT memory ready.\n", - rdev->mc.gtt_size / (1024 * 1024)); + (unsigned)(rdev->mc.gtt_size / (1024 * 1024))); if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) { rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; } diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index b29affd9c5d8..8c3ea7e36060 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -63,7 +63,7 @@ void rs400_gart_adjust_size(struct radeon_device *rdev) break; default: DRM_ERROR("Unable to use IGP GART size %uM\n", - rdev->mc.gtt_size >> 20); + (unsigned)(rdev->mc.gtt_size >> 20)); DRM_ERROR("Valid GART size for IGP are 32M,64M,128M,256M,512M,1G,2G\n"); DRM_ERROR("Forcing to 32M GART size\n"); rdev->mc.gtt_size = 32 * 1024 * 1024; diff --git a/drivers/gpu/drm/radeon/rs780.c b/drivers/gpu/drm/radeon/rs780.c deleted file mode 100644 index 0affcff81825..000000000000 --- a/drivers/gpu/drm/radeon/rs780.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2008 Advanced Micro Devices, Inc. - * Copyright 2008 Red Hat Inc. - * Copyright 2009 Jerome Glisse. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Dave Airlie - * Alex Deucher - * Jerome Glisse - */ -#include "drmP.h" -#include "radeon_reg.h" -#include "radeon.h" - -/* rs780 depends on : */ -void rs600_mc_disable_clients(struct radeon_device *rdev); - -/* This files gather functions specifics to: - * rs780 - * - * Some of these functions might be used by newer ASICs. - */ -int rs780_mc_wait_for_idle(struct radeon_device *rdev); -void rs780_gpu_init(struct radeon_device *rdev); - - -/* - * MC - */ -int rs780_mc_init(struct radeon_device *rdev) -{ - rs780_gpu_init(rdev); - /* FIXME: implement */ - - rs600_mc_disable_clients(rdev); - if (rs780_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); - } - return 0; -} - -void rs780_mc_fini(struct radeon_device *rdev) -{ - /* FIXME: implement */ -} - - -/* - * Global GPU functions - */ -void rs780_errata(struct radeon_device *rdev) -{ - rdev->pll_errata = 0; -} - -int rs780_mc_wait_for_idle(struct radeon_device *rdev) -{ - /* FIXME: implement */ - return 0; -} - -void rs780_gpu_init(struct radeon_device *rdev) -{ - /* FIXME: implement */ -} - - -/* - * VRAM info - */ -void rs780_vram_get_type(struct radeon_device *rdev) -{ - /* FIXME: implement */ -} - -void rs780_vram_info(struct radeon_device *rdev) -{ - rs780_vram_get_type(rdev); - - /* FIXME: implement */ - /* Could aper size report 0 ? */ - rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); - rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); -} diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 97965c430c1f..99e397f16384 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -27,7 +27,7 @@ */ #include <linux/seq_file.h> #include "drmP.h" -#include "rv515r.h" +#include "rv515d.h" #include "radeon.h" #include "radeon_share.h" diff --git a/drivers/gpu/drm/radeon/rv515r.h b/drivers/gpu/drm/radeon/rv515d.h index f3cf84039906..a65e17ec1c08 100644 --- a/drivers/gpu/drm/radeon/rv515r.h +++ b/drivers/gpu/drm/radeon/rv515d.h @@ -25,10 +25,12 @@ * Alex Deucher * Jerome Glisse */ -#ifndef RV515R_H -#define RV515R_H +#ifndef __RV515D_H__ +#define __RV515D_H__ -/* RV515 registers */ +/* + * RV515 registers + */ #define PCIE_INDEX 0x0030 #define PCIE_DATA 0x0034 #define MC_IND_INDEX 0x0070 @@ -166,5 +168,53 @@ #define MC_GLOBW_INIT_LAT_MASK 0xF0000000 +/* + * PM4 packet + */ +#define CP_PACKET0 0x00000000 +#define PACKET0_BASE_INDEX_SHIFT 0 +#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) +#define PACKET0_COUNT_SHIFT 16 +#define PACKET0_COUNT_MASK (0x3fff << 16) +#define CP_PACKET1 0x40000000 +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) +#define CP_PACKET3 0xC0000000 +#define PACKET3_IT_OPCODE_SHIFT 8 +#define PACKET3_IT_OPCODE_MASK (0xff << 8) +#define PACKET3_COUNT_SHIFT 16 +#define PACKET3_COUNT_MASK (0x3fff << 16) +/* PACKET3 op code */ +#define PACKET3_NOP 0x10 +#define PACKET3_3D_DRAW_VBUF 0x28 +#define PACKET3_3D_DRAW_IMMD 0x29 +#define PACKET3_3D_DRAW_INDX 0x2A +#define PACKET3_3D_LOAD_VBPNTR 0x2F +#define PACKET3_INDX_BUFFER 0x33 +#define PACKET3_3D_DRAW_VBUF_2 0x34 +#define PACKET3_3D_DRAW_IMMD_2 0x35 +#define PACKET3_3D_DRAW_INDX_2 0x36 +#define PACKET3_BITBLT_MULTI 0x9B + +#define PACKET0(reg, n) (CP_PACKET0 | \ + REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ + REG_SET(PACKET0_COUNT, (n))) +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) +#define PACKET3(op, n) (CP_PACKET3 | \ + REG_SET(PACKET3_IT_OPCODE, (op)) | \ + REG_SET(PACKET3_COUNT, (n))) + +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) +#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) + #endif diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 21d8ffd57308..57765f6d5b20 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -25,100 +25,975 @@ * Alex Deucher * Jerome Glisse */ +#include <linux/firmware.h> +#include <linux/platform_device.h> #include "drmP.h" -#include "radeon_reg.h" #include "radeon.h" +#include "radeon_share.h" +#include "rv770d.h" +#include "avivod.h" +#include "atom.h" -/* rv770,rv730,rv710 depends on : */ -void rs600_mc_disable_clients(struct radeon_device *rdev); +#define R700_PFP_UCODE_SIZE 848 +#define R700_PM4_UCODE_SIZE 1360 -/* This files gather functions specifics to: - * rv770,rv730,rv710 - * - * Some of these functions might be used by newer ASICs. - */ -int rv770_mc_wait_for_idle(struct radeon_device *rdev); -void rv770_gpu_init(struct radeon_device *rdev); +static void rv770_gpu_init(struct radeon_device *rdev); +void rv770_fini(struct radeon_device *rdev); /* - * MC + * GART */ -int rv770_mc_init(struct radeon_device *rdev) +int rv770_pcie_gart_enable(struct radeon_device *rdev) { - uint32_t tmp; + u32 tmp; + int r, i; - rv770_gpu_init(rdev); + /* Initialize common gart structure */ + r = radeon_gart_init(rdev); + if (r) { + return r; + } + rdev->gart.table_size = rdev->gart.num_gpu_pages * 8; + r = radeon_gart_table_vram_alloc(rdev); + if (r) { + return r; + } + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); + /* Setup TLB control */ + tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | + EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); + WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); + WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); + WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + for (i = 1; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); - /* setup the gart before changing location so we can ask to - * discard unmapped mc request - */ - /* FIXME: disable out of gart access */ - tmp = rdev->mc.gtt_location / 4096; - tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp); - tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096; - tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp); - - rs600_mc_disable_clients(rdev); - if (rv770_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); - } - - tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; - tmp = REG_SET(R700_MC_FB_TOP, tmp >> 24); - tmp |= REG_SET(R700_MC_FB_BASE, rdev->mc.vram_location >> 24); - WREG32(R700_MC_VM_FB_LOCATION, tmp); - tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; - tmp = REG_SET(R700_MC_AGP_TOP, tmp >> 22); - WREG32(R700_MC_VM_AGP_TOP, tmp); - tmp = REG_SET(R700_MC_AGP_BOT, rdev->mc.gtt_location >> 22); - WREG32(R700_MC_VM_AGP_BOT, tmp); + r600_pcie_gart_tlb_flush(rdev); + rdev->gart.ready = true; return 0; } -void rv770_mc_fini(struct radeon_device *rdev) +void rv770_pcie_gart_disable(struct radeon_device *rdev) { - /* FIXME: implement */ + u32 tmp; + int i; + + /* Clear ptes*/ + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + r600_pcie_gart_tlb_flush(rdev); + /* Disable all tables */ + for (i = 0; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); + + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); + /* Setup TLB control */ + tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); + WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); } /* - * Global GPU functions + * MC */ -void rv770_errata(struct radeon_device *rdev) +static void rv770_mc_resume(struct radeon_device *rdev) { - rdev->pll_errata = 0; + u32 d1vga_control, d2vga_control; + u32 vga_render_control, vga_hdp_control; + u32 d1crtc_control, d2crtc_control; + u32 new_d1grph_primary, new_d1grph_secondary; + u32 new_d2grph_primary, new_d2grph_secondary; + u64 old_vram_start; + u32 tmp; + int i, j; + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); + + d1vga_control = RREG32(D1VGA_CONTROL); + d2vga_control = RREG32(D2VGA_CONTROL); + vga_render_control = RREG32(VGA_RENDER_CONTROL); + vga_hdp_control = RREG32(VGA_HDP_CONTROL); + d1crtc_control = RREG32(D1CRTC_CONTROL); + d2crtc_control = RREG32(D2CRTC_CONTROL); + old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24; + new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS); + new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS); + new_d1grph_primary += rdev->mc.vram_start - old_vram_start; + new_d1grph_secondary += rdev->mc.vram_start - old_vram_start; + new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS); + new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS); + new_d2grph_primary += rdev->mc.vram_start - old_vram_start; + new_d2grph_secondary += rdev->mc.vram_start - old_vram_start; + + /* Stop all video */ + WREG32(D1VGA_CONTROL, 0); + WREG32(D2VGA_CONTROL, 0); + WREG32(VGA_RENDER_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, 0); + WREG32(D2CRTC_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); + } + + /* Lockout access through VGA aperture*/ + WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); + + /* Update configuration */ + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); + tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16; + tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); + WREG32(MC_VM_FB_LOCATION, tmp); + WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); + WREG32(HDP_NONSURFACE_INFO, (2 << 7)); + WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF); + if (rdev->flags & RADEON_IS_AGP) { + WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16); + WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16); + WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22); + } else { + WREG32(MC_VM_AGP_BASE, 0); + WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); + WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); + } + WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary); + WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary); + WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary); + WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary); + WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start); + + /* Unlock host access */ + WREG32(VGA_HDP_CONTROL, vga_hdp_control); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); + } + + /* Restore video state */ + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, d1crtc_control); + WREG32(D2CRTC_CONTROL, d2crtc_control); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + WREG32(D1VGA_CONTROL, d1vga_control); + WREG32(D2VGA_CONTROL, d2vga_control); + WREG32(VGA_RENDER_CONTROL, vga_render_control); } -int rv770_mc_wait_for_idle(struct radeon_device *rdev) + +/* + * CP. + */ +void r700_cp_stop(struct radeon_device *rdev) { - /* FIXME: implement */ - return 0; + WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); } -void rv770_gpu_init(struct radeon_device *rdev) + +static int rv770_cp_load_microcode(struct radeon_device *rdev) { - /* FIXME: implement */ + const __be32 *fw_data; + int i; + + if (!rdev->me_fw || !rdev->pfp_fw) + return -EINVAL; + + r700_cp_stop(rdev); + WREG32(CP_RB_CNTL, RB_NO_UPDATE | (15 << 8) | (3 << 0)); + + /* Reset cp */ + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + + fw_data = (const __be32 *)rdev->pfp_fw->data; + WREG32(CP_PFP_UCODE_ADDR, 0); + for (i = 0; i < R700_PFP_UCODE_SIZE; i++) + WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_PFP_UCODE_ADDR, 0); + + fw_data = (const __be32 *)rdev->me_fw->data; + WREG32(CP_ME_RAM_WADDR, 0); + for (i = 0; i < R700_PM4_UCODE_SIZE; i++) + WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); + + WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(CP_ME_RAM_WADDR, 0); + WREG32(CP_ME_RAM_RADDR, 0); + return 0; } /* - * VRAM info + * Core functions */ -void rv770_vram_get_type(struct radeon_device *rdev) +static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes, + u32 num_backends, + u32 backend_disable_mask) { - /* FIXME: implement */ + u32 backend_map = 0; + u32 enabled_backends_mask; + u32 enabled_backends_count; + u32 cur_pipe; + u32 swizzle_pipe[R7XX_MAX_PIPES]; + u32 cur_backend; + u32 i; + + if (num_tile_pipes > R7XX_MAX_PIPES) + num_tile_pipes = R7XX_MAX_PIPES; + if (num_tile_pipes < 1) + num_tile_pipes = 1; + if (num_backends > R7XX_MAX_BACKENDS) + num_backends = R7XX_MAX_BACKENDS; + if (num_backends < 1) + num_backends = 1; + + enabled_backends_mask = 0; + enabled_backends_count = 0; + for (i = 0; i < R7XX_MAX_BACKENDS; ++i) { + if (((backend_disable_mask >> i) & 1) == 0) { + enabled_backends_mask |= (1 << i); + ++enabled_backends_count; + } + if (enabled_backends_count == num_backends) + break; + } + + if (enabled_backends_count == 0) { + enabled_backends_mask = 1; + enabled_backends_count = 1; + } + + if (enabled_backends_count != num_backends) + num_backends = enabled_backends_count; + + memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES); + switch (num_tile_pipes) { + case 1: + swizzle_pipe[0] = 0; + break; + case 2: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + break; + case 3: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 1; + break; + case 4: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 3; + swizzle_pipe[3] = 1; + break; + case 5: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 1; + swizzle_pipe[4] = 3; + break; + case 6: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 5; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 1; + break; + case 7: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 1; + swizzle_pipe[6] = 5; + break; + case 8: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 1; + swizzle_pipe[6] = 7; + swizzle_pipe[7] = 5; + break; + } + + cur_backend = 0; + for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { + while (((1 << cur_backend) & enabled_backends_mask) == 0) + cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; + + backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); + + cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; + } + + return backend_map; } -void rv770_vram_info(struct radeon_device *rdev) +static void rv770_gpu_init(struct radeon_device *rdev) { - rv770_vram_get_type(rdev); + int i, j, num_qd_pipes; + u32 sx_debug_1; + u32 smx_dc_ctl0; + u32 num_gs_verts_per_thread; + u32 vgt_gs_per_es; + u32 gs_prim_buffer_depth = 0; + u32 sq_ms_fifo_sizes; + u32 sq_config; + u32 sq_thread_resource_mgmt; + u32 hdp_host_path_cntl; + u32 sq_dyn_gpr_size_simd_ab_0; + u32 backend_map; + u32 gb_tiling_config = 0; + u32 cc_rb_backend_disable = 0; + u32 cc_gc_shader_pipe_config = 0; + u32 mc_arb_ramcfg; + u32 db_debug4; - /* FIXME: implement */ + /* setup chip specs */ + switch (rdev->family) { + case CHIP_RV770: + rdev->config.rv770.max_pipes = 4; + rdev->config.rv770.max_tile_pipes = 8; + rdev->config.rv770.max_simds = 10; + rdev->config.rv770.max_backends = 4; + rdev->config.rv770.max_gprs = 256; + rdev->config.rv770.max_threads = 248; + rdev->config.rv770.max_stack_entries = 512; + rdev->config.rv770.max_hw_contexts = 8; + rdev->config.rv770.max_gs_threads = 16 * 2; + rdev->config.rv770.sx_max_export_size = 128; + rdev->config.rv770.sx_max_export_pos_size = 16; + rdev->config.rv770.sx_max_export_smx_size = 112; + rdev->config.rv770.sq_num_cf_insts = 2; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0xF9; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + break; + case CHIP_RV730: + rdev->config.rv770.max_pipes = 2; + rdev->config.rv770.max_tile_pipes = 4; + rdev->config.rv770.max_simds = 8; + rdev->config.rv770.max_backends = 2; + rdev->config.rv770.max_gprs = 128; + rdev->config.rv770.max_threads = 248; + rdev->config.rv770.max_stack_entries = 256; + rdev->config.rv770.max_hw_contexts = 8; + rdev->config.rv770.max_gs_threads = 16 * 2; + rdev->config.rv770.sx_max_export_size = 256; + rdev->config.rv770.sx_max_export_pos_size = 32; + rdev->config.rv770.sx_max_export_smx_size = 224; + rdev->config.rv770.sq_num_cf_insts = 2; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0xf9; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + if (rdev->config.rv770.sx_max_export_pos_size > 16) { + rdev->config.rv770.sx_max_export_pos_size -= 16; + rdev->config.rv770.sx_max_export_smx_size += 16; + } + break; + case CHIP_RV710: + rdev->config.rv770.max_pipes = 2; + rdev->config.rv770.max_tile_pipes = 2; + rdev->config.rv770.max_simds = 2; + rdev->config.rv770.max_backends = 1; + rdev->config.rv770.max_gprs = 256; + rdev->config.rv770.max_threads = 192; + rdev->config.rv770.max_stack_entries = 256; + rdev->config.rv770.max_hw_contexts = 4; + rdev->config.rv770.max_gs_threads = 8 * 2; + rdev->config.rv770.sx_max_export_size = 128; + rdev->config.rv770.sx_max_export_pos_size = 16; + rdev->config.rv770.sx_max_export_smx_size = 112; + rdev->config.rv770.sq_num_cf_insts = 1; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0x40; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + break; + case CHIP_RV740: + rdev->config.rv770.max_pipes = 4; + rdev->config.rv770.max_tile_pipes = 4; + rdev->config.rv770.max_simds = 8; + rdev->config.rv770.max_backends = 4; + rdev->config.rv770.max_gprs = 256; + rdev->config.rv770.max_threads = 248; + rdev->config.rv770.max_stack_entries = 512; + rdev->config.rv770.max_hw_contexts = 8; + rdev->config.rv770.max_gs_threads = 16 * 2; + rdev->config.rv770.sx_max_export_size = 256; + rdev->config.rv770.sx_max_export_pos_size = 32; + rdev->config.rv770.sx_max_export_smx_size = 224; + rdev->config.rv770.sq_num_cf_insts = 2; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0x100; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + + if (rdev->config.rv770.sx_max_export_pos_size > 16) { + rdev->config.rv770.sx_max_export_pos_size -= 16; + rdev->config.rv770.sx_max_export_smx_size += 16; + } + break; + default: + break; + } + + /* Initialize HDP */ + j = 0; + for (i = 0; i < 32; i++) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + j += 0x18; + } + + WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); + + /* setup tiling, simd, pipe config */ + mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); + + switch (rdev->config.rv770.max_tile_pipes) { + case 1: + gb_tiling_config |= PIPE_TILING(0); + break; + case 2: + gb_tiling_config |= PIPE_TILING(1); + break; + case 4: + gb_tiling_config |= PIPE_TILING(2); + break; + case 8: + gb_tiling_config |= PIPE_TILING(3); + break; + default: + break; + } + + if (rdev->family == CHIP_RV770) + gb_tiling_config |= BANK_TILING(1); + else + gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_SHIFT) >> NOOFBANK_MASK); + + gb_tiling_config |= GROUP_SIZE(0); + + if (((mc_arb_ramcfg & NOOFROWS_MASK) & NOOFROWS_SHIFT) > 3) { + gb_tiling_config |= ROW_TILING(3); + gb_tiling_config |= SAMPLE_SPLIT(3); + } else { + gb_tiling_config |= + ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT)); + gb_tiling_config |= + SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT)); + } + + gb_tiling_config |= BANK_SWAPS(1); + + backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes, + rdev->config.rv770.max_backends, + (0xff << rdev->config.rv770.max_backends) & 0xff); + gb_tiling_config |= BACKEND_MAP(backend_map); + + cc_gc_shader_pipe_config = + INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK); + cc_gc_shader_pipe_config |= + INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK); + + cc_rb_backend_disable = + BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK); + + WREG32(GB_TILING_CONFIG, gb_tiling_config); + WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); + WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); + + WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); + WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); + + WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CGTS_SYS_TCC_DISABLE, 0); + WREG32(CGTS_TCC_DISABLE, 0); + WREG32(CGTS_USER_SYS_TCC_DISABLE, 0); + WREG32(CGTS_USER_TCC_DISABLE, 0); + + num_qd_pipes = + R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK); + WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK); + WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK); + + /* set HW defaults for 3D engine */ + WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | + ROQ_IB2_START(0x2b))); + + WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30)); + + WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | + SYNC_GRADIENT | + SYNC_WALKER | + SYNC_ALIGNER)); + + sx_debug_1 = RREG32(SX_DEBUG_1); + sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS; + WREG32(SX_DEBUG_1, sx_debug_1); + + smx_dc_ctl0 = RREG32(SMX_DC_CTL0); + smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff); + smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1); + WREG32(SMX_DC_CTL0, smx_dc_ctl0); + + WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) | + GS_FLUSH_CTL(4) | + ACK_FLUSH_CTL(3) | + SYNC_FLUSH_CTL)); + + if (rdev->family == CHIP_RV770) + WREG32(DB_DEBUG3, DB_CLK_OFF_DELAY(0x1f)); + else { + db_debug4 = RREG32(DB_DEBUG4); + db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER; + WREG32(DB_DEBUG4, db_debug4); + } + + WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) | + POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) | + SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1))); + + WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) | + SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) | + SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize))); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + + WREG32(VGT_NUM_INSTANCES, 1); + + WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0)); + + WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); + + WREG32(CP_PERFMON_CNTL, 0); + + sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) | + DONE_FIFO_HIWATER(0xe0) | + ALU_UPDATE_FIFO_HIWATER(0x8)); + switch (rdev->family) { + case CHIP_RV770: + sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1); + break; + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + default: + sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4); + break; + } + WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes); + + /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT + * should be adjusted as needed by the 2D/3D drivers. This just sets default values + */ + sq_config = RREG32(SQ_CONFIG); + sq_config &= ~(PS_PRIO(3) | + VS_PRIO(3) | + GS_PRIO(3) | + ES_PRIO(3)); + sq_config |= (DX9_CONSTS | + VC_ENABLE | + EXPORT_SRC_C | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + if (rdev->family == CHIP_RV710) + /* no vertex cache */ + sq_config &= ~VC_ENABLE; + + WREG32(SQ_CONFIG, sq_config); + + WREG32(SQ_GPR_RESOURCE_MGMT_1, (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) | + NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) | + NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2))); + + WREG32(SQ_GPR_RESOURCE_MGMT_2, (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) | + NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64))); + + sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) | + NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) | + NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8)); + if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads) + sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads); + else + sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8); + WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); + + WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) | + NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4))); + + WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) | + NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4))); + + sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) | + SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) | + SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) | + SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64)); + + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0); + + WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | + FORCE_EOV_MAX_REZ_CNT(255))); + + if (rdev->family == CHIP_RV710) + WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) | + AUTO_INVLD_EN(ES_AND_GS_AUTO))); + else + WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) | + AUTO_INVLD_EN(ES_AND_GS_AUTO))); + + switch (rdev->family) { + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV740: + gs_prim_buffer_depth = 384; + break; + case CHIP_RV710: + gs_prim_buffer_depth = 128; + break; + default: + break; + } + + num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16; + vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread; + /* Max value for this is 256 */ + if (vgt_gs_per_es > 256) + vgt_gs_per_es = 256; + + WREG32(VGT_ES_PER_GS, 128); + WREG32(VGT_GS_PER_ES, vgt_gs_per_es); + WREG32(VGT_GS_PER_VS, 2); + + /* more default values. 2D/3D driver should adjust as needed */ + WREG32(VGT_GS_VERTEX_REUSE, 16); + WREG32(PA_SC_LINE_STIPPLE_STATE, 0); + WREG32(VGT_STRMOUT_EN, 0); + WREG32(SX_MISC, 0); + WREG32(PA_SC_MODE_CNTL, 0); + WREG32(PA_SC_EDGERULE, 0xaaaaaaaa); + WREG32(PA_SC_AA_CONFIG, 0); + WREG32(PA_SC_CLIPRECT_RULE, 0xffff); + WREG32(PA_SC_LINE_STIPPLE, 0); + WREG32(SPI_INPUT_Z, 0); + WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2)); + WREG32(CB_COLOR7_FRAG, 0); + + /* clear render buffer base addresses */ + WREG32(CB_COLOR0_BASE, 0); + WREG32(CB_COLOR1_BASE, 0); + WREG32(CB_COLOR2_BASE, 0); + WREG32(CB_COLOR3_BASE, 0); + WREG32(CB_COLOR4_BASE, 0); + WREG32(CB_COLOR5_BASE, 0); + WREG32(CB_COLOR6_BASE, 0); + WREG32(CB_COLOR7_BASE, 0); + + WREG32(TCP_CNTL, 0); + + hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); + WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + + WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA | + NUM_CLIP_SEQ(3))); + +} + +int rv770_mc_init(struct radeon_device *rdev) +{ + fixed20_12 a; + u32 tmp; + int r; + + /* Get VRAM informations */ + /* FIXME: Don't know how to determine vram width, need to check + * vram_width usage + */ + rdev->mc.vram_width = 128; + rdev->mc.vram_is_ddr = true; /* Could aper size report 0 ? */ rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); + /* Setup GPU memory space */ + rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); + rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); + if (rdev->flags & RADEON_IS_AGP) { + r = radeon_agp_init(rdev); + if (r) + return r; + /* gtt_size is setup by radeon_agp_init */ + rdev->mc.gtt_location = rdev->mc.agp_base; + tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size; + /* Try to put vram before or after AGP because we + * we want SYSTEM_APERTURE to cover both VRAM and + * AGP so that GPU can catch out of VRAM/AGP access + */ + if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { + /* Enought place before */ + rdev->mc.vram_location = rdev->mc.gtt_location - + rdev->mc.mc_vram_size; + } else if (tmp > rdev->mc.mc_vram_size) { + /* Enought place after */ + rdev->mc.vram_location = rdev->mc.gtt_location + + rdev->mc.gtt_size; + } else { + /* Try to setup VRAM then AGP might not + * not work on some card + */ + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + } + } else { + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + } + rdev->mc.vram_start = rdev->mc.vram_location; + rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size; + rdev->mc.gtt_start = rdev->mc.gtt_location; + rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size; + /* FIXME: we should enforce default clock in case GPU is not in + * default setup + */ + a.full = rfixed_const(100); + rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk); + rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a); + return 0; +} +int rv770_gpu_reset(struct radeon_device *rdev) +{ + /* FIXME: implement */ + return 0; +} + +int rv770_resume(struct radeon_device *rdev) +{ + int r; + + rv770_mc_resume(rdev); + r = rv770_pcie_gart_enable(rdev); + if (r) + return r; + rv770_gpu_init(rdev); + r = radeon_ring_init(rdev, rdev->cp.ring_size); + if (r) + return r; + r = rv770_cp_load_microcode(rdev); + if (r) + return r; + r = r600_cp_resume(rdev); + if (r) + return r; + r = r600_wb_init(rdev); + if (r) + return r; + return 0; +} + +int rv770_suspend(struct radeon_device *rdev) +{ + /* FIXME: we should wait for ring to be empty */ + r700_cp_stop(rdev); + return 0; +} + +/* Plan is to move initialization in that function and use + * helper function so that radeon_device_init pretty much + * do nothing more than calling asic specific function. This + * should also allow to remove a bunch of callback function + * like vram_info. + */ +int rv770_init(struct radeon_device *rdev) +{ + int r; + + rdev->new_init_path = true; + r = radeon_dummy_page_init(rdev); + if (r) + return r; + /* This don't do much */ + r = radeon_gem_init(rdev); + if (r) + return r; + /* Read BIOS */ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + /* Must be an ATOMBIOS */ + if (!rdev->is_atom_bios) + return -EINVAL; + r = radeon_atombios_init(rdev); + if (r) + return r; + /* Post card if necessary */ + if (!r600_card_posted(rdev) && rdev->bios) { + DRM_INFO("GPU not posted. posting now...\n"); + atom_asic_init(rdev->mode_info.atom_context); + } + /* Initialize scratch registers */ + r600_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + r = radeon_clocks_init(rdev); + if (r) + return r; + /* Fence driver */ + r = radeon_fence_driver_init(rdev); + if (r) + return r; + r = rv770_mc_init(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + rv770_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return rv770_init(rdev); + } + return r; + } + /* Memory manager */ + r = radeon_object_init(rdev); + if (r) + return r; + rdev->cp.ring_obj = NULL; + r600_ring_init(rdev, 1024 * 1024); + + if (!rdev->me_fw || !rdev->pfp_fw) { + r = r600_cp_init_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load firmware!\n"); + return r; + } + } + + r = rv770_resume(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + rv770_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return rv770_init(rdev); + } + return r; + } + r = r600_blit_init(rdev); + if (r) { + DRM_ERROR("radeon: failled blitter (%d).\n", r); + return r; + } + r = radeon_ib_pool_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); + return r; + } + r = radeon_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failled testing IB (%d).\n", r); + return r; + } + return 0; +} + +void rv770_fini(struct radeon_device *rdev) +{ + r600_blit_fini(rdev); + radeon_ring_fini(rdev); + rv770_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); + radeon_gem_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_clocks_fini(rdev); +#if __OS_HAS_AGP + if (rdev->flags & RADEON_IS_AGP) + radeon_agp_fini(rdev); +#endif + radeon_object_fini(rdev); + if (rdev->is_atom_bios) { + radeon_atombios_fini(rdev); + } else { + radeon_combios_fini(rdev); + } + kfree(rdev->bios); + rdev->bios = NULL; + radeon_dummy_page_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h new file mode 100644 index 000000000000..4b9c3d6396ff --- /dev/null +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -0,0 +1,341 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef RV770_H +#define RV770_H + +#define R7XX_MAX_SH_GPRS 256 +#define R7XX_MAX_TEMP_GPRS 16 +#define R7XX_MAX_SH_THREADS 256 +#define R7XX_MAX_SH_STACK_ENTRIES 4096 +#define R7XX_MAX_BACKENDS 8 +#define R7XX_MAX_BACKENDS_MASK 0xff +#define R7XX_MAX_SIMDS 16 +#define R7XX_MAX_SIMDS_MASK 0xffff +#define R7XX_MAX_PIPES 8 +#define R7XX_MAX_PIPES_MASK 0xff + +/* Registers */ +#define CB_COLOR0_BASE 0x28040 +#define CB_COLOR1_BASE 0x28044 +#define CB_COLOR2_BASE 0x28048 +#define CB_COLOR3_BASE 0x2804C +#define CB_COLOR4_BASE 0x28050 +#define CB_COLOR5_BASE 0x28054 +#define CB_COLOR6_BASE 0x28058 +#define CB_COLOR7_BASE 0x2805C +#define CB_COLOR7_FRAG 0x280FC + +#define CC_GC_SHADER_PIPE_CONFIG 0x8950 +#define CC_RB_BACKEND_DISABLE 0x98F4 +#define BACKEND_DISABLE(x) ((x) << 16) +#define CC_SYS_RB_BACKEND_DISABLE 0x3F88 + +#define CGTS_SYS_TCC_DISABLE 0x3F90 +#define CGTS_TCC_DISABLE 0x9148 +#define CGTS_USER_SYS_TCC_DISABLE 0x3F94 +#define CGTS_USER_TCC_DISABLE 0x914C + +#define CONFIG_MEMSIZE 0x5428 + +#define CP_ME_CNTL 0x86D8 +#define CP_ME_HALT (1<<28) +#define CP_PFP_HALT (1<<26) +#define CP_ME_RAM_DATA 0xC160 +#define CP_ME_RAM_RADDR 0xC158 +#define CP_ME_RAM_WADDR 0xC15C +#define CP_MEQ_THRESHOLDS 0x8764 +#define STQ_SPLIT(x) ((x) << 0) +#define CP_PERFMON_CNTL 0x87FC +#define CP_PFP_UCODE_ADDR 0xC150 +#define CP_PFP_UCODE_DATA 0xC154 +#define CP_QUEUE_THRESHOLDS 0x8760 +#define ROQ_IB1_START(x) ((x) << 0) +#define ROQ_IB2_START(x) ((x) << 8) +#define CP_RB_CNTL 0xC104 +#define RB_BUFSZ(x) ((x)<<0) +#define RB_BLKSZ(x) ((x)<<8) +#define RB_NO_UPDATE (1<<27) +#define RB_RPTR_WR_ENA (1<<31) +#define BUF_SWAP_32BIT (2 << 16) +#define CP_RB_RPTR 0x8700 +#define CP_RB_RPTR_ADDR 0xC10C +#define CP_RB_RPTR_ADDR_HI 0xC110 +#define CP_RB_RPTR_WR 0xC108 +#define CP_RB_WPTR 0xC114 +#define CP_RB_WPTR_ADDR 0xC118 +#define CP_RB_WPTR_ADDR_HI 0xC11C +#define CP_RB_WPTR_DELAY 0x8704 +#define CP_SEM_WAIT_TIMER 0x85BC + +#define DB_DEBUG3 0x98B0 +#define DB_CLK_OFF_DELAY(x) ((x) << 11) +#define DB_DEBUG4 0x9B8C +#define DISABLE_TILE_COVERED_FOR_PS_ITER (1 << 6) + +#define DCP_TILING_CONFIG 0x6CA0 +#define PIPE_TILING(x) ((x) << 1) +#define BANK_TILING(x) ((x) << 4) +#define GROUP_SIZE(x) ((x) << 6) +#define ROW_TILING(x) ((x) << 8) +#define BANK_SWAPS(x) ((x) << 11) +#define SAMPLE_SPLIT(x) ((x) << 14) +#define BACKEND_MAP(x) ((x) << 16) + +#define GB_TILING_CONFIG 0x98F0 + +#define GC_USER_SHADER_PIPE_CONFIG 0x8954 +#define INACTIVE_QD_PIPES(x) ((x) << 8) +#define INACTIVE_QD_PIPES_MASK 0x0000FF00 +#define INACTIVE_SIMDS(x) ((x) << 16) +#define INACTIVE_SIMDS_MASK 0x00FF0000 + +#define GRBM_CNTL 0x8000 +#define GRBM_READ_TIMEOUT(x) ((x) << 0) +#define GRBM_SOFT_RESET 0x8020 +#define SOFT_RESET_CP (1<<0) +#define GRBM_STATUS 0x8010 +#define CMDFIFO_AVAIL_MASK 0x0000000F +#define GUI_ACTIVE (1<<31) +#define GRBM_STATUS2 0x8014 + +#define HDP_HOST_PATH_CNTL 0x2C00 +#define HDP_NONSURFACE_BASE 0x2C04 +#define HDP_NONSURFACE_INFO 0x2C08 +#define HDP_NONSURFACE_SIZE 0x2C0C +#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 +#define HDP_TILING_CONFIG 0x2F3C + +#define MC_ARB_RAMCFG 0x2760 +#define NOOFBANK_SHIFT 0 +#define NOOFBANK_MASK 0x00000003 +#define NOOFRANK_SHIFT 2 +#define NOOFRANK_MASK 0x00000004 +#define NOOFROWS_SHIFT 3 +#define NOOFROWS_MASK 0x00000038 +#define NOOFCOLS_SHIFT 6 +#define NOOFCOLS_MASK 0x000000C0 +#define CHANSIZE_SHIFT 8 +#define CHANSIZE_MASK 0x00000100 +#define BURSTLENGTH_SHIFT 9 +#define BURSTLENGTH_MASK 0x00000200 +#define MC_VM_AGP_TOP 0x2028 +#define MC_VM_AGP_BOT 0x202C +#define MC_VM_AGP_BASE 0x2030 +#define MC_VM_FB_LOCATION 0x2024 +#define MC_VM_MB_L1_TLB0_CNTL 0x2234 +#define MC_VM_MB_L1_TLB1_CNTL 0x2238 +#define MC_VM_MB_L1_TLB2_CNTL 0x223C +#define MC_VM_MB_L1_TLB3_CNTL 0x2240 +#define ENABLE_L1_TLB (1 << 0) +#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1) +#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 3) +#define SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 3) +#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 3) +#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 3) +#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 5) +#define EFFECTIVE_L1_TLB_SIZE(x) ((x)<<15) +#define EFFECTIVE_L1_QUEUE_SIZE(x) ((x)<<18) +#define MC_VM_MD_L1_TLB0_CNTL 0x2654 +#define MC_VM_MD_L1_TLB1_CNTL 0x2658 +#define MC_VM_MD_L1_TLB2_CNTL 0x265C +#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x203C +#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2038 +#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2034 + +#define PA_CL_ENHANCE 0x8A14 +#define CLIP_VTX_REORDER_ENA (1 << 0) +#define NUM_CLIP_SEQ(x) ((x) << 1) +#define PA_SC_AA_CONFIG 0x28C04 +#define PA_SC_CLIPRECT_RULE 0x2820C +#define PA_SC_EDGERULE 0x28230 +#define PA_SC_FIFO_SIZE 0x8BCC +#define SC_PRIM_FIFO_SIZE(x) ((x) << 0) +#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 12) +#define PA_SC_FORCE_EOV_MAX_CNTS 0x8B24 +#define FORCE_EOV_MAX_CLK_CNT(x) ((x)<<0) +#define FORCE_EOV_MAX_REZ_CNT(x) ((x)<<16) +#define PA_SC_LINE_STIPPLE 0x28A0C +#define PA_SC_LINE_STIPPLE_STATE 0x8B10 +#define PA_SC_MODE_CNTL 0x28A4C +#define PA_SC_MULTI_CHIP_CNTL 0x8B20 +#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 20) + +#define SCRATCH_REG0 0x8500 +#define SCRATCH_REG1 0x8504 +#define SCRATCH_REG2 0x8508 +#define SCRATCH_REG3 0x850C +#define SCRATCH_REG4 0x8510 +#define SCRATCH_REG5 0x8514 +#define SCRATCH_REG6 0x8518 +#define SCRATCH_REG7 0x851C +#define SCRATCH_UMSK 0x8540 +#define SCRATCH_ADDR 0x8544 + +#define SMX_DC_CTL0 0xA020 +#define USE_HASH_FUNCTION (1 << 0) +#define CACHE_DEPTH(x) ((x) << 1) +#define FLUSH_ALL_ON_EVENT (1 << 10) +#define STALL_ON_EVENT (1 << 11) +#define SMX_EVENT_CTL 0xA02C +#define ES_FLUSH_CTL(x) ((x) << 0) +#define GS_FLUSH_CTL(x) ((x) << 3) +#define ACK_FLUSH_CTL(x) ((x) << 6) +#define SYNC_FLUSH_CTL (1 << 8) + +#define SPI_CONFIG_CNTL 0x9100 +#define GPR_WRITE_PRIORITY(x) ((x) << 0) +#define DISABLE_INTERP_1 (1 << 5) +#define SPI_CONFIG_CNTL_1 0x913C +#define VTX_DONE_DELAY(x) ((x) << 0) +#define INTERP_ONE_PRIM_PER_ROW (1 << 4) +#define SPI_INPUT_Z 0x286D8 +#define SPI_PS_IN_CONTROL_0 0x286CC +#define NUM_INTERP(x) ((x)<<0) +#define POSITION_ENA (1<<8) +#define POSITION_CENTROID (1<<9) +#define POSITION_ADDR(x) ((x)<<10) +#define PARAM_GEN(x) ((x)<<15) +#define PARAM_GEN_ADDR(x) ((x)<<19) +#define BARYC_SAMPLE_CNTL(x) ((x)<<26) +#define PERSP_GRADIENT_ENA (1<<28) +#define LINEAR_GRADIENT_ENA (1<<29) +#define POSITION_SAMPLE (1<<30) +#define BARYC_AT_SAMPLE_ENA (1<<31) + +#define SQ_CONFIG 0x8C00 +#define VC_ENABLE (1 << 0) +#define EXPORT_SRC_C (1 << 1) +#define DX9_CONSTS (1 << 2) +#define ALU_INST_PREFER_VECTOR (1 << 3) +#define DX10_CLAMP (1 << 4) +#define CLAUSE_SEQ_PRIO(x) ((x) << 8) +#define PS_PRIO(x) ((x) << 24) +#define VS_PRIO(x) ((x) << 26) +#define GS_PRIO(x) ((x) << 28) +#define SQ_DYN_GPR_SIZE_SIMD_AB_0 0x8DB0 +#define SIMDA_RING0(x) ((x)<<0) +#define SIMDA_RING1(x) ((x)<<8) +#define SIMDB_RING0(x) ((x)<<16) +#define SIMDB_RING1(x) ((x)<<24) +#define SQ_DYN_GPR_SIZE_SIMD_AB_1 0x8DB4 +#define SQ_DYN_GPR_SIZE_SIMD_AB_2 0x8DB8 +#define SQ_DYN_GPR_SIZE_SIMD_AB_3 0x8DBC +#define SQ_DYN_GPR_SIZE_SIMD_AB_4 0x8DC0 +#define SQ_DYN_GPR_SIZE_SIMD_AB_5 0x8DC4 +#define SQ_DYN_GPR_SIZE_SIMD_AB_6 0x8DC8 +#define SQ_DYN_GPR_SIZE_SIMD_AB_7 0x8DCC +#define ES_PRIO(x) ((x) << 30) +#define SQ_GPR_RESOURCE_MGMT_1 0x8C04 +#define NUM_PS_GPRS(x) ((x) << 0) +#define NUM_VS_GPRS(x) ((x) << 16) +#define DYN_GPR_ENABLE (1 << 27) +#define NUM_CLAUSE_TEMP_GPRS(x) ((x) << 28) +#define SQ_GPR_RESOURCE_MGMT_2 0x8C08 +#define NUM_GS_GPRS(x) ((x) << 0) +#define NUM_ES_GPRS(x) ((x) << 16) +#define SQ_MS_FIFO_SIZES 0x8CF0 +#define CACHE_FIFO_SIZE(x) ((x) << 0) +#define FETCH_FIFO_HIWATER(x) ((x) << 8) +#define DONE_FIFO_HIWATER(x) ((x) << 16) +#define ALU_UPDATE_FIFO_HIWATER(x) ((x) << 24) +#define SQ_STACK_RESOURCE_MGMT_1 0x8C10 +#define NUM_PS_STACK_ENTRIES(x) ((x) << 0) +#define NUM_VS_STACK_ENTRIES(x) ((x) << 16) +#define SQ_STACK_RESOURCE_MGMT_2 0x8C14 +#define NUM_GS_STACK_ENTRIES(x) ((x) << 0) +#define NUM_ES_STACK_ENTRIES(x) ((x) << 16) +#define SQ_THREAD_RESOURCE_MGMT 0x8C0C +#define NUM_PS_THREADS(x) ((x) << 0) +#define NUM_VS_THREADS(x) ((x) << 8) +#define NUM_GS_THREADS(x) ((x) << 16) +#define NUM_ES_THREADS(x) ((x) << 24) + +#define SX_DEBUG_1 0x9058 +#define ENABLE_NEW_SMX_ADDRESS (1 << 16) +#define SX_EXPORT_BUFFER_SIZES 0x900C +#define COLOR_BUFFER_SIZE(x) ((x) << 0) +#define POSITION_BUFFER_SIZE(x) ((x) << 8) +#define SMX_BUFFER_SIZE(x) ((x) << 16) +#define SX_MISC 0x28350 + +#define TA_CNTL_AUX 0x9508 +#define DISABLE_CUBE_WRAP (1 << 0) +#define DISABLE_CUBE_ANISO (1 << 1) +#define SYNC_GRADIENT (1 << 24) +#define SYNC_WALKER (1 << 25) +#define SYNC_ALIGNER (1 << 26) +#define BILINEAR_PRECISION_6_BIT (0 << 31) +#define BILINEAR_PRECISION_8_BIT (1 << 31) + +#define TCP_CNTL 0x9610 + +#define VGT_CACHE_INVALIDATION 0x88C4 +#define CACHE_INVALIDATION(x) ((x)<<0) +#define VC_ONLY 0 +#define TC_ONLY 1 +#define VC_AND_TC 2 +#define AUTO_INVLD_EN(x) ((x) << 6) +#define NO_AUTO 0 +#define ES_AUTO 1 +#define GS_AUTO 2 +#define ES_AND_GS_AUTO 3 +#define VGT_ES_PER_GS 0x88CC +#define VGT_GS_PER_ES 0x88C8 +#define VGT_GS_PER_VS 0x88E8 +#define VGT_GS_VERTEX_REUSE 0x88D4 +#define VGT_NUM_INSTANCES 0x8974 +#define VGT_OUT_DEALLOC_CNTL 0x28C5C +#define DEALLOC_DIST_MASK 0x0000007F +#define VGT_STRMOUT_EN 0x28AB0 +#define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58 +#define VTX_REUSE_DEPTH_MASK 0x000000FF + +#define VM_CONTEXT0_CNTL 0x1410 +#define ENABLE_CONTEXT (1 << 0) +#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) +#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) +#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153C +#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C +#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155C +#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1518 +#define VM_L2_CNTL 0x1400 +#define ENABLE_L2_CACHE (1 << 0) +#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) +#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) +#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 14) +#define VM_L2_CNTL2 0x1404 +#define INVALIDATE_ALL_L1_TLBS (1 << 0) +#define INVALIDATE_L2_CACHE (1 << 1) +#define VM_L2_CNTL3 0x1408 +#define BANK_SELECT(x) ((x) << 0) +#define CACHE_UPDATE_MODE(x) ((x) << 6) +#define VM_L2_STATUS 0x140C +#define L2_BUSY (1 << 0) + +#define WAIT_UNTIL 0x8040 + +#endif |