summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/vc4/vc4_plane.c
diff options
context:
space:
mode:
authorMaxime Ripard <mripard@kernel.org>2024-10-25 18:15:41 +0100
committerDave Stevenson <dave.stevenson@raspberrypi.com>2024-11-27 14:37:19 +0000
commit7687a12153d344b5d97a19f1e156a02da8d0d756 (patch)
treec2773161ed5b7b17eff1aa20f2dda2c9867e7f72 /drivers/gpu/drm/vc4/vc4_plane.c
parent626ffc5f87c7f653fd9a5fda1f11f4071a9e9570 (diff)
drm/vc4: hvs: Add support for BCM2712 HVS
The HVS found in the BCM2712, while having a similar role, is very different from the one found in the previous SoCs. Indeed, the register layout is fairly different, and the DLIST format is new as well. Let's introduce the needed functions to support the new HVS. This commit adds the C-step register layout. The D-step will be added later. Signed-off-by: Maxime Ripard <mripard@kernel.org> Link: https://patchwork.freedesktop.org/patch/msgid/20241025-drm-vc4-2712-support-v2-10-35efa83c8fc0@raspberrypi.com Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_plane.c')
-rw-r--r--drivers/gpu/drm/vc4/vc4_plane.c767
1 files changed, 747 insertions, 20 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index ba6e86d62a77..5749287f6e3c 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -278,7 +278,10 @@ static bool plane_enabled(struct drm_plane_state *state)
static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
{
+ struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
+ struct vc4_hvs *hvs = vc4->hvs;
struct vc4_plane_state *vc4_state;
+ unsigned int i;
if (WARN_ON(!plane->state))
return NULL;
@@ -288,6 +291,12 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane
return NULL;
memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
+
+ for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
+ if (vc4_state->upm_handle[i])
+ refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
+ }
+
vc4_state->dlist_initialized = 0;
__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
@@ -306,18 +315,47 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane
return &vc4_state->base;
}
+static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
+{
+ struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle];
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&hvs->mm_lock, irqflags);
+ drm_mm_remove_node(&refcount->upm);
+ spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
+ refcount->upm.start = 0;
+ refcount->upm.size = 0;
+ refcount->size = 0;
+
+ ida_free(&hvs->upm_handles, upm_handle);
+}
+
static void vc4_plane_destroy_state(struct drm_plane *plane,
struct drm_plane_state *state)
{
struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
+ struct vc4_hvs *hvs = vc4->hvs;
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ unsigned int i;
if (drm_mm_node_allocated(&vc4_state->lbm)) {
unsigned long irqflags;
- spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
+ spin_lock_irqsave(&hvs->mm_lock, irqflags);
drm_mm_remove_node(&vc4_state->lbm);
- spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
+ spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
+ }
+
+ for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
+ struct vc4_upm_refcounts *refcount;
+
+ if (!vc4_state->upm_handle[i])
+ continue;
+
+ refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]];
+
+ if (refcount_dec_and_test(&refcount->refcount))
+ vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]);
}
kfree(vc4_state->dlist);
@@ -538,6 +576,11 @@ static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
recip = ~0 / scale;
vc4_dlist_write(vc4_state,
+ /*
+ * The BCM2712 is lacking BIT(31) compared to
+ * the previous generations, but we don't use
+ * it.
+ */
VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
vc4_dlist_write(vc4_state,
@@ -598,10 +641,15 @@ static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst,
vc4_dlist_write(vc4_state,
SCALER_PPF_AGC |
VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
+ /*
+ * The register layout documentation is slightly
+ * different to setup the phase in the BCM2712,
+ * but they seem equivalent.
+ */
VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
}
-static u32 vc4_lbm_size(struct drm_plane_state *state)
+static u32 __vc4_lbm_size(struct drm_plane_state *state)
{
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
@@ -649,6 +697,131 @@ static u32 vc4_lbm_size(struct drm_plane_state *state)
return lbm;
}
+static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
+ unsigned int channel)
+{
+ const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+
+ switch (vc4_state->y_scaling[channel]) {
+ case VC4_SCALING_PPF:
+ return 4;
+
+ case VC4_SCALING_TPZ:
+ return 2;
+
+ default:
+ return 0;
+ }
+}
+
+static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
+ unsigned int channel)
+{
+ const struct drm_format_info *info = state->fb->format;
+ const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+
+ if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
+ return 0;
+
+ if (info->is_yuv)
+ return channel ? 2 : 1;
+
+ if (info->has_alpha)
+ return 4;
+
+ return 3;
+}
+
+static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
+ unsigned int channel)
+{
+ const struct drm_format_info *info = state->fb->format;
+ const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ unsigned int channels_scaled = 0;
+ unsigned int components, words, wpc;
+ unsigned int width, lines;
+ unsigned int i;
+
+ /* LBM is meant to use the smaller of source or dest width, but there
+ * is a issue with UV scaling that the size required for the second
+ * channel is based on the source width only.
+ */
+ if (info->hsub > 1 && channel == 1)
+ width = state->src_w >> 16;
+ else
+ width = min(state->src_w >> 16, state->crtc_w);
+ width = round_up(width / info->hsub, 4);
+
+ wpc = vc4_lbm_words_per_component(state, channel);
+ if (!wpc)
+ return 0;
+
+ components = vc4_lbm_components(state, channel);
+ if (!components)
+ return 0;
+
+ if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha)
+ components -= 1;
+
+ words = width * wpc * components;
+
+ lines = DIV_ROUND_UP(words, 128 / info->hsub);
+
+ for (i = 0; i < 2; i++)
+ if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE)
+ channels_scaled++;
+
+ if (channels_scaled == 1)
+ lines = lines / 2;
+
+ return lines;
+}
+
+static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
+{
+ const struct drm_format_info *info = state->fb->format;
+
+ if (info->hsub > 1)
+ return max(vc4_lbm_channel_size(state, 0),
+ vc4_lbm_channel_size(state, 1));
+ else
+ return vc4_lbm_channel_size(state, 0);
+}
+
+static u32 vc4_lbm_size(struct drm_plane_state *state)
+{
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
+
+ /* LBM is not needed when there's no vertical scaling. */
+ if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
+ vc4_state->y_scaling[1] == VC4_SCALING_NONE)
+ return 0;
+
+ if (vc4->gen >= VC4_GEN_6_C)
+ return __vc6_lbm_size(state);
+ else
+ return __vc4_lbm_size(state);
+}
+
+static size_t vc6_upm_size(const struct drm_plane_state *state,
+ unsigned int plane)
+{
+ const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ unsigned int stride = state->fb->pitches[plane];
+
+ /*
+ * TODO: This only works for raster formats, and is sub-optimal
+ * for buffers with a stride aligned on 32 bytes.
+ */
+ unsigned int words_per_line = (stride + 62) / 32;
+ unsigned int fetch_region_size = words_per_line * 32;
+ unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
+ unsigned int buffer_size = fetch_region_size * buffer_lines;
+
+ return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
+}
+
static void vc4_write_scaling_parameters(struct drm_plane_state *state,
int channel)
{
@@ -750,6 +923,10 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
if (!lbm_size)
return 0;
+ /*
+ * NOTE: BCM2712 doesn't need to be aligned, since the size
+ * returned by vc4_lbm_size() is in words already.
+ */
if (vc4->gen == VC4_GEN_5)
lbm_size = ALIGN(lbm_size, 64);
else if (vc4->gen == VC4_GEN_4)
@@ -787,6 +964,108 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
return 0;
}
+static int vc6_plane_allocate_upm(struct drm_plane_state *state)
+{
+ const struct drm_format_info *info = state->fb->format;
+ struct drm_device *drm = state->plane->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_hvs *hvs = vc4->hvs;
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ unsigned int i;
+ int ret;
+
+ WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);
+
+ vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;
+
+ for (i = 0; i < info->num_planes; i++) {
+ struct vc4_upm_refcounts *refcount;
+ int upm_handle;
+ unsigned long irqflags;
+ size_t upm_size;
+
+ upm_size = vc6_upm_size(state, i);
+ if (!upm_size)
+ return -EINVAL;
+ upm_handle = vc4_state->upm_handle[i];
+
+ if (upm_handle &&
+ hvs->upm_refcounts[upm_handle].size == upm_size) {
+ /* Allocation is the same size as the previous user of
+ * the plane. Keep the allocation.
+ */
+ vc4_state->upm_handle[i] = upm_handle;
+ } else {
+ if (upm_handle &&
+ refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) {
+ vc4_plane_release_upm_ida(hvs, upm_handle);
+ vc4_state->upm_handle[i] = 0;
+ }
+
+ upm_handle = ida_alloc_range(&hvs->upm_handles, 1,
+ VC4_NUM_UPM_HANDLES,
+ GFP_KERNEL);
+ if (upm_handle < 0) {
+ drm_dbg(drm, "Out of upm_handles\n");
+ return upm_handle;
+ }
+ vc4_state->upm_handle[i] = upm_handle;
+
+ refcount = &hvs->upm_refcounts[upm_handle];
+ refcount_set(&refcount->refcount, 1);
+ refcount->size = upm_size;
+
+ spin_lock_irqsave(&hvs->mm_lock, irqflags);
+ ret = drm_mm_insert_node_generic(&hvs->upm_mm,
+ &refcount->upm,
+ upm_size, HVS_UBM_WORD_SIZE,
+ 0, 0);
+ spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
+ if (ret) {
+ drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
+ refcount_set(&refcount->refcount, 0);
+ ida_free(&hvs->upm_handles, upm_handle);
+ vc4_state->upm_handle[i] = 0;
+ return ret;
+ }
+ }
+
+ refcount = &hvs->upm_refcounts[upm_handle];
+ vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
+ VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE,
+ SCALER6_PTR0_UPM_BASE) |
+ VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
+ SCALER6_PTR0_UPM_HANDLE) |
+ VC4_SET_FIELD(vc4_state->upm_buffer_lines,
+ SCALER6_PTR0_UPM_BUFF_SIZE);
+ }
+
+ return 0;
+}
+
+static void vc6_plane_free_upm(struct drm_plane_state *state)
+{
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ struct drm_device *drm = state->plane->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_hvs *hvs = vc4->hvs;
+ unsigned int i;
+
+ WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C);
+
+ for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
+ unsigned int upm_handle;
+
+ upm_handle = vc4_state->upm_handle[i];
+ if (!upm_handle)
+ continue;
+
+ if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount))
+ vc4_plane_release_upm_ida(hvs, upm_handle);
+ vc4_state->upm_handle[i] = 0;
+ }
+}
+
/*
* The colorspace conversion matrices are held in 3 entries in the dlist.
* Create an array of them, with entries for each full and limited mode, and
@@ -1363,6 +1642,412 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
return 0;
}
+static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
+{
+ struct drm_plane_state *state = &vc4_state->base;
+ u32 ret = 0;
+
+ if (vc4_state->is_yuv) {
+ enum drm_color_encoding color_encoding = state->color_encoding;
+ enum drm_color_range color_range = state->color_range;
+
+ ret |= SCALER6_CTL2_CSC_ENABLE;
+
+ /* CSC pre-loaded with:
+ * 0 = BT601 limited range
+ * 1 = BT709 limited range
+ * 2 = BT2020 limited range
+ * 3 = BT601 full range
+ * 4 = BT709 full range
+ * 5 = BT2020 full range
+ */
+ if (color_encoding > DRM_COLOR_YCBCR_BT2020)
+ color_encoding = DRM_COLOR_YCBCR_BT601;
+ if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
+ color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
+
+ ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
+ SCALER6_CTL2_BRCM_CFC_CONTROL);
+ }
+
+ return ret;
+}
+
+static int vc6_plane_mode_set(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ struct drm_device *drm = plane->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ struct drm_framebuffer *fb = state->fb;
+ const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
+ u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
+ int num_planes = fb->format->num_planes;
+ u32 h_subsample = fb->format->hsub;
+ u32 v_subsample = fb->format->vsub;
+ bool mix_plane_alpha;
+ bool covers_screen;
+ u32 scl0, scl1, pitch0;
+ u32 tiling, src_x, src_y;
+ u32 width, height;
+ u32 hvs_format = format->hvs;
+ u32 offsets[3] = { 0 };
+ unsigned int rotation;
+ int ret, i;
+
+ if (vc4_state->dlist_initialized)
+ return 0;
+
+ ret = vc4_plane_setup_clipping_and_scaling(state);
+ if (ret)
+ return ret;
+
+ width = vc4_state->src_w[0] >> 16;
+ height = vc4_state->src_h[0] >> 16;
+
+ /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
+ * and 4:4:4, scl1 should be set to scl0 so both channels of
+ * the scaler do the same thing. For YUV, the Y plane needs
+ * to be put in channel 1 and Cb/Cr in channel 0, so we swap
+ * the scl fields here.
+ */
+ if (num_planes == 1) {
+ scl0 = vc4_get_scl_field(state, 0);
+ scl1 = scl0;
+ } else {
+ scl0 = vc4_get_scl_field(state, 1);
+ scl1 = vc4_get_scl_field(state, 0);
+ }
+
+ rotation = drm_rotation_simplify(state->rotation,
+ DRM_MODE_ROTATE_0 |
+ DRM_MODE_REFLECT_X |
+ DRM_MODE_REFLECT_Y);
+
+ /* We must point to the last line when Y reflection is enabled. */
+ src_y = vc4_state->src_y >> 16;
+ if (rotation & DRM_MODE_REFLECT_Y)
+ src_y += height - 1;
+
+ src_x = vc4_state->src_x >> 16;
+
+ switch (base_format_mod) {
+ case DRM_FORMAT_MOD_LINEAR:
+ tiling = SCALER6_CTL0_ADDR_MODE_LINEAR;
+
+ /* Adjust the base pointer to the first pixel to be scanned
+ * out.
+ */
+ for (i = 0; i < num_planes; i++) {
+ offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
+ offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
+ }
+
+ break;
+
+ case DRM_FORMAT_MOD_BROADCOM_SAND128:
+ case DRM_FORMAT_MOD_BROADCOM_SAND256: {
+ uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
+ u32 components_per_word;
+ u32 starting_offset;
+ u32 fetch_count;
+
+ if (param > SCALER_TILE_HEIGHT_MASK) {
+ DRM_DEBUG_KMS("SAND height too large (%d)\n",
+ param);
+ return -EINVAL;
+ }
+
+ if (fb->format->format == DRM_FORMAT_P030) {
+ hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
+ tiling = SCALER6_CTL0_ADDR_MODE_128B;
+ } else {
+ hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE;
+
+ switch (base_format_mod) {
+ case DRM_FORMAT_MOD_BROADCOM_SAND128:
+ tiling = SCALER6_CTL0_ADDR_MODE_128B;
+ break;
+ case DRM_FORMAT_MOD_BROADCOM_SAND256:
+ tiling = SCALER6_CTL0_ADDR_MODE_256B;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* Adjust the base pointer to the first pixel to be scanned
+ * out.
+ *
+ * For P030, y_ptr [31:4] is the 128bit word for the start pixel
+ * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
+ * word that should be taken as the first pixel.
+ * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
+ * element within the 128bit word, eg for pixel 3 the value
+ * should be 6.
+ */
+ for (i = 0; i < num_planes; i++) {
+ u32 tile_w, tile, x_off, pix_per_tile;
+
+ if (fb->format->format == DRM_FORMAT_P030) {
+ /*
+ * Spec says: bits [31:4] of the given address
+ * should point to the 128-bit word containing
+ * the desired starting pixel, and bits[3:0]
+ * should be between 0 and 11, indicating which
+ * of the 12-pixels in that 128-bit word is the
+ * first pixel to be used
+ */
+ u32 remaining_pixels = src_x % 96;
+ u32 aligned = remaining_pixels / 12;
+ u32 last_bits = remaining_pixels % 12;
+
+ x_off = aligned * 16 + last_bits;
+ tile_w = 128;
+ pix_per_tile = 96;
+ } else {
+ switch (base_format_mod) {
+ case DRM_FORMAT_MOD_BROADCOM_SAND128:
+ tile_w = 128;
+ break;
+ case DRM_FORMAT_MOD_BROADCOM_SAND256:
+ tile_w = 256;
+ break;
+ default:
+ return -EINVAL;
+ }
+ pix_per_tile = tile_w / fb->format->cpp[0];
+ x_off = (src_x % pix_per_tile) /
+ (i ? h_subsample : 1) *
+ fb->format->cpp[i];
+ }
+
+ tile = src_x / pix_per_tile;
+
+ offsets[i] += param * tile_w * tile;
+ offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
+ offsets[i] += x_off & ~(i ? 1 : 0);
+ }
+
+ components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
+ starting_offset = src_x % components_per_word;
+ fetch_count = (width + starting_offset + components_per_word - 1) /
+ components_per_word;
+
+ pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
+ VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
+ break;
+ }
+
+ default:
+ DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
+ (long long)fb->modifier);
+ return -EINVAL;
+ }
+
+ /* fetch an extra pixel if we don't actually line up with the left edge. */
+ if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
+ width++;
+
+ /* same for the right side */
+ if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
+ vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
+ width++;
+
+ /* now for the top */
+ if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
+ height++;
+
+ /* and the bottom */
+ if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
+ vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
+ height++;
+
+ /* for YUV444 hardware wants double the width, otherwise it doesn't
+ * fetch full width of chroma
+ */
+ if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
+ width <<= 1;
+
+ /* Don't waste cycles mixing with plane alpha if the set alpha
+ * is opaque or there is no per-pixel alpha information.
+ * In any case we use the alpha property value as the fixed alpha.
+ */
+ mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
+ fb->format->has_alpha;
+
+ /* Control Word 0: Scaling Configuration & Element Validity*/
+ vc4_dlist_write(vc4_state,
+ SCALER6_CTL0_VALID |
+ VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
+ VC4_SET_FIELD(0, SCALER6_CTL0_ALPHA_MASK) |
+ (vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
+ VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
+ VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
+ VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
+ VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));
+
+ /* Position Word 0: Image Position */
+ vc4_state->pos0_offset = vc4_state->dlist_count;
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
+ (rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
+ VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));
+
+ /* Control Word 2: Alpha Value & CSC */
+ vc4_dlist_write(vc4_state,
+ vc6_plane_get_csc_mode(vc4_state) |
+ vc4_hvs5_get_alpha_blend_mode(state) |
+ (mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
+ VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));
+
+ /* Position Word 1: Scaled Image Dimensions */
+ if (!vc4_state->is_unity)
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(vc4_state->crtc_h - 1,
+ SCALER6_POS1_SCL_LINES) |
+ VC4_SET_FIELD(vc4_state->crtc_w - 1,
+ SCALER6_POS1_SCL_WIDTH));
+
+ /* Position Word 2: Source Image Size */
+ vc4_state->pos2_offset = vc4_state->dlist_count;
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(height - 1,
+ SCALER6_POS2_SRC_LINES) |
+ VC4_SET_FIELD(width - 1,
+ SCALER6_POS2_SRC_WIDTH));
+
+ /* Position Word 3: Context */
+ vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+
+ /*
+ * TODO: This only covers Raster Scan Order planes
+ */
+ for (i = 0; i < num_planes; i++) {
+ struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i);
+ dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i];
+
+ /* Pointer Word 0 */
+ vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
+ vc4_dlist_write(vc4_state,
+ (rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
+ /*
+ * The UPM buffer will be allocated in
+ * vc6_plane_allocate_upm().
+ */
+ VC4_SET_FIELD(upper_32_bits(paddr) & 0xff,
+ SCALER6_PTR0_UPPER_ADDR));
+
+ /* Pointer Word 1 */
+ vc4_dlist_write(vc4_state, lower_32_bits(paddr));
+
+ /* Pointer Word 2 */
+ if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
+ base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(fb->pitches[i],
+ SCALER6_PTR2_PITCH));
+ } else {
+ vc4_dlist_write(vc4_state, pitch0);
+ }
+ }
+
+ /*
+ * Palette Word 0
+ * TODO: We're not using the palette mode
+ */
+
+ /*
+ * Trans Word 0
+ * TODO: It's only relevant if we set the trans_rgb bit in the
+ * control word 0, and we don't at the moment.
+ */
+
+ vc4_state->lbm_offset = 0;
+
+ if (!vc4_state->is_unity || fb->format->is_yuv) {
+ /*
+ * Reserve a slot for the LBM Base Address. The real value will
+ * be set when calling vc4_plane_allocate_lbm().
+ */
+ if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
+ vc4_state->lbm_offset = vc4_state->dlist_count;
+ vc4_dlist_counter_increment(vc4_state);
+ }
+
+ if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
+ if (num_planes > 1)
+ /*
+ * Emit Cb/Cr as channel 0 and Y as channel
+ * 1. This matches how we set up scl0/scl1
+ * above.
+ */
+ vc4_write_scaling_parameters(state, 1);
+
+ vc4_write_scaling_parameters(state, 0);
+ }
+
+ /*
+ * If any PPF setup was done, then all the kernel
+ * pointers get uploaded.
+ */
+ if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
+ vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
+ vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
+ vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
+ u32 kernel =
+ VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
+ SCALER_PPF_KERNEL_OFFSET);
+
+ /* HPPF plane 0 */
+ vc4_dlist_write(vc4_state, kernel);
+ /* VPPF plane 0 */
+ vc4_dlist_write(vc4_state, kernel);
+ /* HPPF plane 1 */
+ vc4_dlist_write(vc4_state, kernel);
+ /* VPPF plane 1 */
+ vc4_dlist_write(vc4_state, kernel);
+ }
+ }
+
+ vc4_dlist_write(vc4_state, SCALER6_CTL0_END);
+
+ vc4_state->dlist[0] |=
+ VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);
+
+ /* crtc_* are already clipped coordinates. */
+ covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
+ vc4_state->crtc_w == state->crtc->mode.hdisplay &&
+ vc4_state->crtc_h == state->crtc->mode.vdisplay;
+
+ /*
+ * Background fill might be necessary when the plane has per-pixel
+ * alpha content or a non-opaque plane alpha and could blend from the
+ * background or does not cover the entire screen.
+ */
+ vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
+ state->alpha != DRM_BLEND_ALPHA_OPAQUE;
+
+ /*
+ * Flag the dlist as initialized to avoid checking it twice in case
+ * the async update check already called vc4_plane_mode_set() and
+ * decided to fallback to sync update because async update was not
+ * possible.
+ */
+ vc4_state->dlist_initialized = 1;
+
+ vc4_plane_calc_load(state);
+
+ drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
+ plane->base.id, plane->name, vc4_state->dlist_count);
+
+ return 0;
+}
+
/* If a modeset involves changing the setup of a plane, the atomic
* infrastructure will call this to validate a proposed plane setup.
* However, if a plane isn't getting updated, this (and the
@@ -1373,6 +2058,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
static int vc4_plane_atomic_check(struct drm_plane *plane,
struct drm_atomic_state *state)
{
+ struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
plane);
struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
@@ -1380,10 +2066,21 @@ static int vc4_plane_atomic_check(struct drm_plane *plane,
vc4_state->dlist_count = 0;
- if (!plane_enabled(new_plane_state))
+ if (!plane_enabled(new_plane_state)) {
+ struct drm_plane_state *old_plane_state =
+ drm_atomic_get_old_plane_state(state, plane);
+
+ if (vc4->gen >= VC4_GEN_6_C && old_plane_state &&
+ plane_enabled(old_plane_state)) {
+ vc6_plane_free_upm(new_plane_state);
+ }
return 0;
+ }
- ret = vc4_plane_mode_set(plane, new_plane_state);
+ if (vc4->gen >= VC4_GEN_6_C)
+ ret = vc6_plane_mode_set(plane, new_plane_state);
+ else
+ ret = vc4_plane_mode_set(plane, new_plane_state);
if (ret)
return ret;
@@ -1391,6 +2088,12 @@ static int vc4_plane_atomic_check(struct drm_plane *plane,
if (ret)
return ret;
+ if (vc4->gen >= VC4_GEN_6_C) {
+ ret = vc6_plane_allocate_upm(new_plane_state);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
@@ -1439,7 +2142,8 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
{
struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
- uint32_t addr;
+ struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
+ dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0];
int idx;
if (!drm_dev_enter(plane->dev, &idx))
@@ -1449,19 +2153,38 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
* because this is only called on the primary plane.
*/
WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
- addr = bo->dma_addr + fb->offsets[0];
- /* Write the new address into the hardware immediately. The
- * scanout will start from this address as soon as the FIFO
- * needs to refill with pixels.
- */
- writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
+ if (vc4->gen == VC4_GEN_6_C) {
+ u32 value;
- /* Also update the CPU-side dlist copy, so that any later
- * atomic updates that don't do a new modeset on our plane
- * also use our updated address.
- */
- vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
+ value = vc4_state->dlist[vc4_state->ptr0_offset[0]] &
+ ~SCALER6_PTR0_UPPER_ADDR_MASK;
+ value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff,
+ SCALER6_PTR0_UPPER_ADDR);
+
+ writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
+ vc4_state->dlist[vc4_state->ptr0_offset[0]] = value;
+
+ value = lower_32_bits(dma_addr);
+ writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]);
+ vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value;
+ } else {
+ u32 addr;
+
+ addr = (u32)dma_addr;
+
+ /* Write the new address into the hardware immediately. The
+ * scanout will start from this address as soon as the FIFO
+ * needs to refill with pixels.
+ */
+ writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]);
+
+ /* Also update the CPU-side dlist copy, so that any later
+ * atomic updates that don't do a new modeset on our plane
+ * also use our updated address.
+ */
+ vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr;
+ }
drm_dev_exit(idx);
}
@@ -1543,13 +2266,17 @@ static void vc4_plane_atomic_async_update(struct drm_plane *plane,
static int vc4_plane_atomic_async_check(struct drm_plane *plane,
struct drm_atomic_state *state)
{
+ struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
plane);
struct vc4_plane_state *old_vc4_state, *new_vc4_state;
int ret;
u32 i;
- ret = vc4_plane_mode_set(plane, new_plane_state);
+ if (vc4->gen <= VC4_GEN_5)
+ ret = vc4_plane_mode_set(plane, new_plane_state);
+ else
+ ret = vc6_plane_mode_set(plane, new_plane_state);
if (ret)
return ret;
@@ -1723,7 +2450,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
};
for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
- if (!hvs_formats[i].hvs5_only || vc4->gen == VC4_GEN_5) {
+ if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) {
formats[num_formats] = hvs_formats[i].drm;
num_formats++;
}
@@ -1738,7 +2465,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
return ERR_CAST(vc4_plane);
plane = &vc4_plane->base;
- if (vc4->gen == VC4_GEN_5)
+ if (vc4->gen >= VC4_GEN_5)
drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
else
drm_plane_helper_add(plane, &vc4_plane_helper_funcs);