From 784b1a8435da458f10f3837477ae0a18b579d278 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 23 Oct 2018 11:07:05 +0100 Subject: drm/i915/perf: remove redundant oa buffer initialization We initialize the OA buffer every time we enable the OA unit (first call in gen[78]_oa_enable), so we don't need to initialize when preparing the metric set. Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20181023100707.31738-3-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_perf.c') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 664b96bb65a3..6a8c9ae9430e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1528,8 +1528,6 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) goto err_unpin; } - dev_priv->perf.oa.ops.init_oa_buffer(dev_priv); - DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma), dev_priv->perf.oa.oa_buffer.vaddr); @@ -1998,7 +1996,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } - if (!dev_priv->perf.oa.ops.init_oa_buffer) { + if (!dev_priv->perf.oa.ops.enable_metric_set) { DRM_DEBUG("OA unit not supported\n"); return -ENODEV; } @@ -3387,7 +3385,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.is_valid_mux_reg = hsw_is_valid_mux_addr; dev_priv->perf.oa.ops.is_valid_flex_reg = NULL; - dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; @@ -3406,7 +3403,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv) */ 
dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; - dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer; dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; dev_priv->perf.oa.ops.read = gen8_oa_read; -- cgit v1.2.3 From 5728de2f4f6abc37a2067f2a7edc41c8678574d8 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 23 Oct 2018 11:07:06 +0100 Subject: drm/i915/perf: pass stream to vfuncs when possible We want to use some of the properties of the perf stream to program the hardware in a later commit. v2: Pass only perf stream as argument (Matthew) Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20181023100707.31738-4-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 43 +++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_perf.c') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 6a8c9ae9430e..4529edfdcfc8 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -890,8 +890,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", dev_priv->perf.oa.period_exponent); - dev_priv->perf.oa.ops.oa_disable(dev_priv); - dev_priv->perf.oa.ops.oa_enable(dev_priv); + dev_priv->perf.oa.ops.oa_disable(stream); + dev_priv->perf.oa.ops.oa_enable(stream); /* * Note: .oa_enable() is expected to re-init the oabuffer and @@ -1114,8 +1114,8 @@ static int gen7_oa_read(struct i915_perf_stream *stream, DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", dev_priv->perf.oa.period_exponent); - dev_priv->perf.oa.ops.oa_disable(dev_priv); - dev_priv->perf.oa.ops.oa_enable(dev_priv); + dev_priv->perf.oa.ops.oa_disable(stream); + dev_priv->perf.oa.ops.oa_enable(stream); oastatus1 = I915_READ(GEN7_OASTATUS1); } @@ -1561,9 +1561,11 
@@ static void config_oa_regs(struct drm_i915_private *dev_priv, } } -static int hsw_enable_metric_set(struct drm_i915_private *dev_priv, - const struct i915_oa_config *oa_config) +static int hsw_enable_metric_set(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; + const struct i915_oa_config *oa_config = stream->oa_config; + /* PRM: * * OA unit is using “crclk” for its functionality. When trunk @@ -1765,9 +1767,10 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, return 0; } -static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, - const struct i915_oa_config *oa_config) +static int gen8_enable_metric_set(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; + const struct i915_oa_config *oa_config = stream->oa_config; int ret; /* @@ -1835,10 +1838,10 @@ static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE); } -static void gen7_oa_enable(struct drm_i915_private *dev_priv) +static void gen7_oa_enable(struct i915_perf_stream *stream) { - struct i915_gem_context *ctx = - dev_priv->perf.oa.exclusive_stream->ctx; + struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_gem_context *ctx = stream->ctx; u32 ctx_id = dev_priv->perf.oa.specific_ctx_id; bool periodic = dev_priv->perf.oa.periodic; u32 period_exponent = dev_priv->perf.oa.period_exponent; @@ -1865,8 +1868,9 @@ static void gen7_oa_enable(struct drm_i915_private *dev_priv) GEN7_OACONTROL_ENABLE); } -static void gen8_oa_enable(struct drm_i915_private *dev_priv) +static void gen8_oa_enable(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; u32 report_format = dev_priv->perf.oa.oa_buffer.format; /* @@ -1903,7 +1907,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - 
dev_priv->perf.oa.ops.oa_enable(dev_priv); + dev_priv->perf.oa.ops.oa_enable(stream); if (dev_priv->perf.oa.periodic) hrtimer_start(&dev_priv->perf.oa.poll_check_timer, @@ -1911,8 +1915,10 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) HRTIMER_MODE_REL_PINNED); } -static void gen7_oa_disable(struct drm_i915_private *dev_priv) +static void gen7_oa_disable(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; + I915_WRITE(GEN7_OACONTROL, 0); if (intel_wait_for_register(dev_priv, GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0, @@ -1920,8 +1926,10 @@ static void gen7_oa_disable(struct drm_i915_private *dev_priv) DRM_ERROR("wait for OA to be disabled timed out\n"); } -static void gen8_oa_disable(struct drm_i915_private *dev_priv) +static void gen8_oa_disable(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; + I915_WRITE(GEN8_OACONTROL, 0); if (intel_wait_for_register(dev_priv, GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0, @@ -1941,7 +1949,7 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - dev_priv->perf.oa.ops.oa_disable(dev_priv); + dev_priv->perf.oa.ops.oa_disable(stream); if (dev_priv->perf.oa.periodic) hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer); @@ -2090,8 +2098,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (ret) goto err_lock; - ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv, - stream->oa_config); + ret = dev_priv->perf.oa.ops.enable_metric_set(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; -- cgit v1.2.3 From cd956bfcd0f58d20485ac0a785415f7d9327a95f Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 23 Oct 2018 11:07:07 +0100 Subject: drm/i915/perf: add a parameter to control the size of OA buffer The way our hardware is designed doesn't seem to let us use the MI_RECORD_PERF_COUNT command without setting up a 
circular buffer. In the case where the user didn't request OA reports to be available through the i915 perf stream, we can set the OA buffer to the minimum size to avoid consuming memory which won't be used by the driver. v2: Simplify oa buffer size exponent selection (Chris) Reuse vma size field (Lionel) v3: Restrict size opening parameter to values supported by HW (Chris) v4: Drop out of date comment (Matt) Add debug message when buffer size is rejected (Matt) Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20181023100707.31738-5-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 99 ++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_perf.c') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 4529edfdcfc8..1712b68de8f5 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -212,13 +212,7 @@ #include "i915_oa_icl.h" #include "intel_lrc_reg.h" -/* HW requires this to be a power of two, between 128k and 16M, though driver - * is currently generally designed assuming the largest 16M size is used such - * that the overflow cases are unlikely in normal operation. - */ -#define OA_BUFFER_SIZE SZ_16M - -#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1)) +#define OA_TAKEN(tail, head) (((tail) - (head)) & (dev_priv->perf.oa.oa_buffer.vma->size - 1)) /** * DOC: OA Tail Pointer Race @@ -361,6 +355,7 @@ struct perf_open_properties { int oa_format; bool oa_periodic; int oa_period_exponent; + u32 oa_buffer_size_exponent; }; static void free_oa_config(struct drm_i915_private *dev_priv, @@ -523,7 +518,7 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) * could put the tail out of bounds... 
*/ if (hw_tail >= gtt_offset && - hw_tail < (gtt_offset + OA_BUFFER_SIZE)) { + hw_tail < (gtt_offset + dev_priv->perf.oa.oa_buffer.vma->size)) { dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = aging_tail = hw_tail; dev_priv->perf.oa.oa_buffer.aging_timestamp = now; @@ -652,7 +647,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, int report_size = dev_priv->perf.oa.oa_buffer.format_size; u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); - u32 mask = (OA_BUFFER_SIZE - 1); + u32 mask = (dev_priv->perf.oa.oa_buffer.vma->size - 1); size_t start_offset = *offset; unsigned long flags; unsigned int aged_tail_idx; @@ -692,8 +687,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * only be incremented by multiples of the report size (notably also * all a power of two). */ - if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || - tail > OA_BUFFER_SIZE || tail % report_size, + if (WARN_ONCE(head > dev_priv->perf.oa.oa_buffer.vma->size || head % report_size || + tail > dev_priv->perf.oa.oa_buffer.vma->size || tail % report_size, "Inconsistent OA buffer pointers: head = %u, tail = %u\n", head, tail)) return -EIO; @@ -716,7 +711,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * here would imply a driver bug that would result * in an overrun. */ - if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { + if (WARN_ON((dev_priv->perf.oa.oa_buffer.vma->size - head) < report_size)) { DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); break; } @@ -875,11 +870,6 @@ static int gen8_oa_read(struct i915_perf_stream *stream, * automatically triggered reports in this condition and so we * have to assume that old reports are now being trampled * over. 
- * - * Considering how we don't currently give userspace control - * over the OA buffer size and always configure a large 16MB - * buffer, then a buffer overflow does anyway likely indicate - * that something has gone quite badly wrong. */ if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) { ret = append_oa_status(stream, buf, count, offset, @@ -941,7 +931,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, int report_size = dev_priv->perf.oa.oa_buffer.format_size; u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); - u32 mask = (OA_BUFFER_SIZE - 1); + u32 mask = (dev_priv->perf.oa.oa_buffer.vma->size - 1); size_t start_offset = *offset; unsigned long flags; unsigned int aged_tail_idx; @@ -978,8 +968,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * only be incremented by multiples of the report size (notably also * all a power of two). */ - if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || - tail > OA_BUFFER_SIZE || tail % report_size, + if (WARN_ONCE(head > dev_priv->perf.oa.oa_buffer.vma->size || head % report_size || + tail > dev_priv->perf.oa.oa_buffer.vma->size || tail % report_size, "Inconsistent OA buffer pointers: head = %u, tail = %u\n", head, tail)) return -EIO; @@ -999,7 +989,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * here would imply a driver bug that would result * in an overrun. 
*/ - if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { + if (WARN_ON((dev_priv->perf.oa.oa_buffer.vma->size - head) < report_size)) { DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); break; } @@ -1394,7 +1384,9 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_OABUFFER, gtt_offset); - I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ + I915_WRITE(GEN7_OASTATUS1, gtt_offset | + ((dev_priv->perf.oa.oa_buffer.size_exponent - 17) << + GEN7_OASTATUS1_BUFFER_SIZE_SHIFT)); /* tail */ /* Mark that we need updated tail pointers to read from... */ dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; @@ -1419,7 +1411,8 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) * the assumption that new reports are being written to zeroed * memory... */ - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); + memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, + dev_priv->perf.oa.oa_buffer.vma->size); /* Maybe make ->pollin per-stream state if we support multiple * concurrent streams in the future. @@ -1449,7 +1442,9 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) * bit." */ I915_WRITE(GEN8_OABUFFER, gtt_offset | - OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); + ((dev_priv->perf.oa.oa_buffer.size_exponent - 17) << + GEN8_OABUFFER_BUFFER_SIZE_SHIFT) | + GEN8_OABUFFER_MEM_SELECT_GGTT); I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ @@ -1477,7 +1472,8 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) * the assumption that new reports are being written to zeroed * memory... 
*/ - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); + memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, + dev_priv->perf.oa.oa_buffer.vma->size); /* * Maybe make ->pollin per-stream state if we support multiple @@ -1486,23 +1482,24 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) dev_priv->perf.oa.pollin = false; } -static int alloc_oa_buffer(struct drm_i915_private *dev_priv) +static int alloc_oa_buffer(struct drm_i915_private *dev_priv, int size_exponent) { struct drm_i915_gem_object *bo; struct i915_vma *vma; + size_t size = 1U << size_exponent; int ret; if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma)) return -ENODEV; + if (WARN_ON(size < SZ_128K || size > SZ_16M)) + return -EINVAL; + ret = i915_mutex_lock_interruptible(&dev_priv->drm); if (ret) return ret; - BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); - BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); - - bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE); + bo = i915_gem_object_create(dev_priv, size); if (IS_ERR(bo)) { DRM_ERROR("Failed to allocate OA buffer\n"); ret = PTR_ERR(bo); @@ -1520,6 +1517,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) goto err_unref; } dev_priv->perf.oa.oa_buffer.vma = vma; + dev_priv->perf.oa.oa_buffer.size_exponent = size_exponent; dev_priv->perf.oa.oa_buffer.vaddr = i915_gem_object_pin_map(bo, I915_MAP_WB); @@ -1528,9 +1526,10 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) goto err_unpin; } - DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", + DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p, size = %llu\n", i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma), - dev_priv->perf.oa.oa_buffer.vaddr); + dev_priv->perf.oa.oa_buffer.vaddr, + dev_priv->perf.oa.oa_buffer.vma->size); goto unlock; @@ -2090,7 +2089,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, intel_runtime_pm_get(dev_priv); intel_uncore_forcewake_get(dev_priv, 
FORCEWAKE_ALL); - ret = alloc_oa_buffer(dev_priv); + ret = alloc_oa_buffer(dev_priv, props->oa_buffer_size_exponent); if (ret) goto err_oa_buf_alloc; @@ -2649,6 +2648,26 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) 1000ULL * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz); } +static int +select_oa_buffer_exponent(struct drm_i915_private *i915, + u64 requested_size) +{ + int order; + + /* + * When no size is specified, use the largest size supported by all + * generations. + */ + if (!requested_size) + return order_base_2(SZ_16M); + + order = order_base_2(clamp_t(u64, requested_size, SZ_128K, SZ_16M)); + if (requested_size != (1UL << order)) + return -EINVAL; + + return order; +} + /** * read_properties_unlocked - validate + copy userspace stream open properties * @dev_priv: i915 device instance @@ -2776,6 +2795,14 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; + case DRM_I915_PERF_PROP_OA_BUFFER_SIZE: + ret = select_oa_buffer_exponent(dev_priv, value); + if (ret < 0) { + DRM_DEBUG("OA buffer size invalid %llu\n", value); + return ret; + } + props->oa_buffer_size_exponent = ret; + break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; @@ -2784,6 +2811,12 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, uprop += 2; } + /* If no buffer size was requested, select the default one. */ + if (!props->oa_buffer_size_exponent) { + props->oa_buffer_size_exponent = + select_oa_buffer_exponent(dev_priv, 0); + } + return 0; } -- cgit v1.2.3 From 9fa6e2f7609fdbb7d6f86be86371a5719bec0376 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 24 Oct 2018 11:51:58 +0100 Subject: drm/i915/perf: Fix warning in documentation Forgot to add the description of this option in a previous commit. 
Signed-off-by: Lionel Landwerlin Fixes: cd956bfcd0f58d ("drm/i915/perf: add a parameter to control the size of OA buffer") Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20181024105158.4732-1-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915/i915_perf.c') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1712b68de8f5..2c2b63be7a6c 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -339,6 +339,7 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { * @oa_format: An OA unit HW report format * @oa_periodic: Whether to enable periodic OA unit sampling * @oa_period_exponent: The OA unit sampling period is derived from this + * @oa_buffer_size_exponent: The OA buffer size is derived from this * * As read_properties_unlocked() enumerates and validates the properties given * to open a stream of metrics the configuration is built up in the structure -- cgit v1.2.3 From fe841686470d383e33b606d0704ef4295141c582 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 16 Nov 2018 15:55:09 +0200 Subject: Revert "drm/i915/perf: add a parameter to control the size of OA buffer" Userspace portion is still missing. This reverts commit cd956bfcd0f58d20485ac0a785415f7d9327a95f. 
Cc: Lionel Landwerlin Cc: Matthew Auld Signed-off-by: Joonas Lahtinen Acked-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20181116135510.13807-1-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_perf.c | 99 ++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 66 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_perf.c') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2c2b63be7a6c..c762418d3b01 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -212,7 +212,13 @@ #include "i915_oa_icl.h" #include "intel_lrc_reg.h" -#define OA_TAKEN(tail, head) (((tail) - (head)) & (dev_priv->perf.oa.oa_buffer.vma->size - 1)) +/* HW requires this to be a power of two, between 128k and 16M, though driver + * is currently generally designed assuming the largest 16M size is used such + * that the overflow cases are unlikely in normal operation. + */ +#define OA_BUFFER_SIZE SZ_16M + +#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1)) /** * DOC: OA Tail Pointer Race @@ -356,7 +362,6 @@ struct perf_open_properties { int oa_format; bool oa_periodic; int oa_period_exponent; - u32 oa_buffer_size_exponent; }; static void free_oa_config(struct drm_i915_private *dev_priv, @@ -519,7 +524,7 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) * could put the tail out of bounds... 
*/ if (hw_tail >= gtt_offset && - hw_tail < (gtt_offset + dev_priv->perf.oa.oa_buffer.vma->size)) { + hw_tail < (gtt_offset + OA_BUFFER_SIZE)) { dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = aging_tail = hw_tail; dev_priv->perf.oa.oa_buffer.aging_timestamp = now; @@ -648,7 +653,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, int report_size = dev_priv->perf.oa.oa_buffer.format_size; u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); - u32 mask = (dev_priv->perf.oa.oa_buffer.vma->size - 1); + u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; unsigned int aged_tail_idx; @@ -688,8 +693,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * only be incremented by multiples of the report size (notably also * all a power of two). */ - if (WARN_ONCE(head > dev_priv->perf.oa.oa_buffer.vma->size || head % report_size || - tail > dev_priv->perf.oa.oa_buffer.vma->size || tail % report_size, + if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || + tail > OA_BUFFER_SIZE || tail % report_size, "Inconsistent OA buffer pointers: head = %u, tail = %u\n", head, tail)) return -EIO; @@ -712,7 +717,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * here would imply a driver bug that would result * in an overrun. */ - if (WARN_ON((dev_priv->perf.oa.oa_buffer.vma->size - head) < report_size)) { + if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); break; } @@ -871,6 +876,11 @@ static int gen8_oa_read(struct i915_perf_stream *stream, * automatically triggered reports in this condition and so we * have to assume that old reports are now being trampled * over. 
+ * + * Considering how we don't currently give userspace control + * over the OA buffer size and always configure a large 16MB + * buffer, then a buffer overflow does anyway likely indicate + * that something has gone quite badly wrong. */ if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) { ret = append_oa_status(stream, buf, count, offset, @@ -932,7 +942,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, int report_size = dev_priv->perf.oa.oa_buffer.format_size; u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); - u32 mask = (dev_priv->perf.oa.oa_buffer.vma->size - 1); + u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; unsigned int aged_tail_idx; @@ -969,8 +979,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * only be incremented by multiples of the report size (notably also * all a power of two). */ - if (WARN_ONCE(head > dev_priv->perf.oa.oa_buffer.vma->size || head % report_size || - tail > dev_priv->perf.oa.oa_buffer.vma->size || tail % report_size, + if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || + tail > OA_BUFFER_SIZE || tail % report_size, "Inconsistent OA buffer pointers: head = %u, tail = %u\n", head, tail)) return -EIO; @@ -990,7 +1000,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * here would imply a driver bug that would result * in an overrun. 
*/ - if (WARN_ON((dev_priv->perf.oa.oa_buffer.vma->size - head) < report_size)) { + if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); break; } @@ -1385,9 +1395,7 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_OABUFFER, gtt_offset); - I915_WRITE(GEN7_OASTATUS1, gtt_offset | - ((dev_priv->perf.oa.oa_buffer.size_exponent - 17) << - GEN7_OASTATUS1_BUFFER_SIZE_SHIFT)); /* tail */ + I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ /* Mark that we need updated tail pointers to read from... */ dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; @@ -1412,8 +1420,7 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) * the assumption that new reports are being written to zeroed * memory... */ - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, - dev_priv->perf.oa.oa_buffer.vma->size); + memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); /* Maybe make ->pollin per-stream state if we support multiple * concurrent streams in the future. @@ -1443,9 +1450,7 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) * bit." */ I915_WRITE(GEN8_OABUFFER, gtt_offset | - ((dev_priv->perf.oa.oa_buffer.size_exponent - 17) << - GEN8_OABUFFER_BUFFER_SIZE_SHIFT) | - GEN8_OABUFFER_MEM_SELECT_GGTT); + OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ @@ -1473,8 +1478,7 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) * the assumption that new reports are being written to zeroed * memory... 
*/ - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, - dev_priv->perf.oa.oa_buffer.vma->size); + memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); /* * Maybe make ->pollin per-stream state if we support multiple @@ -1483,24 +1487,23 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) dev_priv->perf.oa.pollin = false; } -static int alloc_oa_buffer(struct drm_i915_private *dev_priv, int size_exponent) +static int alloc_oa_buffer(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *bo; struct i915_vma *vma; - size_t size = 1U << size_exponent; int ret; if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma)) return -ENODEV; - if (WARN_ON(size < SZ_128K || size > SZ_16M)) - return -EINVAL; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); if (ret) return ret; - bo = i915_gem_object_create(dev_priv, size); + BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); + BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); + + bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE); if (IS_ERR(bo)) { DRM_ERROR("Failed to allocate OA buffer\n"); ret = PTR_ERR(bo); @@ -1518,7 +1521,6 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv, int size_exponent) goto err_unref; } dev_priv->perf.oa.oa_buffer.vma = vma; - dev_priv->perf.oa.oa_buffer.size_exponent = size_exponent; dev_priv->perf.oa.oa_buffer.vaddr = i915_gem_object_pin_map(bo, I915_MAP_WB); @@ -1527,10 +1529,9 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv, int size_exponent) goto err_unpin; } - DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p, size = %llu\n", + DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma), - dev_priv->perf.oa.oa_buffer.vaddr, - dev_priv->perf.oa.oa_buffer.vma->size); + dev_priv->perf.oa.oa_buffer.vaddr); goto unlock; @@ -2090,7 +2091,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, intel_runtime_pm_get(dev_priv); 
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - ret = alloc_oa_buffer(dev_priv, props->oa_buffer_size_exponent); + ret = alloc_oa_buffer(dev_priv); if (ret) goto err_oa_buf_alloc; @@ -2649,26 +2650,6 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) 1000ULL * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz); } -static int -select_oa_buffer_exponent(struct drm_i915_private *i915, - u64 requested_size) -{ - int order; - - /* - * When no size is specified, use the largest size supported by all - * generations. - */ - if (!requested_size) - return order_base_2(SZ_16M); - - order = order_base_2(clamp_t(u64, requested_size, SZ_128K, SZ_16M)); - if (requested_size != (1UL << order)) - return -EINVAL; - - return order; -} - /** * read_properties_unlocked - validate + copy userspace stream open properties * @dev_priv: i915 device instance @@ -2796,14 +2777,6 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; - case DRM_I915_PERF_PROP_OA_BUFFER_SIZE: - ret = select_oa_buffer_exponent(dev_priv, value); - if (ret < 0) { - DRM_DEBUG("OA buffer size invalid %llu\n", value); - return ret; - } - props->oa_buffer_size_exponent = ret; - break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; @@ -2812,12 +2785,6 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, uprop += 2; } - /* If no buffer size was requested, select the default one. */ - if (!props->oa_buffer_size_exponent) { - props->oa_buffer_size_exponent = - select_oa_buffer_exponent(dev_priv, 0); - } - return 0; } -- cgit v1.2.3 From 6b671c27ff57cb420fb0066563ce6d696ec3abc5 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 16 Nov 2018 15:55:10 +0200 Subject: Revert "drm/i915/perf: Fix warning in documentation" Userspace portion is still missing. This reverts commit 9fa6e2f7609fdbb7d6f86be86371a5719bec0376. 
Cc: Lionel Landwerlin Cc: Matthew Auld Signed-off-by: Joonas Lahtinen Acked-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20181116135510.13807-2-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_perf.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_perf.c') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c762418d3b01..4529edfdcfc8 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -345,7 +345,6 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { * @oa_format: An OA unit HW report format * @oa_periodic: Whether to enable periodic OA unit sampling * @oa_period_exponent: The OA unit sampling period is derived from this - * @oa_buffer_size_exponent: The OA buffer size is derived from this * * As read_properties_unlocked() enumerates and validates the properties given * to open a stream of metrics the configuration is built up in the structure -- cgit v1.2.3