diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_perf.c | 809 |
1 files changed, 584 insertions, 225 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index f33d90226704..94185d610673 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -193,6 +193,7 @@ #include <linux/anon_inodes.h> #include <linux/sizes.h> +#include <linux/uuid.h> #include "i915_drv.h" #include "i915_oa_hsw.h" @@ -357,6 +358,54 @@ struct perf_open_properties { int oa_period_exponent; }; +static void free_oa_config(struct drm_i915_private *dev_priv, + struct i915_oa_config *oa_config) +{ + if (!PTR_ERR(oa_config->flex_regs)) + kfree(oa_config->flex_regs); + if (!PTR_ERR(oa_config->b_counter_regs)) + kfree(oa_config->b_counter_regs); + if (!PTR_ERR(oa_config->mux_regs)) + kfree(oa_config->mux_regs); + kfree(oa_config); +} + +static void put_oa_config(struct drm_i915_private *dev_priv, + struct i915_oa_config *oa_config) +{ + if (!atomic_dec_and_test(&oa_config->ref_count)) + return; + + free_oa_config(dev_priv, oa_config); +} + +static int get_oa_config(struct drm_i915_private *dev_priv, + int metrics_set, + struct i915_oa_config **out_config) +{ + int ret; + + if (metrics_set == 1) { + *out_config = &dev_priv->perf.oa.test_config; + atomic_inc(&dev_priv->perf.oa.test_config.ref_count); + return 0; + } + + ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + if (ret) + return ret; + + *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); + if (!*out_config) + ret = -EINVAL; + else + atomic_inc(&(*out_config)->ref_count); + + mutex_unlock(&dev_priv->perf.metrics_lock); + + return ret; +} + static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv) { return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; @@ -1246,10 +1295,12 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) BUG_ON(stream != dev_priv->perf.oa.exclusive_stream); /* - * Unset exclusive_stream first, it might be checked while - * disabling the metric set on gen8+. + * Unset exclusive_stream first, it will be checked while disabling + * the metric set on gen8+. */ + mutex_lock(&dev_priv->drm.struct_mutex); dev_priv->perf.oa.exclusive_stream = NULL; + mutex_unlock(&dev_priv->drm.struct_mutex); dev_priv->perf.oa.ops.disable_metric_set(dev_priv); @@ -1261,6 +1312,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) if (stream->ctx) oa_put_render_ctx_id(stream); + put_oa_config(dev_priv, stream->oa_config); + if (dev_priv->perf.oa.spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", dev_priv->perf.oa.spurious_report_rs.missed); @@ -1440,9 +1493,9 @@ unlock: static void config_oa_regs(struct drm_i915_private *dev_priv, const struct i915_oa_reg *regs, - int n_regs) + u32 n_regs) { - int i; + u32 i; for (i = 0; i < n_regs; i++) { const struct i915_oa_reg *reg = regs + i; @@ -1451,17 +1504,9 @@ static void config_oa_regs(struct drm_i915_private *dev_priv, } } -static int hsw_enable_metric_set(struct drm_i915_private *dev_priv) +static int hsw_enable_metric_set(struct drm_i915_private *dev_priv, + const struct i915_oa_config *oa_config) { - int ret = i915_oa_select_metric_set_hsw(dev_priv); - int i; - - if (ret) - return ret; - - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) | - GT_NOA_ENABLE)); - /* PRM: * * OA unit is using “crclk” for its functionality. When trunk @@ -1476,10 +1521,7 @@ static int hsw_enable_metric_set(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | GEN6_CSUNIT_CLOCK_GATE_DISABLE)); - for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) { - config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs[i], - dev_priv->perf.oa.mux_regs_lens[i]); - } + config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); /* It apparently takes a fairly long time for a new MUX * configuration to be be applied after these register writes. @@ -1504,8 +1546,8 @@ static int hsw_enable_metric_set(struct drm_i915_private *dev_priv) */ usleep_range(15000, 20000); - config_oa_regs(dev_priv, dev_priv->perf.oa.b_counter_regs, - dev_priv->perf.oa.b_counter_regs_len); + config_oa_regs(dev_priv, oa_config->b_counter_regs, + oa_config->b_counter_regs_len); return 0; } @@ -1529,11 +1571,10 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) * in the case that the OA unit has been disabled. */ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, - u32 *reg_state) + u32 *reg_state, + const struct i915_oa_config *oa_config) { struct drm_i915_private *dev_priv = ctx->i915; - const struct i915_oa_reg *flex_regs = dev_priv->perf.oa.flex_regs; - int n_flex_regs = dev_priv->perf.oa.flex_regs_len; u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; /* The MMIO offsets for Flex EU registers aren't contiguous */ @@ -1565,12 +1606,15 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, * will be an explicit 'No Event' we can select, but not yet... */ u32 value = 0; - int j; - for (j = 0; j < n_flex_regs; j++) { - if (i915_mmio_reg_offset(flex_regs[j].addr) == mmio) { - value = flex_regs[j].value; - break; + if (oa_config) { + u32 j; + + for (j = 0; j < oa_config->flex_regs_len; j++) { + if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { + value = oa_config->flex_regs[j].value; + break; + } } } @@ -1583,11 +1627,10 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This * is only used by the kernel context. */ -static int gen8_emit_oa_config(struct drm_i915_gem_request *req) +static int gen8_emit_oa_config(struct drm_i915_gem_request *req, + const struct i915_oa_config *oa_config) { struct drm_i915_private *dev_priv = req->i915; - const struct i915_oa_reg *flex_regs = dev_priv->perf.oa.flex_regs; - int n_flex_regs = dev_priv->perf.oa.flex_regs_len; /* The MMIO offsets for Flex EU registers aren't contiguous */ u32 flex_mmio[] = { i915_mmio_reg_offset(EU_PERF_CNTL0), @@ -1622,12 +1665,15 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req) * yet... */ u32 value = 0; - int j; - for (j = 0; j < n_flex_regs; j++) { - if (i915_mmio_reg_offset(flex_regs[j].addr) == mmio) { - value = flex_regs[j].value; - break; + if (oa_config) { + u32 j; + + for (j = 0; j < oa_config->flex_regs_len; j++) { + if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { + value = oa_config->flex_regs[j].value; + break; + } } } @@ -1641,7 +1687,8 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req) return 0; } -static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv) +static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv, + const struct i915_oa_config *oa_config) { struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct i915_gem_timeline *timeline; @@ -1656,7 +1703,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr if (IS_ERR(req)) return PTR_ERR(req); - ret = gen8_emit_oa_config(req); + ret = gen8_emit_oa_config(req, oa_config); if (ret) { i915_add_request(req); return ret; @@ -1707,6 +1754,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr * Note: it's only the RCS/Render context that has any OA state. */ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, + const struct i915_oa_config *oa_config, bool interruptible) { struct i915_gem_context *ctx; @@ -1724,7 +1772,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, } /* Switch away from any user context. */ - ret = gen8_switch_to_updated_kernel_context(dev_priv); + ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config); if (ret) goto out; @@ -1746,7 +1794,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, goto out; /* Update all contexts now that we've stalled the submission. */ - list_for_each_entry(ctx, &dev_priv->context_list, link) { + list_for_each_entry(ctx, &dev_priv->contexts.list, link) { struct intel_context *ce = &ctx->engine[RCS]; u32 *regs; @@ -1763,7 +1811,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, ce->state->obj->mm.dirty = true; regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); - gen8_update_reg_state_unlocked(ctx, regs); + gen8_update_reg_state_unlocked(ctx, regs, oa_config); i915_gem_object_unpin_map(ce->state->obj); } @@ -1774,13 +1822,10 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, return ret; } -static int gen8_enable_metric_set(struct drm_i915_private *dev_priv) +static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, + const struct i915_oa_config *oa_config) { - int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv); - int i; - - if (ret) - return ret; + int ret; /* * We disable slice/unslice clock ratio change reports on SKL since @@ -1817,19 +1862,14 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = gen8_configure_all_contexts(dev_priv, true); + ret = gen8_configure_all_contexts(dev_priv, oa_config, true); if (ret) return ret; - I915_WRITE(GDT_CHICKEN_BITS, 0xA0); - for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) { - config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs[i], - dev_priv->perf.oa.mux_regs_lens[i]); - } - I915_WRITE(GDT_CHICKEN_BITS, 0x80); + config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - config_oa_regs(dev_priv, dev_priv->perf.oa.b_counter_regs, - dev_priv->perf.oa.b_counter_regs_len); + config_oa_regs(dev_priv, oa_config->b_counter_regs, + oa_config->b_counter_regs_len); return 0; } @@ -1837,7 +1877,11 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv) static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) { /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(dev_priv, false); + gen8_configure_all_contexts(dev_priv, NULL, false); + + I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & + ~GT_NOA_ENABLE)); + } static void gen7_oa_enable(struct drm_i915_private *dev_priv) @@ -2011,11 +2055,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EBUSY; } - if (!props->metrics_set) { - DRM_DEBUG("OA metric set not specified\n"); - return -EINVAL; - } - if (!props->oa_format) { DRM_DEBUG("OA report format not specified\n"); return -EINVAL; @@ -2055,8 +2094,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, dev_priv->perf.oa.oa_buffer.format = dev_priv->perf.oa.oa_formats[props->oa_format].format; - dev_priv->perf.oa.metrics_set = props->metrics_set; - dev_priv->perf.oa.periodic = props->oa_periodic; if (dev_priv->perf.oa.periodic) dev_priv->perf.oa.period_exponent = props->oa_period_exponent; @@ -2067,6 +2104,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return ret; } + ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); + if (ret) + goto err_config; + /* PRM - observability performance counters: * * OACONTROL, performance counter enable, note: @@ -2086,22 +2127,39 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (ret) goto err_oa_buf_alloc; - ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv); + ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv, + stream->oa_config); if (ret) goto err_enable; stream->ops = &i915_oa_stream_ops; + /* Lock device for exclusive_stream access late because + * enable_metric_set() might lock as well on gen8+. + */ + ret = i915_mutex_lock_interruptible(&dev_priv->drm); + if (ret) + goto err_lock; + dev_priv->perf.oa.exclusive_stream = stream; + mutex_unlock(&dev_priv->drm.struct_mutex); + return 0; +err_lock: + dev_priv->perf.oa.ops.disable_metric_set(dev_priv); + err_enable: free_oa_buffer(dev_priv); err_oa_buf_alloc: + put_oa_config(dev_priv, stream->oa_config); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); intel_runtime_pm_put(dev_priv); + +err_config: if (stream->ctx) oa_put_render_ctx_id(stream); @@ -2112,15 +2170,14 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, struct i915_gem_context *ctx, u32 *reg_state) { - struct drm_i915_private *dev_priv = engine->i915; + struct i915_perf_stream *stream; if (engine->id != RCS) return; - if (!dev_priv->perf.initialized) - return; - - gen8_update_reg_state_unlocked(ctx, reg_state); + stream = engine->i915->perf.oa.exclusive_stream; + if (stream) + gen8_update_reg_state_unlocked(ctx, reg_state, stream->oa_config); } /** @@ -2444,7 +2501,7 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) list_del(&stream->link); if (stream->ctx) - i915_gem_context_put_unlocked(stream->ctx); + i915_gem_context_put(stream->ctx); kfree(stream); } @@ -2483,27 +2540,6 @@ static const struct file_operations fops = { }; -static struct i915_gem_context * -lookup_context(struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv, - u32 ctx_user_handle) -{ - struct i915_gem_context *ctx; - int ret; - - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - return ERR_PTR(ret); - - ctx = i915_gem_context_lookup(file_priv, ctx_user_handle); - if (!IS_ERR(ctx)) - i915_gem_context_get(ctx); - - mutex_unlock(&dev_priv->drm.struct_mutex); - - return ctx; -} - /** * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD * @dev_priv: i915 device instance @@ -2545,12 +2581,11 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, u32 ctx_handle = props->ctx_handle; struct drm_i915_file_private *file_priv = file->driver_priv; - specific_ctx = lookup_context(dev_priv, file_priv, ctx_handle); - if (IS_ERR(specific_ctx)) { - ret = PTR_ERR(specific_ctx); - if (ret != -EINTR) - DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", - ctx_handle); + specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); + if (!specific_ctx) { + DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", + ctx_handle); + ret = -ENOENT; goto err; } } @@ -2633,7 +2668,7 @@ err_alloc: kfree(stream); err_ctx: if (specific_ctx) - i915_gem_context_put_unlocked(specific_ctx); + i915_gem_context_put(specific_ctx); err: return ret; } @@ -2665,7 +2700,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, struct perf_open_properties *props) { u64 __user *uprop = uprops; - int i; + u32 i; memset(props, 0, sizeof(struct perf_open_properties)); @@ -2712,8 +2747,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->sample_flags |= SAMPLE_OA_REPORT; break; case DRM_I915_PERF_PROP_OA_METRICS_SET: - if (value == 0 || - value > dev_priv->perf.oa.n_builtin_sets) { + if (value == 0) { DRM_DEBUG("Unknown OA metric set ID\n"); return -EINVAL; } @@ -2852,6 +2886,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, */ void i915_perf_register(struct drm_i915_private *dev_priv) { + int ret; + if (!dev_priv->perf.initialized) return; @@ -2867,44 +2903,42 @@ void i915_perf_register(struct drm_i915_private *dev_priv) if (!dev_priv->perf.metrics_kobj) goto exit; + sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr); + if (IS_HASWELL(dev_priv)) { - if (i915_perf_register_sysfs_hsw(dev_priv)) - goto sysfs_error; + i915_perf_load_test_config_hsw(dev_priv); } else if (IS_BROADWELL(dev_priv)) { - if (i915_perf_register_sysfs_bdw(dev_priv)) - goto sysfs_error; + i915_perf_load_test_config_bdw(dev_priv); } else if (IS_CHERRYVIEW(dev_priv)) { - if (i915_perf_register_sysfs_chv(dev_priv)) - goto sysfs_error; + i915_perf_load_test_config_chv(dev_priv); } else if (IS_SKYLAKE(dev_priv)) { - if (IS_SKL_GT2(dev_priv)) { - if (i915_perf_register_sysfs_sklgt2(dev_priv)) - goto sysfs_error; - } else if (IS_SKL_GT3(dev_priv)) { - if (i915_perf_register_sysfs_sklgt3(dev_priv)) - goto sysfs_error; - } else if (IS_SKL_GT4(dev_priv)) { - if (i915_perf_register_sysfs_sklgt4(dev_priv)) - goto sysfs_error; - } else - goto sysfs_error; + if (IS_SKL_GT2(dev_priv)) + i915_perf_load_test_config_sklgt2(dev_priv); + else if (IS_SKL_GT3(dev_priv)) + i915_perf_load_test_config_sklgt3(dev_priv); + else if (IS_SKL_GT4(dev_priv)) + i915_perf_load_test_config_sklgt4(dev_priv); } else if (IS_BROXTON(dev_priv)) { - if (i915_perf_register_sysfs_bxt(dev_priv)) - goto sysfs_error; + i915_perf_load_test_config_bxt(dev_priv); } else if (IS_KABYLAKE(dev_priv)) { - if (IS_KBL_GT2(dev_priv)) { - if (i915_perf_register_sysfs_kblgt2(dev_priv)) - goto sysfs_error; - } else if (IS_KBL_GT3(dev_priv)) { - if (i915_perf_register_sysfs_kblgt3(dev_priv)) - goto sysfs_error; - } else - goto sysfs_error; + if (IS_KBL_GT2(dev_priv)) + i915_perf_load_test_config_kblgt2(dev_priv); + else if (IS_KBL_GT3(dev_priv)) + i915_perf_load_test_config_kblgt3(dev_priv); } else if (IS_GEMINILAKE(dev_priv)) { - if (i915_perf_register_sysfs_glk(dev_priv)) - goto sysfs_error; + i915_perf_load_test_config_glk(dev_priv); } + if (dev_priv->perf.oa.test_config.id == 0) + goto sysfs_error; + + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, + &dev_priv->perf.oa.test_config.sysfs_metric); + if (ret) + goto sysfs_error; + + atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1); + goto exit; sysfs_error: @@ -2929,34 +2963,375 @@ void i915_perf_unregister(struct drm_i915_private *dev_priv) if (!dev_priv->perf.metrics_kobj) return; - if (IS_HASWELL(dev_priv)) - i915_perf_unregister_sysfs_hsw(dev_priv); - else if (IS_BROADWELL(dev_priv)) - i915_perf_unregister_sysfs_bdw(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) - i915_perf_unregister_sysfs_chv(dev_priv); - else if (IS_SKYLAKE(dev_priv)) { - if (IS_SKL_GT2(dev_priv)) - i915_perf_unregister_sysfs_sklgt2(dev_priv); - else if (IS_SKL_GT3(dev_priv)) - i915_perf_unregister_sysfs_sklgt3(dev_priv); - else if (IS_SKL_GT4(dev_priv)) - i915_perf_unregister_sysfs_sklgt4(dev_priv); - } else if (IS_BROXTON(dev_priv)) - i915_perf_unregister_sysfs_bxt(dev_priv); - else if (IS_KABYLAKE(dev_priv)) { - if (IS_KBL_GT2(dev_priv)) - i915_perf_unregister_sysfs_kblgt2(dev_priv); - else if (IS_KBL_GT3(dev_priv)) - i915_perf_unregister_sysfs_kblgt3(dev_priv); - } else if (IS_GEMINILAKE(dev_priv)) - i915_perf_unregister_sysfs_glk(dev_priv); - + sysfs_remove_group(dev_priv->perf.metrics_kobj, + &dev_priv->perf.oa.test_config.sysfs_metric); kobject_put(dev_priv->perf.metrics_kobj); dev_priv->perf.metrics_kobj = NULL; } +static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + static const i915_reg_t flex_eu_regs[] = { + EU_PERF_CNTL0, + EU_PERF_CNTL1, + EU_PERF_CNTL2, + EU_PERF_CNTL3, + EU_PERF_CNTL4, + EU_PERF_CNTL5, + EU_PERF_CNTL6, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { + if (flex_eu_regs[i].reg == addr) + return true; + } + return false; +} + +static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return (addr >= OASTARTTRIG1.reg && addr <= OASTARTTRIG8.reg) || + (addr >= OAREPORTTRIG1.reg && addr <= OAREPORTTRIG8.reg) || + (addr >= OACEC0_0.reg && addr <= OACEC7_1.reg); +} + +static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return addr == HALF_SLICE_CHICKEN2.reg || + (addr >= MICRO_BP0_0.reg && addr <= NOA_WRITE.reg) || + (addr >= OA_PERFCNT1_LO.reg && addr <= OA_PERFCNT2_HI.reg) || + (addr >= OA_PERFMATRIX_LO.reg && addr <= OA_PERFMATRIX_HI.reg); +} + +static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return gen7_is_valid_mux_addr(dev_priv, addr) || + addr == WAIT_FOR_RC6_EXIT.reg || + (addr >= RPM_CONFIG0.reg && addr <= NOA_CONFIG(8).reg); +} + +static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return gen7_is_valid_mux_addr(dev_priv, addr) || + (addr >= 0x25100 && addr <= 0x2FF90) || + addr == 0x9ec0; +} + +static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return gen7_is_valid_mux_addr(dev_priv, addr) || + (addr >= 0x182300 && addr <= 0x1823A4); +} + +static uint32_t mask_reg_value(u32 reg, u32 val) +{ + /* HALF_SLICE_CHICKEN2 is programmed with a the + * WaDisableSTUnitPowerOptimization workaround. Make sure the value + * programmed by userspace doesn't change this. + */ + if (HALF_SLICE_CHICKEN2.reg == reg) + val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); + + /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function + * indicated by its name and a bunch of selection fields used by OA + * configs. + */ + if (WAIT_FOR_RC6_EXIT.reg == reg) + val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); + + return val; +} + +static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, + bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr), + u32 __user *regs, + u32 n_regs) +{ + struct i915_oa_reg *oa_regs; + int err; + u32 i; + + if (!n_regs) + return NULL; + + if (!access_ok(VERIFY_READ, regs, n_regs * sizeof(u32) * 2)) + return ERR_PTR(-EFAULT); + + /* No is_valid function means we're not allowing any register to be programmed. */ + GEM_BUG_ON(!is_valid); + if (!is_valid) + return ERR_PTR(-EINVAL); + + oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); + if (!oa_regs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < n_regs; i++) { + u32 addr, value; + + err = get_user(addr, regs); + if (err) + goto addr_err; + + if (!is_valid(dev_priv, addr)) { + DRM_DEBUG("Invalid oa_reg address: %X\n", addr); + err = -EINVAL; + goto addr_err; + } + + err = get_user(value, regs + 1); + if (err) + goto addr_err; + + oa_regs[i].addr = _MMIO(addr); + oa_regs[i].value = mask_reg_value(addr, value); + + regs += 2; + } + + return oa_regs; + +addr_err: + kfree(oa_regs); + return ERR_PTR(err); +} + +static ssize_t show_dynamic_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i915_oa_config *oa_config = + container_of(attr, typeof(*oa_config), sysfs_metric_id); + + return sprintf(buf, "%d\n", oa_config->id); +} + +static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, + struct i915_oa_config *oa_config) +{ + sysfs_attr_init(&oa_config->sysfs_metric_id.attr); + oa_config->sysfs_metric_id.attr.name = "id"; + oa_config->sysfs_metric_id.attr.mode = S_IRUGO; + oa_config->sysfs_metric_id.show = show_dynamic_id; + oa_config->sysfs_metric_id.store = NULL; + + oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; + oa_config->attrs[1] = NULL; + + oa_config->sysfs_metric.name = oa_config->uuid; + oa_config->sysfs_metric.attrs = oa_config->attrs; + + return sysfs_create_group(dev_priv->perf.metrics_kobj, + &oa_config->sysfs_metric); +} + +/** + * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config + * @dev: drm device + * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from + * userspace (unvalidated) + * @file: drm file + * + * Validates the submitted OA register to be saved into a new OA config that + * can then be used for programming the OA unit and its NOA network. + * + * Returns: A new allocated config number to be used with the perf open ioctl + * or a negative error code on failure. + */ +int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_perf_oa_config *args = data; + struct i915_oa_config *oa_config, *tmp; + int err, id; + + if (!dev_priv->perf.initialized) { + DRM_DEBUG("i915 perf interface not available for this system\n"); + return -ENOTSUPP; + } + + if (!dev_priv->perf.metrics_kobj) { + DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); + return -EINVAL; + } + + if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { + DRM_DEBUG("Insufficient privileges to add i915 OA config\n"); + return -EACCES; + } + + if ((!args->mux_regs_ptr || !args->n_mux_regs) && + (!args->boolean_regs_ptr || !args->n_boolean_regs) && + (!args->flex_regs_ptr || !args->n_flex_regs)) { + DRM_DEBUG("No OA registers given\n"); + return -EINVAL; + } + + oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); + if (!oa_config) { + DRM_DEBUG("Failed to allocate memory for the OA config\n"); + return -ENOMEM; + } + + atomic_set(&oa_config->ref_count, 1); + + if (!uuid_is_valid(args->uuid)) { + DRM_DEBUG("Invalid uuid format for OA config\n"); + err = -EINVAL; + goto reg_err; + } + + /* Last character in oa_config->uuid will be 0 because oa_config is + * kzalloc. + */ + memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); + + oa_config->mux_regs_len = args->n_mux_regs; + oa_config->mux_regs = + alloc_oa_regs(dev_priv, + dev_priv->perf.oa.ops.is_valid_mux_reg, + u64_to_user_ptr(args->mux_regs_ptr), + args->n_mux_regs); + + if (IS_ERR(oa_config->mux_regs)) { + DRM_DEBUG("Failed to create OA config for mux_regs\n"); + err = PTR_ERR(oa_config->mux_regs); + goto reg_err; + } + + oa_config->b_counter_regs_len = args->n_boolean_regs; + oa_config->b_counter_regs = + alloc_oa_regs(dev_priv, + dev_priv->perf.oa.ops.is_valid_b_counter_reg, + u64_to_user_ptr(args->boolean_regs_ptr), + args->n_boolean_regs); + + if (IS_ERR(oa_config->b_counter_regs)) { + DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); + err = PTR_ERR(oa_config->b_counter_regs); + goto reg_err; + } + + if (INTEL_GEN(dev_priv) < 8) { + if (args->n_flex_regs != 0) { + err = -EINVAL; + goto reg_err; + } + } else { + oa_config->flex_regs_len = args->n_flex_regs; + oa_config->flex_regs = + alloc_oa_regs(dev_priv, + dev_priv->perf.oa.ops.is_valid_flex_reg, + u64_to_user_ptr(args->flex_regs_ptr), + args->n_flex_regs); + + if (IS_ERR(oa_config->flex_regs)) { + DRM_DEBUG("Failed to create OA config for flex_regs\n"); + err = PTR_ERR(oa_config->flex_regs); + goto reg_err; + } + } + + err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + if (err) + goto reg_err; + + /* We shouldn't have too many configs, so this iteration shouldn't be + * too costly. + */ + idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, oa_config->uuid)) { + DRM_DEBUG("OA config already exists with this uuid\n"); + err = -EADDRINUSE; + goto sysfs_err; + } + } + + err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config); + if (err) { + DRM_DEBUG("Failed to create sysfs entry for OA config\n"); + goto sysfs_err; + } + + /* Config id 0 is invalid, id 1 for kernel stored test config. */ + oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr, + oa_config, 2, + 0, GFP_KERNEL); + if (oa_config->id < 0) { + DRM_DEBUG("Failed to create sysfs entry for OA config\n"); + err = oa_config->id; + goto sysfs_err; + } + + mutex_unlock(&dev_priv->perf.metrics_lock); + + return oa_config->id; + +sysfs_err: + mutex_unlock(&dev_priv->perf.metrics_lock); +reg_err: + put_oa_config(dev_priv, oa_config); + DRM_DEBUG("Failed to add new OA config\n"); + return err; +} + +/** + * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config + * @dev: drm device + * @data: ioctl data (pointer to u64 integer) copied from userspace + * @file: drm file + * + * Configs can be removed while being used, the will stop appearing in sysfs + * and their content will be freed when the stream using the config is closed. + * + * Returns: 0 on success or a negative error code on failure. + */ +int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u64 *arg = data; + struct i915_oa_config *oa_config; + int ret; + + if (!dev_priv->perf.initialized) { + DRM_DEBUG("i915 perf interface not available for this system\n"); + return -ENOTSUPP; + } + + if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { + DRM_DEBUG("Insufficient privileges to remove i915 OA config\n"); + return -EACCES; + } + + ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + if (ret) + goto lock_err; + + oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg); + if (!oa_config) { + DRM_DEBUG("Failed to remove unknown OA config\n"); + ret = -ENOENT; + goto config_err; + } + + GEM_BUG_ON(*arg != oa_config->id); + + sysfs_remove_group(dev_priv->perf.metrics_kobj, + &oa_config->sysfs_metric); + + idr_remove(&dev_priv->perf.metrics_idr, *arg); + put_oa_config(dev_priv, oa_config); + +config_err: + mutex_unlock(&dev_priv->perf.metrics_lock); +lock_err: + return ret; +} + static struct ctl_table oa_table[] = { { .procname = "perf_stream_paranoid", @@ -3010,9 +3385,14 @@ static struct ctl_table dev_root[] = { */ void i915_perf_init(struct drm_i915_private *dev_priv) { - dev_priv->perf.oa.n_builtin_sets = 0; + dev_priv->perf.oa.timestamp_frequency = 0; if (IS_HASWELL(dev_priv)) { + dev_priv->perf.oa.ops.is_valid_b_counter_reg = + gen7_is_valid_b_counter_addr; + dev_priv->perf.oa.ops.is_valid_mux_reg = + hsw_is_valid_mux_addr; + dev_priv->perf.oa.ops.is_valid_flex_reg = NULL; dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; @@ -3025,9 +3405,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.timestamp_frequency = 12500000; dev_priv->perf.oa.oa_formats = hsw_oa_formats; - - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_hsw; } else if (i915.enable_execlists) { /* Note: that although we could theoretically also support the * legacy ringbuffer mode on BDW (and earlier iterations of @@ -3035,6 +3412,22 @@ void i915_perf_init(struct drm_i915_private *dev_priv) * worth the complexity to maintain now that BDW+ enable * execlist mode by default. */ + dev_priv->perf.oa.ops.is_valid_b_counter_reg = + gen7_is_valid_b_counter_addr; + dev_priv->perf.oa.ops.is_valid_mux_reg = + gen8_is_valid_mux_addr; + dev_priv->perf.oa.ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; + + dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer; + dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set; + dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; + dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; + dev_priv->perf.oa.ops.read = gen8_oa_read; + dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; + + dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; if (IS_GEN8(dev_priv)) { dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; @@ -3043,85 +3436,35 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.timestamp_frequency = 12500000; dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); - - if (IS_BROADWELL(dev_priv)) { - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_bdw; - dev_priv->perf.oa.ops.select_metric_set = - i915_oa_select_metric_set_bdw; - } else if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_chv; - dev_priv->perf.oa.ops.select_metric_set = - i915_oa_select_metric_set_chv; + if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->perf.oa.ops.is_valid_mux_reg = + chv_is_valid_mux_addr; } } else if (IS_GEN9(dev_priv)) { dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; - dev_priv->perf.oa.timestamp_frequency = 12000000; - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); - if (IS_SKL_GT2(dev_priv)) { - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_sklgt2; - dev_priv->perf.oa.ops.select_metric_set = - i915_oa_select_metric_set_sklgt2; - } else if (IS_SKL_GT3(dev_priv)) { - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_sklgt3; - dev_priv->perf.oa.ops.select_metric_set = - i915_oa_select_metric_set_sklgt3; - } else if (IS_SKL_GT4(dev_priv)) { - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_sklgt4; - dev_priv->perf.oa.ops.select_metric_set = - i915_oa_select_metric_set_sklgt4; - } else if (IS_BROXTON(dev_priv)) { + switch (dev_priv->info.platform) { + case INTEL_BROXTON: + case INTEL_GEMINILAKE: dev_priv->perf.oa.timestamp_frequency = 19200000; - - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_bxt; - dev_priv->perf.oa.ops.select_metric_set = - i915_oa_select_metric_set_bxt; - } else if (IS_KBL_GT2(dev_priv)) { - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_kblgt2; - dev_priv->perf.oa.ops.select_metric_set = - i915_oa_select_metric_set_kblgt2; - } else if (IS_KBL_GT3(dev_priv)) { - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_kblgt3; - dev_priv->perf.oa.ops.select_metric_set = - i915_oa_select_metric_set_kblgt3; - } else if (IS_GEMINILAKE(dev_priv)) { - dev_priv->perf.oa.timestamp_frequency = 19200000; - - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_glk; - dev_priv->perf.oa.ops.select_metric_set = - i915_oa_select_metric_set_glk; + break; + case INTEL_SKYLAKE: + case INTEL_KABYLAKE: + dev_priv->perf.oa.timestamp_frequency = 12000000; + break; + default: + /* Leave timestamp_frequency to 0 so we can + * detect unsupported platforms. + */ + break; } } - - if (dev_priv->perf.oa.n_builtin_sets) { - dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer; - dev_priv->perf.oa.ops.enable_metric_set = - gen8_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = - gen8_disable_metric_set; - dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; - dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; - dev_priv->perf.oa.ops.read = gen8_oa_read; - dev_priv->perf.oa.ops.oa_hw_tail_read = - gen8_oa_hw_tail_read; - - dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; - } } - if (dev_priv->perf.oa.n_builtin_sets) { + if (dev_priv->perf.oa.timestamp_frequency) { hrtimer_init(&dev_priv->perf.oa.poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; @@ -3135,10 +3478,23 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.timestamp_frequency / 2; dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); + mutex_init(&dev_priv->perf.metrics_lock); + idr_init(&dev_priv->perf.metrics_idr); + dev_priv->perf.initialized = true; } } +static int destroy_config(int id, void *p, void *data) +{ + struct drm_i915_private *dev_priv = data; + struct i915_oa_config *oa_config = p; + + put_oa_config(dev_priv, oa_config); + + return 0; +} + /** * i915_perf_fini - Counter part to i915_perf_init() * @dev_priv: i915 device instance @@ -3148,6 +3504,9 @@ void i915_perf_fini(struct drm_i915_private *dev_priv) if (!dev_priv->perf.initialized) return; + idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv); + idr_destroy(&dev_priv->perf.metrics_idr); + unregister_sysctl_table(dev_priv->perf.sysctl_header); memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops)); |