From 65fda4b226d2886f6479d2282b097143bfb5fd63 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Mar 2023 18:47:00 +0200 Subject: dt-bindings: display: sitronix,st7789v: document dc-gpios The device comes with DCX pin which is already used in canaan/sipeed_maixduino.dts (although not in Linux driver). Signed-off-by: Krzysztof Kozlowski Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20230326164700.104570-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml index d984b59daa4a..fa6556363cca 100644 --- a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml +++ b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml @@ -26,6 +26,10 @@ properties: spi-cpha: true spi-cpol: true + dc-gpios: + maxItems: 1 + description: DCX pin, Display data/command selection pin in parallel interface + required: - compatible - reg -- cgit v1.2.3 From 52fb6663a8df16bcf6a68aa98cece0a13ab4c804 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Mar 2023 22:42:21 +0200 Subject: dt-bindings: display: xinpeng,xpp055c272: document port Panels are supposed to have one port (defined in panel-common.yaml binding): px30-evb.dtb: panel@0: 'port' does not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Jagan Teki Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20230326204224.80181-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- .../devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml b/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml index d5c46a3cc2b0..c407deb6afb1 100644 --- a/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml +++ b/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml @@ -17,6 +17,7 @@ properties: const: xinpeng,xpp055c272 reg: true backlight: true + port: true reset-gpios: true iovcc-supply: description: regulator that supplies the iovcc voltage @@ -27,6 +28,7 @@ required: - compatible - reg - backlight + - port - iovcc-supply - vci-supply @@ -44,6 +46,12 @@ examples: backlight = <&backlight>; iovcc-supply = <&vcc_1v8>; vci-supply = <&vcc3v3_lcd>; + + port { + mipi_in_panel: endpoint { + remote-endpoint = <&mipi_out_panel>; + }; + }; }; }; -- cgit v1.2.3 From 6ffbfa280851320bdd5d4ccdae97421dd1c8510c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Mar 2023 22:42:22 +0200 Subject: dt-bindings: display: feiyang,fy07024di26a30d: document port Panels are supposed to have one port (defined in panel-common.yaml binding): rk3399-rockpro64.dtb: panel@0: 'port' does not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Jagan Teki Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20230326204224.80181-2-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- .../bindings/display/panel/feiyang,fy07024di26a30d.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml b/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml index 1cf84c8dd85e..92df69e80a82 100644 --- a/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml +++ b/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml @@ -26,6 +26,7 @@ properties: dvdd-supply: description: 3v3 digital regulator + port: true reset-gpios: true backlight: true @@ -35,6 +36,7 @@ required: - reg - avdd-supply - dvdd-supply + - port additionalProperties: false @@ -53,5 +55,11 @@ examples: dvdd-supply = <®_dldo2>; reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* LCD-RST: PD24 */ backlight = <&backlight>; + + port { + mipi_in_panel: endpoint { + remote-endpoint = <&mipi_out_panel>; + }; + }; }; }; -- cgit v1.2.3 From 9eef9c216e71adc1ed8f8e815de6e2d19939e965 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Mar 2023 22:42:23 +0200 Subject: dt-bindings: display: elida,kd35t133: document port and rotation Panels are supposed to have one port (defined in panel-common.yaml binding) and can have also rotation: rk3326-odroid-go2.dtb: panel@0: 'port', 'rotation' do not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Neil Armstrong Reviewed-by: Jagan Teki Link: https://lore.kernel.org/r/20230326204224.80181-3-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- .../devicetree/bindings/display/panel/elida,kd35t133.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml b/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml index 7adb83e2e8d9..265ab6d30572 100644 --- a/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml +++ b/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml @@ -17,7 +17,9 @@ properties: const: elida,kd35t133 reg: true backlight: true + port: true reset-gpios: true + rotation: true iovcc-supply: description: regulator that supplies the iovcc voltage vdd-supply: @@ -27,6 +29,7 @@ required: - compatible - reg - backlight + - port - iovcc-supply - vdd-supply @@ -43,6 +46,12 @@ examples: backlight = <&backlight>; iovcc-supply = <&vcc_1v8>; vdd-supply = <&vcc3v3_lcd>; + + port { + mipi_in_panel: endpoint { + remote-endpoint = <&mipi_out_panel>; + }; + }; }; }; -- cgit v1.2.3 From 175caafa787a4f0657850513ab3866f560c94aac Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Mar 2023 22:42:24 +0200 Subject: dt-bindings: display: sitronix,st7701: document port and rotation Panels are supposed to have one port (defined in panel-common.yaml binding) and can have also rotation: rk3326-odroid-go3.dtb: panel@0: 'port', 'rotation' do not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Neil Armstrong Reviewed-by: Jagan Teki Link: https://lore.kernel.org/r/20230326204224.80181-4-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- .../devicetree/bindings/display/panel/sitronix,st7701.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml b/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml index 83d30eadf7d9..4dc0cd4a6a77 100644 --- a/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml +++ b/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml @@ -42,7 +42,9 @@ properties: IOVCC-supply: description: I/O system regulator + port: true reset-gpios: true + rotation: true backlight: true @@ -51,6 +53,7 @@ required: - reg - VCC-supply - IOVCC-supply + - port - reset-gpios additionalProperties: false @@ -70,5 +73,11 @@ examples: IOVCC-supply = <®_dldo2>; reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* LCD-RST: PD24 */ backlight = <&backlight>; + + port { + mipi_in_panel: endpoint { + remote-endpoint = <&mipi_out_panel>; + }; + }; }; }; -- cgit v1.2.3 From c5647cae2704e58d1c4e5fedbf63f11bca6376c9 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 13 Mar 2023 22:27:11 -0700 Subject: drm/lima/lima_drv: Add missing unwind goto in lima_pdev_probe() Smatch reports: drivers/gpu/drm/lima/lima_drv.c:396 lima_pdev_probe() warn: missing unwind goto? Store return value in err and goto 'err_out0' which has lima_sched_slab_fini() before returning. Fixes: a1d2a6339961 ("drm/lima: driver for ARM Mali4xx GPUs") Signed-off-by: Harshit Mogalapalli Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20230314052711.4061652-1-harshit.m.mogalapalli@oracle.com --- drivers/gpu/drm/lima/lima_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c index 7b8d7178d09a..39cab4a55f57 100644 --- a/drivers/gpu/drm/lima/lima_drv.c +++ b/drivers/gpu/drm/lima/lima_drv.c @@ -392,8 +392,10 @@ static int lima_pdev_probe(struct platform_device *pdev) /* Allocate and initialize the DRM device. */ ddev = drm_dev_alloc(&lima_drm_driver, &pdev->dev); - if (IS_ERR(ddev)) - return PTR_ERR(ddev); + if (IS_ERR(ddev)) { + err = PTR_ERR(ddev); + goto err_out0; + } ddev->dev_private = ldev; ldev->ddev = ddev; -- cgit v1.2.3 From bccafec957a5c4b22ac29e53a39e82d0a0008348 Mon Sep 17 00:00:00 2001 From: Erico Nunes Date: Mon, 13 Mar 2023 00:30:50 +0100 Subject: drm/lima: add usage counting method to ctx_mgr lima maintains a context manager per drm_file, similar to amdgpu. In order to account for the complete usage per drm_file, all of the associated contexts need to be considered. Previously released contexts also need to be accounted for but their drm_sched_entity info is gone once they get released, so account for it in the ctx_mgr. Signed-off-by: Erico Nunes Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20230312233052.21095-2-nunes.erico@gmail.com --- drivers/gpu/drm/lima/lima_ctx.c | 30 +++++++++++++++++++++++++++++- drivers/gpu/drm/lima/lima_ctx.h | 3 +++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c index 891d5cd5019a..e008e586fad0 100644 --- a/drivers/gpu/drm/lima/lima_ctx.c +++ b/drivers/gpu/drm/lima/lima_ctx.c @@ -15,6 +15,7 @@ int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id) if (!ctx) return -ENOMEM; ctx->dev = dev; + ctx->mgr = mgr; kref_init(&ctx->refcnt); for (i = 0; i < lima_pipe_num; i++) { @@ -42,10 +43,17 @@ err_out0: static void lima_ctx_do_release(struct kref *ref) { struct lima_ctx *ctx = container_of(ref, struct lima_ctx, refcnt); + struct lima_ctx_mgr *mgr = ctx->mgr; int i; - for (i = 0; i < lima_pipe_num; i++) + for (i = 0; i < lima_pipe_num; i++) { + struct lima_sched_context *context = &ctx->context[i]; + struct drm_sched_entity *entity = &context->base; + + mgr->elapsed_ns[i] += entity->elapsed_ns; + lima_sched_context_fini(ctx->dev->pipe + i, ctx->context + i); + } kfree(ctx); } @@ -99,3 +107,23 @@ void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr) xa_destroy(&mgr->handles); mutex_destroy(&mgr->lock); } + +void lima_ctx_mgr_usage(struct lima_ctx_mgr *mgr, u64 usage[lima_pipe_num]) +{ + struct lima_ctx *ctx; + unsigned long id; + + for (int i = 0; i < lima_pipe_num; i++) + usage[i] = mgr->elapsed_ns[i]; + + mutex_lock(&mgr->lock); + xa_for_each(&mgr->handles, id, ctx) { + for (int i = 0; i < lima_pipe_num; i++) { + struct lima_sched_context *context = &ctx->context[i]; + struct drm_sched_entity *entity = &context->base; + + usage[i] += entity->elapsed_ns; + } + } + mutex_unlock(&mgr->lock); +} diff --git a/drivers/gpu/drm/lima/lima_ctx.h b/drivers/gpu/drm/lima/lima_ctx.h index 74e2be09090f..6068863880eb 100644 --- a/drivers/gpu/drm/lima/lima_ctx.h +++ b/drivers/gpu/drm/lima/lima_ctx.h @@ -12,6 +12,7 @@ struct lima_ctx { struct kref refcnt; struct lima_device *dev; + struct lima_ctx_mgr *mgr; struct lima_sched_context context[lima_pipe_num]; atomic_t guilty; @@ -23,6 +24,7 @@ struct lima_ctx { struct lima_ctx_mgr { struct mutex lock; struct xarray handles; + u64 elapsed_ns[lima_pipe_num]; }; int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id); @@ -31,5 +33,6 @@ struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id); void lima_ctx_put(struct lima_ctx *ctx); void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr); void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr); +void lima_ctx_mgr_usage(struct lima_ctx_mgr *mgr, u64 usage[lima_pipe_num]); #endif -- cgit v1.2.3 From 87767de835edf527b879a363d518c33da68adb81 Mon Sep 17 00:00:00 2001 From: Erico Nunes Date: Mon, 13 Mar 2023 00:30:51 +0100 Subject: drm/lima: allocate unique id per drm_file To track if fds are pointing to the same execution context and export the expected information to fdinfo, similar to what is done in other drivers. Signed-off-by: Erico Nunes Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20230312233052.21095-3-nunes.erico@gmail.com --- drivers/gpu/drm/lima/lima_device.h | 3 +++ drivers/gpu/drm/lima/lima_drv.c | 12 ++++++++++++ drivers/gpu/drm/lima/lima_drv.h | 1 + 3 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h index 41b9d7b4bcc7..71b2db60d161 100644 --- a/drivers/gpu/drm/lima/lima_device.h +++ b/drivers/gpu/drm/lima/lima_device.h @@ -106,6 +106,9 @@ struct lima_device { struct lima_dump_head dump; struct list_head error_task_list; struct mutex error_task_list_lock; + + struct xarray active_contexts; + u32 next_context_id; }; static inline struct lima_device * diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c index 39cab4a55f57..f456a471216b 100644 --- a/drivers/gpu/drm/lima/lima_drv.c +++ b/drivers/gpu/drm/lima/lima_drv.c @@ -218,6 +218,11 @@ static int lima_drm_driver_open(struct drm_device *dev, struct drm_file *file) if (!priv) return -ENOMEM; + err = xa_alloc_cyclic(&ldev->active_contexts, &priv->id, priv, + xa_limit_32b, &ldev->next_context_id, GFP_KERNEL); + if (err < 0) + goto err_out0; + priv->vm = lima_vm_create(ldev); if (!priv->vm) { err = -ENOMEM; @@ -237,6 +242,9 @@ err_out0: static void lima_drm_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct lima_drm_priv *priv = file->driver_priv; + struct lima_device *ldev = to_lima_dev(dev); + + xa_erase(&ldev->active_contexts, priv->id); lima_ctx_mgr_fini(&priv->ctx_mgr); lima_vm_put(priv->vm); @@ -388,6 +396,8 @@ static int lima_pdev_probe(struct platform_device *pdev) ldev->dev = &pdev->dev; ldev->id = (enum lima_gpu_id)of_device_get_match_data(&pdev->dev); + xa_init_flags(&ldev->active_contexts, XA_FLAGS_ALLOC); + platform_set_drvdata(pdev, ldev); /* Allocate and initialize the DRM device. */ @@ -446,6 +456,8 @@ static int lima_pdev_remove(struct platform_device *pdev) struct lima_device *ldev = platform_get_drvdata(pdev); struct drm_device *ddev = ldev->ddev; + xa_destroy(&ldev->active_contexts); + sysfs_remove_bin_file(&ldev->dev->kobj, &lima_error_state_attr); drm_dev_unregister(ddev); diff --git a/drivers/gpu/drm/lima/lima_drv.h b/drivers/gpu/drm/lima/lima_drv.h index c738d288547b..e49b7ab651d0 100644 --- a/drivers/gpu/drm/lima/lima_drv.h +++ b/drivers/gpu/drm/lima/lima_drv.h @@ -20,6 +20,7 @@ struct lima_sched_task; struct drm_lima_gem_submit_bo; struct lima_drm_priv { + int id; struct lima_vm *vm; struct lima_ctx_mgr ctx_mgr; }; -- cgit v1.2.3 From 4a66f3da99dcb4dcbd28544110636b50adfb0f0d Mon Sep 17 00:00:00 2001 From: Erico Nunes Date: Mon, 13 Mar 2023 00:30:52 +0100 Subject: drm/lima: add show_fdinfo for drm usage stats This exposes an accumulated active time per client via the fdinfo infrastructure per execution engine, following Documentation/gpu/drm-usage-stats.rst. In lima, the exposed execution engines are gp and pp. Signed-off-by: Erico Nunes Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20230312233052.21095-4-nunes.erico@gmail.com --- drivers/gpu/drm/lima/lima_drv.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c index f456a471216b..3420875d6fc6 100644 --- a/drivers/gpu/drm/lima/lima_drv.c +++ b/drivers/gpu/drm/lima/lima_drv.c @@ -261,7 +261,36 @@ static const struct drm_ioctl_desc lima_drm_driver_ioctls[] = { DRM_IOCTL_DEF_DRV(LIMA_CTX_FREE, lima_ioctl_ctx_free, DRM_RENDER_ALLOW), }; -DEFINE_DRM_GEM_FOPS(lima_drm_driver_fops); +static void lima_drm_driver_show_fdinfo(struct seq_file *m, struct file *filp) +{ + struct drm_file *file = filp->private_data; + struct drm_device *dev = file->minor->dev; + struct lima_device *ldev = to_lima_dev(dev); + struct lima_drm_priv *priv = file->driver_priv; + struct lima_ctx_mgr *ctx_mgr = &priv->ctx_mgr; + u64 usage[lima_pipe_num]; + + lima_ctx_mgr_usage(ctx_mgr, usage); + + /* + * For a description of the text output format used here, see + * Documentation/gpu/drm-usage-stats.rst. + */ + seq_printf(m, "drm-driver:\t%s\n", dev->driver->name); + seq_printf(m, "drm-client-id:\t%u\n", priv->id); + for (int i = 0; i < lima_pipe_num; i++) { + struct lima_sched_pipe *pipe = &ldev->pipe[i]; + struct drm_gpu_scheduler *sched = &pipe->base; + + seq_printf(m, "drm-engine-%s:\t%llu ns\n", sched->name, usage[i]); + } +} + +static const struct file_operations lima_drm_driver_fops = { + .owner = THIS_MODULE, + DRM_GEM_FOPS, + .show_fdinfo = lima_drm_driver_show_fdinfo, +}; /* * Changelog: -- cgit v1.2.3 From f435b7ef3b360d689df2ffa8326352cd07940d92 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 30 Mar 2023 11:31:31 +0200 Subject: drm/bridge: lt8912b: Fix DSI Video Mode LT8912 DSI port supports only Non-Burst mode video operation with Sync Events and continuous clock on clock lane, correct dsi mode flags according to that removing MIPI_DSI_MODE_VIDEO_BURST flag. Cc: Fixes: 30e2ae943c26 ("drm/bridge: Introduce LT8912B DSI to HDMI bridge") Signed-off-by: Francesco Dolcini Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230330093131.424828-1-francesco@dolcini.it --- drivers/gpu/drm/bridge/lontium-lt8912b.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index b40baced1331..13c131ade268 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -504,7 +504,6 @@ static int lt8912_attach_dsi(struct lt8912 *lt) dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | - MIPI_DSI_MODE_VIDEO_BURST | MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET; -- cgit v1.2.3 From 207395da5a97035f06360638f0c2fcd92963ce95 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 2 Mar 2023 14:35:09 +0000 Subject: drm/prime: reject DMA-BUF attach when get_sg_table is missing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_gem_map_dma_buf() requires drm_gem_object_funcs.get_sg_table to be implemented, or else WARNs. Allow drivers to leave this hook unimplemented to implement purely local DMA-BUFs (ie, DMA-BUFs which cannot be imported anywhere else but the device which allocated them). In that case, reject imports to other devices in drm_gem_map_attach(). v2: new patch v3: use ENOSYS Signed-off-by: Simon Ser Cc: Daniel Vetter Cc: Tian Tao Cc: Maxime Ripard Cc: Hans de Goede Acked-by: Thomas Zimmermann Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230302143502.500661-1-contact@emersion.fr --- drivers/gpu/drm/drm_prime.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 149cd4ff6a3b..d29dafce9bb0 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -544,7 +544,8 @@ int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data, * Optional pinning of buffers is handled at dma-buf attach and detach time in * drm_gem_map_attach() and drm_gem_map_detach(). Backing storage itself is * handled by drm_gem_map_dma_buf() and drm_gem_unmap_dma_buf(), which relies on - * &drm_gem_object_funcs.get_sg_table. + * &drm_gem_object_funcs.get_sg_table. If &drm_gem_object_funcs.get_sg_table is + * unimplemented, exports into another device are rejected. * * For kernel-internal access there's drm_gem_dmabuf_vmap() and * drm_gem_dmabuf_vunmap(). Userspace mmap support is provided by @@ -583,6 +584,9 @@ int drm_gem_map_attach(struct dma_buf *dma_buf, { struct drm_gem_object *obj = dma_buf->priv; + if (!obj->funcs->get_sg_table) + return -ENOSYS; + return drm_gem_pin(obj); } EXPORT_SYMBOL(drm_gem_map_attach); -- cgit v1.2.3 From 41068c8b28e16f1c2c26c854271520e1f3afaa22 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 2 Mar 2023 14:35:13 +0000 Subject: drm/vram-helper: turn on PRIME import/export MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't populate drm_driver.gem_prime_import_sg_table so only DMA-BUFs exported from our own device can be imported. We don't populate drm_gem_object_funcs.get_sg_table so DMA-BUFs cannot be imported into another device. Still, this is useful to user-space to share buffers between processes and between API boundaries (e.g. wlroots hard-requires PRIME import/export support). v2: expand commit message Signed-off-by: Simon Ser Cc: Daniel Vetter Cc: Tian Tao Cc: Maxime Ripard Cc: Hans de Goede Acked-by: Thomas Zimmermann Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230302143502.500661-2-contact@emersion.fr --- include/drm/drm_gem_vram_helper.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h index d3e8920c0b64..f4aab64411d8 100644 --- a/include/drm/drm_gem_vram_helper.h +++ b/include/drm/drm_gem_vram_helper.h @@ -160,7 +160,9 @@ void drm_gem_vram_simple_display_pipe_cleanup_fb( .debugfs_init = drm_vram_mm_debugfs_init, \ .dumb_create = drm_gem_vram_driver_dumb_create, \ .dumb_map_offset = drm_gem_ttm_dumb_map_offset, \ - .gem_prime_mmap = drm_gem_prime_mmap + .gem_prime_mmap = drm_gem_prime_mmap, \ + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, \ + .prime_fd_to_handle = drm_gem_prime_fd_to_handle /* * VRAM memory manager -- cgit v1.2.3 From 2792aed16ca7e594916893698f526ff31eeed237 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 16 Oct 2022 02:36:32 +0200 Subject: drm/bridge: tc358767: Enable DSI burst mode, LPM, non-continuous clock The TC358767/TC358867/TC9595 are capable of DSI burst mode, which is more energy efficient than the non-burst modes. Make use of it. The TC358767/TC358867/TC9595 are capable of DSI non-continuous clock, since it sources the internal PLL clock from external clock source. The DSI non-continuous clock further reduces power utilization. The TC358767/TC358867/TC9595 may use DSI LPM for command transmissions, make sure this is configured correctly in the DSI mode flags. Signed-off-by: Marek Vasut Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20221016003632.406468-1-marex@denx.de --- drivers/gpu/drm/bridge/tc358767.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 6d16ec45ea61..91f7cb56a654 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1896,10 +1896,10 @@ static int tc_mipi_dsi_host_attach(struct tc_data *tc) "failed to create dsi device\n"); tc->dsi = dsi; - dsi->lanes = dsi_lanes; dsi->format = MIPI_DSI_FMT_RGB888; - dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_LPM | MIPI_DSI_CLOCK_NON_CONTINUOUS; ret = mipi_dsi_attach(dsi); if (ret < 0) { -- cgit v1.2.3 From 3e66f493e136e615ada3c0dc691119679b1df837 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 21 Feb 2023 21:04:07 +0100 Subject: dt-bindings: display: Start the info graphics with HS/VS change The VS signal change is synchronized to HS signal change, start the info graphics with that event, instead of having that event occur in the middle of it. Scope trace of DPI bus with HS/VS active HIGH looks as follows: ________________...__ VS...___/__ __ \______... HS...___/ \_______/ \__...__/ \___... ^ ^ | | | Used to start here -' | '--- Start info graphics here Signed-off-by: Marek Vasut Reviewed-by: Sam Ravnborg Acked-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20230221200407.16531-1-marex@denx.de --- .../bindings/display/panel/panel-timing.yaml | 46 +++++++++++----------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml index 0d317e61edd8..aea69b84ca5d 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml @@ -17,29 +17,29 @@ description: | The parameters are defined as seen in the following illustration. - +----------+-------------------------------------+----------+-------+ - | | ^ | | | - | | |vback_porch | | | - | | v | | | - +----------#######################################----------+-------+ - | # ^ # | | - | # | # | | - | hback # | # hfront | hsync | - | porch # | hactive # porch | len | - |<-------->#<-------+--------------------------->#<-------->|<----->| - | # | # | | - | # |vactive # | | - | # | # | | - | # v # | | - +----------#######################################----------+-------+ - | | ^ | | | - | | |vfront_porch | | | - | | v | | | - +----------+-------------------------------------+----------+-------+ - | | ^ | | | - | | |vsync_len | | | - | | v | | | - +----------+-------------------------------------+----------+-------+ + +-------+----------+-------------------------------------+----------+ + | | | ^ | | + | | | |vsync_len | | + | | | v | | + +-------+----------+-------------------------------------+----------+ + | | | ^ | | + | | | |vback_porch | | + | | | v | | + +-------+----------#######################################----------+ + | | # ^ # | + | | # | # | + | hsync | hback # | # hfront | + | len | porch # | hactive # porch | + |<----->|<-------->#<-------+--------------------------->#<-------->| + | | # | # | + | | # |vactive # | + | | # | # | + | | # v # | + +-------+----------#######################################----------+ + | | | ^ | | + | | | |vfront_porch | | + | | | v | | + +-------+----------+-------------------------------------+----------+ The following is the panel timings shown with time on the x-axis. -- cgit v1.2.3 From 92ba67347a3a6caa1843c81b0b48c48ccadf6915 Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Tue, 4 Apr 2023 12:01:01 +0800 Subject: video/aperture: Fix typos in comments EFI FB, VESA FB or VGA FB etc belong to firmware based framebuffer driver. Signed-off-by: Sui Jingfeng Reviewed-by: Javier Martinez Canillas Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20230404040101.2165600-1-suijingfeng@loongson.cn --- drivers/video/aperture.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/video/aperture.c b/drivers/video/aperture.c index 41e77de1ea82..b009468ffdff 100644 --- a/drivers/video/aperture.c +++ b/drivers/video/aperture.c @@ -20,7 +20,7 @@ * driver can be active at any given time. Many systems load a generic * graphics drivers, such as EFI-GOP or VESA, early during the boot process. * During later boot stages, they replace the generic driver with a dedicated, - * hardware-specific driver. To take over the device the dedicated driver + * hardware-specific driver. To take over the device, the dedicated driver * first has to remove the generic driver. Aperture functions manage * ownership of framebuffer memory and hand-over between drivers. * @@ -76,7 +76,7 @@ * generic EFI or VESA drivers, have to register themselves as owners of their * framebuffer apertures. Ownership of the framebuffer memory is achieved * by calling devm_aperture_acquire_for_platform_device(). If successful, the - * driveris the owner of the framebuffer range. The function fails if the + * driver is the owner of the framebuffer range. The function fails if the * framebuffer is already owned by another driver. See below for an example. * * .. code-block:: c @@ -126,7 +126,7 @@ * et al for the registered framebuffer range, the aperture helpers call * platform_device_unregister() and the generic driver unloads itself. The * generic driver also has to provide a remove function to make this work. - * Once hot unplugged fro mhardware, it may not access the device's + * Once hot unplugged from hardware, it may not access the device's * registers, framebuffer memory, ROM, etc afterwards. */ @@ -203,7 +203,7 @@ static void aperture_detach_platform_device(struct device *dev) /* * Remove the device from the device hierarchy. This is the right thing - * to do for firmware-based DRM drivers, such as EFI, VESA or VGA. After + * to do for firmware-based fb drivers, such as EFI, VESA or VGA. After * the new driver takes over the hardware, the firmware device's state * will be lost. * -- cgit v1.2.3 From c1ebead36099deb85384f6fb262fe619a04cee73 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 11 Jan 2023 16:41:02 +0100 Subject: drm/ast: Use drm_aperture_remove_conflicting_pci_framebuffers It's just open coded and matches. Note that Thomas said that his version apparently failed for some reason, but hey maybe we should try again. Signed-off-by: Daniel Vetter Cc: Dave Airlie Cc: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Tested-by: Thomas Zimmmermann Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230111154112.90575-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/ast/ast_drv.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index 3a7af6d5aa79..e1224ef4ad83 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -89,27 +89,13 @@ static const struct pci_device_id ast_pciidlist[] = { MODULE_DEVICE_TABLE(pci, ast_pciidlist); -static int ast_remove_conflicting_framebuffers(struct pci_dev *pdev) -{ - bool primary = false; - resource_size_t base, size; - - base = pci_resource_start(pdev, 0); - size = pci_resource_len(pdev, 0); -#ifdef CONFIG_X86 - primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; -#endif - - return drm_aperture_remove_conflicting_framebuffers(base, size, primary, &ast_driver); -} - static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct ast_device *ast; struct drm_device *dev; int ret; - ret = ast_remove_conflicting_framebuffers(pdev); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &ast_driver); if (ret) return ret; -- cgit v1.2.3 From deddc9127beb0409559aecf6f76aab10eb844579 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 11 Jan 2023 16:41:07 +0100 Subject: staging/lynxfb: Use pci aperture helper It exists! Note that since this is an exact copy, there shouldn't be any functional difference here. Signed-off-by: Daniel Vetter Cc: Sudip Mukherjee Cc: Teddy Wang Cc: linux-fbdev@vger.kernel.org Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230111154112.90575-6-daniel.vetter@ffwll.ch --- drivers/staging/sm750fb/sm750.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/staging/sm750fb/sm750.c b/drivers/staging/sm750fb/sm750.c index effc7fcc3703..22ace3168723 100644 --- a/drivers/staging/sm750fb/sm750.c +++ b/drivers/staging/sm750fb/sm750.c @@ -989,20 +989,6 @@ release_fb: return err; } -static int lynxfb_kick_out_firmware_fb(struct pci_dev *pdev) -{ - resource_size_t base = pci_resource_start(pdev, 0); - resource_size_t size = pci_resource_len(pdev, 0); - bool primary = false; - -#ifdef CONFIG_X86 - primary = pdev->resource[PCI_ROM_RESOURCE].flags & - IORESOURCE_ROM_SHADOW; -#endif - - return aperture_remove_conflicting_devices(base, size, primary, "sm750_fb1"); -} - static int lynxfb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1011,7 +997,7 @@ static int lynxfb_pci_probe(struct pci_dev *pdev, int fbidx; int err; - err = lynxfb_kick_out_firmware_fb(pdev); + err = aperture_remove_conflicting_pci_devices(pdev, "sm750_fb1"); if (err) return err; -- cgit v1.2.3 From 9b539c4d1b921bc9c8c578d4d50f0a7e7874d384 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 11 Jan 2023 16:41:08 +0100 Subject: fbdev/radeon: use pci aperture helpers It's not exactly the same since the open coded version doesn't set primary correctly. But that's a bugfix, so shouldn't hurt really. Signed-off-by: Daniel Vetter Cc: Benjamin Herrenschmidt Cc: linux-fbdev@vger.kernel.org Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230111154112.90575-7-daniel.vetter@ffwll.ch --- drivers/video/fbdev/aty/radeon_base.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c index 657064227de8..972c4bbedfa3 100644 --- a/drivers/video/fbdev/aty/radeon_base.c +++ b/drivers/video/fbdev/aty/radeon_base.c @@ -2238,14 +2238,6 @@ static const struct bin_attribute edid2_attr = { .read = radeon_show_edid2, }; -static int radeon_kick_out_firmware_fb(struct pci_dev *pdev) -{ - resource_size_t base = pci_resource_start(pdev, 0); - resource_size_t size = pci_resource_len(pdev, 0); - - return aperture_remove_conflicting_devices(base, size, false, KBUILD_MODNAME); -} - static int radeonfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -2296,7 +2288,7 @@ static int radeonfb_pci_register(struct pci_dev *pdev, rinfo->fb_base_phys = pci_resource_start (pdev, 0); rinfo->mmio_base_phys = pci_resource_start (pdev, 2); - ret = radeon_kick_out_firmware_fb(pdev); + ret = aperture_remove_conflicting_pci_devices(pdev, KBUILD_MODNAME); if (ret) goto err_release_fb; -- cgit v1.2.3 From 6f1ccbf07453eb1ee6bb24d6b531b88dd44ad229 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 3 Apr 2023 09:03:14 -0700 Subject: drm/vblank: Fix for drivers that do not drm_vblank_init() This should fix a crash that was reported on ast (and possibly other drivers which do not initialize vblank). fbcon: Taking over console Unable to handle kernel NULL pointer dereference at virtual address 0000000000000074 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=0000080009d16000 [0000000000000074] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000096000004 [#1] SMP Modules linked in: ip6table_nat tun nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 rfkill ip_set nf_tables nfnetlink qrtr sunrpc binfmt_misc vfat fat xfs snd_usb_audio snd_hwdep snd_usbmidi_lib snd_seq snd_pcm snd_rawmidi snd_timer snd_seq_device snd soundcore joydev mc ipmi_ssif ipmi_devintf ipmi_msghandler arm_spe_pmu arm_cmn arm_dsu_pmu arm_dmc620_pmu cppc_cpufreq loop zram crct10dif_ce polyval_ce nvme polyval_generic ghash_ce sbsa_gwdt igb nvme_core ast nvme_common i2c_algo_bit xgene_hwmon gpio_dwapb scsi_dh_rdac scsi_dh_emc scsi_dh_alua ip6_tables ip_tables dm_multipath fuse CPU: 12 PID: 469 Comm: kworker/12:1 Not tainted 6.3.0-rc2-00008-gd39e48ca80c0 #1 Hardware name: ADLINK AVA Developer Platform/AVA Developer Platform, BIOS TianoCore 2.04.100.07 (SYS: 2.06.20220308) 09/08/2022 Workqueue: events fbcon_register_existing_fbs pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : drm_crtc_next_vblank_start+0x2c/0x98 lr : drm_atomic_helper_wait_for_fences+0x90/0x240 sp : ffff80000d583960 x29: ffff80000d583960 x28: ffff07ff8fc187b0 x27: 0000000000000000 x26: ffff07ff99c08c00 x25: 0000000000000038 x24: ffff07ff99c0c000 x23: 0000000000000001 x22: 0000000000000038 x21: 0000000000000000 x20: ffff07ff9640a280 x19: 0000000000000000 x18: ffffffffffffffff x17: 0000000000000000 x16: ffffb24d2eece1c0 x15: 0000003038303178 x14: 3032393100000048 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : ffffb24d2eeeaca0 x8 : ffff80000d583628 x7 : 0000080077783000 x6 : 0000000000000000 x5 : ffff80000d584000 x4 : ffff07ff99c0c000 x3 : 0000000000000130 x2 : 0000000000000000 x1 : ffff80000d5839c0 x0 : ffff07ff99c0cc08 Call trace: drm_crtc_next_vblank_start+0x2c/0x98 drm_atomic_helper_wait_for_fences+0x90/0x240 drm_atomic_helper_commit+0xb0/0x188 drm_atomic_commit+0xb0/0xf0 drm_client_modeset_commit_atomic+0x218/0x280 drm_client_modeset_commit_locked+0x64/0x1a0 drm_client_modeset_commit+0x38/0x68 __drm_fb_helper_restore_fbdev_mode_unlocked+0xb0/0xf8 drm_fb_helper_set_par+0x44/0x88 fbcon_init+0x1e0/0x4a8 visual_init+0xbc/0x118 do_bind_con_driver.isra.0+0x194/0x3a0 do_take_over_console+0x50/0x70 do_fbcon_takeover+0x74/0xf8 do_fb_registered+0x13c/0x158 fbcon_register_existing_fbs+0x78/0xc0 process_one_work+0x1ec/0x478 worker_thread+0x74/0x418 kthread+0xec/0x100 ret_from_fork+0x10/0x20 Code: f9400004 b9409013 f940a082 9ba30a73 (b9407662) ---[ end trace 0000000000000000 ]--- v2: Use drm_dev_has_vblank() Reported-by: Nathan Chancellor Fixes: d39e48ca80c0 ("drm/atomic-helper: Set fence deadline for vblank") Signed-off-by: Rob Clark Reviewed-by: Thomas Zimmermann Tested-by: Nathan Chancellor Tested-by: Geert Uytterhoeven Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230403160314.1210533-1-robdclark@gmail.com --- drivers/gpu/drm/drm_vblank.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 299fa2a19a90..877e2067534f 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -996,10 +996,16 @@ EXPORT_SYMBOL(drm_crtc_vblank_count_and_time); int drm_crtc_next_vblank_start(struct drm_crtc *crtc, ktime_t *vblanktime) { unsigned int pipe = drm_crtc_index(crtc); - struct drm_vblank_crtc *vblank = &crtc->dev->vblank[pipe]; - struct drm_display_mode *mode = &vblank->hwmode; + struct drm_vblank_crtc *vblank; + struct drm_display_mode *mode; u64 vblank_start; + if (!drm_dev_has_vblank(crtc->dev)) + return -EINVAL; + + vblank = &crtc->dev->vblank[pipe]; + mode = &vblank->hwmode; + if (!vblank->framedur_ns || !vblank->linedur_ns) return -EINVAL; -- cgit v1.2.3 From 7e4d0b09a9f268dad595c3ea92d692a14389131a Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Tue, 4 Apr 2023 08:25:59 +0800 Subject: Revert "drm/lima: add show_fdinfo for drm usage stats" This reverts commit 4a66f3da99dcb4dcbd28544110636b50adfb0f0d. This is due to the depend commit has been reverted on upstream: commit baad10973fdb ("Revert "drm/scheduler: track GPU active time per entity"") Acked-by: Emil Velikov Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20230404002601.24136-2-yq882255@163.com --- drivers/gpu/drm/lima/lima_drv.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c index 3420875d6fc6..f456a471216b 100644 --- a/drivers/gpu/drm/lima/lima_drv.c +++ b/drivers/gpu/drm/lima/lima_drv.c @@ -261,36 +261,7 @@ static const struct drm_ioctl_desc lima_drm_driver_ioctls[] = { DRM_IOCTL_DEF_DRV(LIMA_CTX_FREE, lima_ioctl_ctx_free, DRM_RENDER_ALLOW), }; -static void lima_drm_driver_show_fdinfo(struct seq_file *m, struct file *filp) -{ - struct drm_file *file = filp->private_data; - struct drm_device *dev = file->minor->dev; - struct lima_device *ldev = to_lima_dev(dev); - struct lima_drm_priv *priv = file->driver_priv; - struct lima_ctx_mgr *ctx_mgr = &priv->ctx_mgr; - u64 usage[lima_pipe_num]; - - lima_ctx_mgr_usage(ctx_mgr, usage); - - /* - * For a description of the text output format used here, see - * Documentation/gpu/drm-usage-stats.rst. - */ - seq_printf(m, "drm-driver:\t%s\n", dev->driver->name); - seq_printf(m, "drm-client-id:\t%u\n", priv->id); - for (int i = 0; i < lima_pipe_num; i++) { - struct lima_sched_pipe *pipe = &ldev->pipe[i]; - struct drm_gpu_scheduler *sched = &pipe->base; - - seq_printf(m, "drm-engine-%s:\t%llu ns\n", sched->name, usage[i]); - } -} - -static const struct file_operations lima_drm_driver_fops = { - .owner = THIS_MODULE, - DRM_GEM_FOPS, - .show_fdinfo = lima_drm_driver_show_fdinfo, -}; +DEFINE_DRM_GEM_FOPS(lima_drm_driver_fops); /* * Changelog: -- cgit v1.2.3 From 4ad17bf571730475bf62290399d52b26ece8228c Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Tue, 4 Apr 2023 08:26:00 +0800 Subject: Revert "drm/lima: allocate unique id per drm_file" This reverts commit 87767de835edf527b879a363d518c33da68adb81. This is due to the depend commit has been reverted on upstream: commit baad10973fdb ("Revert "drm/scheduler: track GPU active time per entity"") Acked-by: Emil Velikov Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20230404002601.24136-3-yq882255@163.com --- drivers/gpu/drm/lima/lima_device.h | 3 --- drivers/gpu/drm/lima/lima_drv.c | 12 ------------ drivers/gpu/drm/lima/lima_drv.h | 1 - 3 files changed, 16 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h index 71b2db60d161..41b9d7b4bcc7 100644 --- a/drivers/gpu/drm/lima/lima_device.h +++ b/drivers/gpu/drm/lima/lima_device.h @@ -106,9 +106,6 @@ struct lima_device { struct lima_dump_head dump; struct list_head error_task_list; struct mutex error_task_list_lock; - - struct xarray active_contexts; - u32 next_context_id; }; static inline struct lima_device * diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c index f456a471216b..39cab4a55f57 100644 --- a/drivers/gpu/drm/lima/lima_drv.c +++ b/drivers/gpu/drm/lima/lima_drv.c @@ -218,11 +218,6 @@ static int lima_drm_driver_open(struct drm_device *dev, struct drm_file *file) if (!priv) return -ENOMEM; - err = xa_alloc_cyclic(&ldev->active_contexts, &priv->id, priv, - xa_limit_32b, &ldev->next_context_id, GFP_KERNEL); - if (err < 0) - goto err_out0; - priv->vm = lima_vm_create(ldev); if (!priv->vm) { err = -ENOMEM; @@ -242,9 +237,6 @@ err_out0: static void lima_drm_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct lima_drm_priv *priv = file->driver_priv; - struct lima_device *ldev = to_lima_dev(dev); - - xa_erase(&ldev->active_contexts, priv->id); lima_ctx_mgr_fini(&priv->ctx_mgr); lima_vm_put(priv->vm); @@ -396,8 +388,6 @@ static int lima_pdev_probe(struct platform_device *pdev) ldev->dev = &pdev->dev; ldev->id = (enum lima_gpu_id)of_device_get_match_data(&pdev->dev); - xa_init_flags(&ldev->active_contexts, XA_FLAGS_ALLOC); - platform_set_drvdata(pdev, ldev); /* Allocate and initialize the DRM device. */ @@ -456,8 +446,6 @@ static int lima_pdev_remove(struct platform_device *pdev) struct lima_device *ldev = platform_get_drvdata(pdev); struct drm_device *ddev = ldev->ddev; - xa_destroy(&ldev->active_contexts); - sysfs_remove_bin_file(&ldev->dev->kobj, &lima_error_state_attr); drm_dev_unregister(ddev); diff --git a/drivers/gpu/drm/lima/lima_drv.h b/drivers/gpu/drm/lima/lima_drv.h index e49b7ab651d0..c738d288547b 100644 --- a/drivers/gpu/drm/lima/lima_drv.h +++ b/drivers/gpu/drm/lima/lima_drv.h @@ -20,7 +20,6 @@ struct lima_sched_task; struct drm_lima_gem_submit_bo; struct lima_drm_priv { - int id; struct lima_vm *vm; struct lima_ctx_mgr ctx_mgr; }; -- cgit v1.2.3 From 8678c8b305bb0de99eb05d6dae74e04ca46827a4 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Tue, 4 Apr 2023 08:26:01 +0800 Subject: Revert "drm/lima: add usage counting method to ctx_mgr" This reverts commit bccafec957a5c4b22ac29e53a39e82d0a0008348. This is due to the depend commit has been reverted on upstream: commit baad10973fdb ("Revert "drm/scheduler: track GPU active time per entity"") Acked-by: Emil Velikov Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20230404002601.24136-4-yq882255@163.com --- drivers/gpu/drm/lima/lima_ctx.c | 30 +----------------------------- drivers/gpu/drm/lima/lima_ctx.h | 3 --- 2 files changed, 1 insertion(+), 32 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c index e008e586fad0..891d5cd5019a 100644 --- a/drivers/gpu/drm/lima/lima_ctx.c +++ b/drivers/gpu/drm/lima/lima_ctx.c @@ -15,7 +15,6 @@ int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id) if (!ctx) return -ENOMEM; ctx->dev = dev; - ctx->mgr = mgr; kref_init(&ctx->refcnt); for (i = 0; i < lima_pipe_num; i++) { @@ -43,17 +42,10 @@ err_out0: static void lima_ctx_do_release(struct kref *ref) { struct lima_ctx *ctx = container_of(ref, struct lima_ctx, refcnt); - struct lima_ctx_mgr *mgr = ctx->mgr; int i; - for (i = 0; i < lima_pipe_num; i++) { - struct lima_sched_context *context = &ctx->context[i]; - struct drm_sched_entity *entity = &context->base; - - mgr->elapsed_ns[i] += entity->elapsed_ns; - + for (i = 0; i < lima_pipe_num; i++) lima_sched_context_fini(ctx->dev->pipe + i, ctx->context + i); - } kfree(ctx); } @@ -107,23 +99,3 @@ void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr) xa_destroy(&mgr->handles); mutex_destroy(&mgr->lock); } - -void lima_ctx_mgr_usage(struct lima_ctx_mgr *mgr, u64 usage[lima_pipe_num]) -{ - struct lima_ctx *ctx; - unsigned long id; - - for (int i = 0; i < lima_pipe_num; i++) - usage[i] = mgr->elapsed_ns[i]; - - mutex_lock(&mgr->lock); - xa_for_each(&mgr->handles, id, ctx) { - for (int i = 0; i < lima_pipe_num; i++) { - struct lima_sched_context *context = &ctx->context[i]; - struct drm_sched_entity *entity = &context->base; - - usage[i] += entity->elapsed_ns; - } - } - mutex_unlock(&mgr->lock); -} diff --git a/drivers/gpu/drm/lima/lima_ctx.h b/drivers/gpu/drm/lima/lima_ctx.h index 6068863880eb..74e2be09090f 100644 --- a/drivers/gpu/drm/lima/lima_ctx.h +++ b/drivers/gpu/drm/lima/lima_ctx.h @@ -12,7 +12,6 @@ struct lima_ctx { struct kref refcnt; struct lima_device *dev; - struct lima_ctx_mgr *mgr; struct lima_sched_context context[lima_pipe_num]; atomic_t guilty; @@ -24,7 +23,6 @@ struct lima_ctx { struct lima_ctx_mgr { struct mutex lock; struct xarray handles; - u64 elapsed_ns[lima_pipe_num]; }; int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id); @@ -33,6 +31,5 @@ struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id); void lima_ctx_put(struct lima_ctx *ctx); void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr); void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr); -void lima_ctx_mgr_usage(struct lima_ctx_mgr *mgr, u64 usage[lima_pipe_num]); #endif -- cgit v1.2.3 From cb949ce504e829193234e26cb3042bb448465d52 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Mon, 3 Apr 2023 14:15:45 +0200 Subject: accel/ivpu: Remove D3hot delay for Meteorlake VPU on MTL has hardware optimizations and does not require 10ms D0 - D3hot transition delay imposed by PCI specification (PCIe r6.0, sec 5.9.) . The delay removal is traditionally done by adding PCI ID to quirk_remove_d3hot_delay() in drivers/pci/quirks.c . But since we do not need that optimization before driver probe and we can better specify in the ivpu driver on what (future) hardware use the optimization, we do not use quirk_remove_d3hot_delay() for that. Signed-off-by: Karol Wachowski Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230403121545.2995279-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 231f29bb5025..eb6405f9bf6b 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -433,6 +433,10 @@ static int ivpu_pci_init(struct ivpu_device *vdev) /* Clear any pending errors */ pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f); + /* VPU MTL does not require PCI spec 10m D3hot delay */ + if (ivpu_is_mtl(vdev)) + pdev->d3hot_delay = 0; + ret = pcim_enable_device(pdev); if (ret) { ivpu_err(vdev, "Failed to enable PCI device: %d\n", ret); -- cgit v1.2.3 From ce7498acaa88ac3db5385dad2317c03006c49837 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 5 Apr 2023 09:52:23 +0200 Subject: drm/bridge: ti-sn65dsi83: use dev_err_probe if host attach failed There might be cases where the host attach is deferred, use dev_err_probe to add more detailed information to /sys/kernel/debug/devices_deferred. Signed-off-by: Alexander Stein Reviewed-by: Laurent Pinchart Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230405075223.579461-1-alexander.stein@ew.tq-group.com --- drivers/gpu/drm/bridge/ti-sn65dsi83.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index 91ecfbe45bf9..988e537ab884 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -698,8 +698,10 @@ static int sn65dsi83_probe(struct i2c_client *client) drm_bridge_add(&ctx->bridge); ret = sn65dsi83_host_attach(ctx); - if (ret) + if (ret) { + dev_err_probe(dev, ret, "failed to attach DSI host\n"); goto err_remove_bridge; + } return 0; -- cgit v1.2.3 From 8cc0b604f2345c304b7ddc828b880f4f3f21816a Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 5 Apr 2023 10:10:56 +0200 Subject: drm: bridge: ldb: add missing \n in dev_warn() string dev_warn() and similar require a training \n. Signed-off-by: Luca Ceresoli Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230405081058.2347130-1-luca.ceresoli@bootlin.com --- drivers/gpu/drm/bridge/fsl-ldb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c index 450b352914f4..81b84a2ee6bb 100644 --- a/drivers/gpu/drm/bridge/fsl-ldb.c +++ b/drivers/gpu/drm/bridge/fsl-ldb.c @@ -170,7 +170,7 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, configured_link_freq = clk_get_rate(fsl_ldb->clk); if (configured_link_freq != requested_link_freq) - dev_warn(fsl_ldb->dev, "Configured LDB clock (%lu Hz) does not match requested LVDS clock: %lu Hz", + dev_warn(fsl_ldb->dev, "Configured LDB clock (%lu Hz) does not match requested LVDS clock: %lu Hz\n", configured_link_freq, requested_link_freq); -- cgit v1.2.3 From e09220f42b5c28dae51a3cf2810afcb4fad9909d Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 5 Apr 2023 10:10:57 +0200 Subject: drm: bridge: ldb: add support for using channel 1 only The LDB driver currently checks whether dual mode is used, otherwise it assumes only channel 0 is in use. Add support for using only channel 1. In device tree terms, this means linking port 2 only. Doing this cleanly requires changing the logic of the probe functions from this: 1. use of_graph_get_remote_node() on port 1 to find the panel 2. use drm_of_lvds_get_dual_link_pixel_order() to detect dual mode to this: 1. use of_graph_get_remote_node() twice to find remote ports 2. reuse the result of the above to know whether each channel is enabled and to find the panel 3. if (both channels as enabled) use drm_of_lvds_get_dual_link_pixel_order() to detect dual mode Also add a dev_dbg() to log the detected mode and log an error in case no panel was found (no channel enabled). Signed-off-by: Luca Ceresoli Reviewed-by: Marek Vasut Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230405081058.2347130-2-luca.ceresoli@bootlin.com --- drivers/gpu/drm/bridge/fsl-ldb.c | 101 +++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c index 81b84a2ee6bb..682623369498 100644 --- a/drivers/gpu/drm/bridge/fsl-ldb.c +++ b/drivers/gpu/drm/bridge/fsl-ldb.c @@ -84,10 +84,16 @@ struct fsl_ldb { struct drm_bridge *panel_bridge; struct clk *clk; struct regmap *regmap; - bool lvds_dual_link; const struct fsl_ldb_devdata *devdata; + bool ch0_enabled; + bool ch1_enabled; }; +static bool fsl_ldb_is_dual(const struct fsl_ldb *fsl_ldb) +{ + return (fsl_ldb->ch0_enabled && fsl_ldb->ch1_enabled); +} + static inline struct fsl_ldb *to_fsl_ldb(struct drm_bridge *bridge) { return container_of(bridge, struct fsl_ldb, bridge); @@ -95,7 +101,7 @@ static inline struct fsl_ldb *to_fsl_ldb(struct drm_bridge *bridge) static unsigned long fsl_ldb_link_frequency(struct fsl_ldb *fsl_ldb, int clock) { - if (fsl_ldb->lvds_dual_link) + if (fsl_ldb_is_dual(fsl_ldb)) return clock * 3500; else return clock * 7000; @@ -177,28 +183,21 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, clk_prepare_enable(fsl_ldb->clk); /* Program LDB_CTRL */ - reg = LDB_CTRL_CH0_ENABLE; + reg = (fsl_ldb->ch0_enabled ? LDB_CTRL_CH0_ENABLE : 0) | + (fsl_ldb->ch1_enabled ? LDB_CTRL_CH1_ENABLE : 0) | + (fsl_ldb_is_dual(fsl_ldb) ? LDB_CTRL_SPLIT_MODE : 0); - if (fsl_ldb->lvds_dual_link) - reg |= LDB_CTRL_CH1_ENABLE | LDB_CTRL_SPLIT_MODE; + if (lvds_format_24bpp) + reg |= (fsl_ldb->ch0_enabled ? LDB_CTRL_CH0_DATA_WIDTH : 0) | + (fsl_ldb->ch1_enabled ? LDB_CTRL_CH1_DATA_WIDTH : 0); - if (lvds_format_24bpp) { - reg |= LDB_CTRL_CH0_DATA_WIDTH; - if (fsl_ldb->lvds_dual_link) - reg |= LDB_CTRL_CH1_DATA_WIDTH; - } + if (lvds_format_jeida) + reg |= (fsl_ldb->ch0_enabled ? LDB_CTRL_CH0_BIT_MAPPING : 0) | + (fsl_ldb->ch1_enabled ? LDB_CTRL_CH1_BIT_MAPPING : 0); - if (lvds_format_jeida) { - reg |= LDB_CTRL_CH0_BIT_MAPPING; - if (fsl_ldb->lvds_dual_link) - reg |= LDB_CTRL_CH1_BIT_MAPPING; - } - - if (mode->flags & DRM_MODE_FLAG_PVSYNC) { - reg |= LDB_CTRL_DI0_VSYNC_POLARITY; - if (fsl_ldb->lvds_dual_link) - reg |= LDB_CTRL_DI1_VSYNC_POLARITY; - } + if (mode->flags & DRM_MODE_FLAG_PVSYNC) + reg |= (fsl_ldb->ch0_enabled ? LDB_CTRL_DI0_VSYNC_POLARITY : 0) | + (fsl_ldb->ch1_enabled ? LDB_CTRL_DI1_VSYNC_POLARITY : 0); regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->ldb_ctrl, reg); @@ -210,9 +209,8 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, /* Wait for VBG to stabilize. */ usleep_range(15, 20); - reg |= LVDS_CTRL_CH0_EN; - if (fsl_ldb->lvds_dual_link) - reg |= LVDS_CTRL_CH1_EN; + reg |= (fsl_ldb->ch0_enabled ? LVDS_CTRL_CH0_EN : 0) | + (fsl_ldb->ch1_enabled ? LVDS_CTRL_CH1_EN : 0); regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->lvds_ctrl, reg); } @@ -265,7 +263,7 @@ fsl_ldb_mode_valid(struct drm_bridge *bridge, { struct fsl_ldb *fsl_ldb = to_fsl_ldb(bridge); - if (mode->clock > (fsl_ldb->lvds_dual_link ? 160000 : 80000)) + if (mode->clock > (fsl_ldb_is_dual(fsl_ldb) ? 160000 : 80000)) return MODE_CLOCK_HIGH; return MODE_OK; @@ -286,7 +284,7 @@ static int fsl_ldb_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *panel_node; - struct device_node *port1, *port2; + struct device_node *remote1, *remote2; struct drm_panel *panel; struct fsl_ldb *fsl_ldb; int dual_link; @@ -311,10 +309,23 @@ static int fsl_ldb_probe(struct platform_device *pdev) if (IS_ERR(fsl_ldb->regmap)) return PTR_ERR(fsl_ldb->regmap); - /* Locate the panel DT node. */ - panel_node = of_graph_get_remote_node(dev->of_node, 1, 0); - if (!panel_node) - return -ENXIO; + /* Locate the remote ports and the panel node */ + remote1 = of_graph_get_remote_node(dev->of_node, 1, 0); + remote2 = of_graph_get_remote_node(dev->of_node, 2, 0); + fsl_ldb->ch0_enabled = (remote1 != NULL); + fsl_ldb->ch1_enabled = (remote2 != NULL); + panel_node = of_node_get(remote1 ? remote1 : remote2); + of_node_put(remote1); + of_node_put(remote2); + + if (!fsl_ldb->ch0_enabled && !fsl_ldb->ch1_enabled) { + of_node_put(panel_node); + return dev_err_probe(dev, -ENXIO, "No panel node found"); + } + + dev_dbg(dev, "Using %s\n", + fsl_ldb_is_dual(fsl_ldb) ? "dual-link mode" : + fsl_ldb->ch0_enabled ? "channel 0" : "channel 1"); panel = of_drm_find_panel(panel_node); of_node_put(panel_node); @@ -325,20 +336,26 @@ static int fsl_ldb_probe(struct platform_device *pdev) if (IS_ERR(fsl_ldb->panel_bridge)) return PTR_ERR(fsl_ldb->panel_bridge); - /* Determine whether this is dual-link configuration */ - port1 = of_graph_get_port_by_id(dev->of_node, 1); - port2 = of_graph_get_port_by_id(dev->of_node, 2); - dual_link = drm_of_lvds_get_dual_link_pixel_order(port1, port2); - of_node_put(port1); - of_node_put(port2); - if (dual_link == DRM_LVDS_DUAL_LINK_EVEN_ODD_PIXELS) { - dev_err(dev, "LVDS channel pixel swap not supported.\n"); - return -EINVAL; - } + if (fsl_ldb_is_dual(fsl_ldb)) { + struct device_node *port1, *port2; - if (dual_link == DRM_LVDS_DUAL_LINK_ODD_EVEN_PIXELS) - fsl_ldb->lvds_dual_link = true; + port1 = of_graph_get_port_by_id(dev->of_node, 1); + port2 = of_graph_get_port_by_id(dev->of_node, 2); + dual_link = drm_of_lvds_get_dual_link_pixel_order(port1, port2); + of_node_put(port1); + of_node_put(port2); + + if (dual_link < 0) + return dev_err_probe(dev, dual_link, + "Error getting dual link configuration\n"); + + /* Only DRM_LVDS_DUAL_LINK_ODD_EVEN_PIXELS is supported */ + if (dual_link == DRM_LVDS_DUAL_LINK_EVEN_ODD_PIXELS) { + dev_err(dev, "LVDS channel pixel swap not supported.\n"); + return -EINVAL; + } + } platform_set_drvdata(pdev, fsl_ldb); -- cgit v1.2.3 From ca161b259cc84fe1f4a2ce4c73c3832cf6f713f1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 3 Apr 2023 21:02:42 +0200 Subject: drm/bridge: ti-sn65dsi83: Do not generate HFP/HBP/HSA and EOT packet Do not generate the HS front and back porch gaps, the HSA gap and EOT packet, as per "SN65DSI83 datasheet SLLSEC1I - SEPTEMBER 2012 - REVISED OCTOBER 2020", page 22, these packets are not required. This makes the TI SN65DSI83 bridge work with Samsung DSIM on i.MX8MN. Signed-off-by: Marek Vasut Reviewed-by: Laurent Pinchart Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230403190242.224490-1-marex@denx.de --- drivers/gpu/drm/bridge/ti-sn65dsi83.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index 988e537ab884..75286c9afbb9 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -642,7 +642,9 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx) dsi->lanes = dsi_lanes; dsi->format = MIPI_DSI_FMT_RGB888; - dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP | + MIPI_DSI_MODE_VIDEO_NO_HSA | MIPI_DSI_MODE_NO_EOT_PACKET; ret = devm_mipi_dsi_attach(dev, dsi); if (ret < 0) { -- cgit v1.2.3 From 4224011374d1b4228a59b73149320c61eeb700b3 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 31 Mar 2023 11:02:04 +0800 Subject: drm/bridge: ps8640: Use constant sleep time for polling hpd The default hpd_wait_us in panel_edp.c is 2 seconds. This makes the sleep time in the polling of _ps8640_wait_hpd_asserted become 200ms. Change it to a constant 20ms to speed up the function. Signed-off-by: Pin-yen Lin Reviewed-by: Douglas Anderson Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230331030204.1179524-1-treapking@chromium.org --- drivers/gpu/drm/bridge/parade-ps8640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index b823e55650b1..c3eb45179405 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -184,7 +184,7 @@ static int _ps8640_wait_hpd_asserted(struct ps8640 *ps_bridge, unsigned long wai * actually connected to GPIO9). */ ret = regmap_read_poll_timeout(map, PAGE2_GPIO_H, status, - status & PS_GPIO9, wait_us / 10, wait_us); + status & PS_GPIO9, 20000, wait_us); /* * The first time we see HPD go high after a reset we delay an extra -- cgit v1.2.3 From a80c882183e36b483734681c830a332add912186 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 5 Apr 2023 12:04:52 +0200 Subject: drm/panel-edp: Add AUO NE135FBM-N41 v8.1 panel entry Add a panel entry with delay_200_500_e50 for the AUO NE135FBM-N41 version 8.1, found on a number of ACER laptops, including the Swift 3 (SF313-52, SF313-53), Chromebook Spin 513 (CP513-2H) and others. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20230405100452.44225-1-angelogioacchino.delregno@collabora.com --- drivers/gpu/drm/panel/panel-edp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 926906ca2304..e23ddab2126e 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -1879,6 +1879,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('B', 'O', 'E', 0x07d1, &boe_nv133fhm_n61.delay, "NV133FHM-N61"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x082d, &boe_nv133fhm_n61.delay, "NV133FHM-N62"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x094b, &delay_200_500_e50, "NT116WHM-N21"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x095f, &delay_200_500_e50, "NE135FBM-N41 v8.1"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x098d, &boe_nv110wtm_n61.delay, "NV110WTM-N61"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x09dd, &delay_200_500_e50, "NT116WHM-N21"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a5d, &delay_200_500_e50, "NV116WHM-N45"), -- cgit v1.2.3 From 77d08a2de6a43521f5a02848f11185b6f46af21c Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 5 Apr 2023 15:51:27 +0200 Subject: drm/bridge: ti-sn65dsi86: Allow GPIO operations to sleep There is no need to require non-sleeping GPIO access. Silence the WARN_ON() if GPIO is using e.g. I2C expanders. Signed-off-by: Alexander Stein Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20230405135127.769665-1-alexander.stein@ew.tq-group.com --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 1e26fa63845a..7a748785c545 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -363,7 +363,7 @@ static int __maybe_unused ti_sn65dsi86_resume(struct device *dev) /* td2: min 100 us after regulators before enabling the GPIO */ usleep_range(100, 110); - gpiod_set_value(pdata->enable_gpio, 1); + gpiod_set_value_cansleep(pdata->enable_gpio, 1); /* * If we have a reference clock we can enable communication w/ the @@ -386,7 +386,7 @@ static int __maybe_unused ti_sn65dsi86_suspend(struct device *dev) if (pdata->refclk) ti_sn65dsi86_disable_comms(pdata); - gpiod_set_value(pdata->enable_gpio, 0); + gpiod_set_value_cansleep(pdata->enable_gpio, 0); ret = regulator_bulk_disable(SN_REGULATOR_SUPPLY_NUM, pdata->supplies); if (ret) -- cgit v1.2.3 From 1935f0deb6116dd785ea64d8035eab0ff441255b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 4 Apr 2023 21:40:36 +0200 Subject: drm/fb-helper: set x/yres_virtual in drm_fb_helper_check_var Drivers are supposed to fix this up if needed if they don't outright reject it. Uncovered by 6c11df58fd1a ("fbmem: Check virtual screen sizes in fb_set_var()"). Reported-by: syzbot+20dcf81733d43ddff661@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=c5faf983bfa4a607de530cd3bb008888bf06cefc Cc: stable@vger.kernel.org # v5.4+ Cc: Daniel Vetter Cc: Javier Martinez Canillas Cc: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230404194038.472803-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_fb_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 63ec95e86d0e..aa9efc651a19 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1589,6 +1589,9 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } + var->xres_virtual = fb->width; + var->yres_virtual = fb->height; + /* * Workaround for SDL 1.2, which is known to be setting all pixel format * fields values to zero in some cases. We treat this situation as a -- cgit v1.2.3 From 36e239b5d509c1b564669e7ca3d016c444bbf808 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 4 Apr 2023 21:40:37 +0200 Subject: drm/fb-helper: drop redundant pixclock check from drm_fb_helper_set_par() The fb_check_var hook is supposed to validate all this stuff. Any errors from fb_set_par are considered driver/hw issues and resulting in dmesg warnings. Luckily we do fix up the pixclock already, so this is all fine. Signed-off-by: Daniel Vetter Reviewed-by: Javier Martinez Canillas Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230404194038.472803-2-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_fb_helper.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index aa9efc651a19..d4b4edde5adc 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1641,11 +1641,6 @@ int drm_fb_helper_set_par(struct fb_info *info) if (oops_in_progress) return -EBUSY; - if (var->pixclock != 0) { - drm_err(fb_helper->dev, "PIXEL CLOCK SET\n"); - return -EINVAL; - } - /* * Normally we want to make sure that a kms master takes precedence over * fbdev, to avoid fbdev flickering and occasionally stealing the -- cgit v1.2.3 From ee4cce0a8f03a3332ccf48ef8b420a65d02d1fcf Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 4 Apr 2023 21:40:38 +0200 Subject: drm/fb-helper: fix input validation gaps in check_var Apparently drivers need to check all this stuff themselves, which for most things makes sense I guess. And for everything else we luck out, because modern distros stopped supporting any other fbdev drivers than drm ones and I really don't want to argue anymore about who needs to check stuff. Therefore fixing all this just for drm fbdev emulation is good enough. Note that var->active is not set or validated. This is just control flow for fbmem.c and needs to be validated in there as needed. Reviewed-by: Javier Martinez Canillas Signed-off-by: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230404194038.472803-3-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_fb_helper.c | 49 ++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index d4b4edde5adc..64458982be40 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1537,6 +1537,27 @@ static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var, } } +static void __fill_var(struct fb_var_screeninfo *var, + struct drm_framebuffer *fb) +{ + int i; + + var->xres_virtual = fb->width; + var->yres_virtual = fb->height; + var->accel_flags = FB_ACCELF_TEXT; + var->bits_per_pixel = drm_format_info_bpp(fb->format, 0); + + var->height = var->width = 0; + var->left_margin = var->right_margin = 0; + var->upper_margin = var->lower_margin = 0; + var->hsync_len = var->vsync_len = 0; + var->sync = var->vmode = 0; + var->rotate = 0; + var->colorspace = 0; + for (i = 0; i < 4; i++) + var->reserved[i] = 0; +} + /** * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var * @var: screeninfo to check @@ -1589,8 +1610,22 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } - var->xres_virtual = fb->width; - var->yres_virtual = fb->height; + __fill_var(var, fb); + + /* + * fb_pan_display() validates this, but fb_set_par() doesn't and just + * falls over. Note that __fill_var above adjusts y/res_virtual. + */ + if (var->yoffset > var->yres_virtual - var->yres || + var->xoffset > var->xres_virtual - var->xres) + return -EINVAL; + + /* We neither support grayscale nor FOURCC (also stored in here). */ + if (var->grayscale > 0) + return -EINVAL; + + if (var->nonstd) + return -EINVAL; /* * Workaround for SDL 1.2, which is known to be setting all pixel format @@ -1606,11 +1641,6 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, drm_fb_helper_fill_pixel_fmt(var, format); } - /* - * Likewise, bits_per_pixel should be rounded up to a supported value. - */ - var->bits_per_pixel = bpp; - /* * drm fbdev emulation doesn't support changing the pixel format at all, * so reject all pixel format changing requests. @@ -2034,12 +2064,9 @@ static void drm_fb_helper_fill_var(struct fb_info *info, } info->pseudo_palette = fb_helper->pseudo_palette; - info->var.xres_virtual = fb->width; - info->var.yres_virtual = fb->height; - info->var.bits_per_pixel = drm_format_info_bpp(format, 0); - info->var.accel_flags = FB_ACCELF_TEXT; info->var.xoffset = 0; info->var.yoffset = 0; + __fill_var(&info->var, fb); info->var.activate = FB_ACTIVATE_NOW; drm_fb_helper_fill_pixel_fmt(&info->var, format); -- cgit v1.2.3 From 5d844091f2370f01752c3129b147861b9dcd3d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 4 Apr 2023 01:36:52 +0300 Subject: drm/scdc-helper: Pimp SCDC debugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include the device and connector information in the SCDC debugs. Makes it easier to figure out who did what. v2: Rely on connector->ddc (Maxime) Cc: Andrzej Hajda Cc: Neil Armstrong Cc: Robert Foss Cc: Laurent Pinchart Cc: Jonas Karlman Cc: Jernej Skrabec Cc: Thierry Reding Cc: Emma Anholt Cc: Maxime Ripard Cc: intel-gfx@lists.freedesktop.org Cc: linux-tegra@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230403223652.18848-1-ville.syrjala@linux.intel.com Reviewed-by: Laurent Pinchart Acked-by: Maxime Ripard Reviewed-by: Andrzej Hajda Acked-by: Thierry Reding --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 8 +++--- drivers/gpu/drm/display/drm_scdc_helper.c | 46 ++++++++++++++++++++----------- drivers/gpu/drm/i915/display/intel_ddi.c | 4 +-- drivers/gpu/drm/i915/display/intel_hdmi.c | 8 ++---- drivers/gpu/drm/tegra/sor.c | 15 ++++------ drivers/gpu/drm/vc4/vc4_hdmi.c | 21 ++++++++------ include/drm/display/drm_scdc_helper.h | 7 +++-- 7 files changed, 59 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index aa51c61a78c7..603bb3c51027 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1426,9 +1426,9 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi *hdmi, /* Control for TMDS Bit Period/TMDS Clock-Period Ratio */ if (dw_hdmi_support_scdc(hdmi, display)) { if (mtmdsclock > HDMI14_MAX_TMDSCLK) - drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 1); + drm_scdc_set_high_tmds_clock_ratio(&hdmi->connector, 1); else - drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 0); + drm_scdc_set_high_tmds_clock_ratio(&hdmi->connector, 0); } } EXPORT_SYMBOL_GPL(dw_hdmi_set_high_tmds_clock_ratio); @@ -2116,7 +2116,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION)); /* Enabled Scrambling in the Sink */ - drm_scdc_set_scrambling(hdmi->ddc, 1); + drm_scdc_set_scrambling(&hdmi->connector, 1); /* * To activate the scrambler feature, you must ensure @@ -2132,7 +2132,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL); hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ, HDMI_MC_SWRSTZ); - drm_scdc_set_scrambling(hdmi->ddc, 0); + drm_scdc_set_scrambling(&hdmi->connector, 0); } } diff --git a/drivers/gpu/drm/display/drm_scdc_helper.c b/drivers/gpu/drm/display/drm_scdc_helper.c index c3ad4ab2b456..6d2f244e5830 100644 --- a/drivers/gpu/drm/display/drm_scdc_helper.c +++ b/drivers/gpu/drm/display/drm_scdc_helper.c @@ -26,6 +26,8 @@ #include #include +#include +#include #include /** @@ -140,7 +142,7 @@ EXPORT_SYMBOL(drm_scdc_write); /** * drm_scdc_get_scrambling_status - what is status of scrambling? - * @adapter: I2C adapter for DDC channel + * @connector: connector * * Reads the scrambler status over SCDC, and checks the * scrambling status. @@ -148,14 +150,16 @@ EXPORT_SYMBOL(drm_scdc_write); * Returns: * True if the scrambling is enabled, false otherwise. */ -bool drm_scdc_get_scrambling_status(struct i2c_adapter *adapter) +bool drm_scdc_get_scrambling_status(struct drm_connector *connector) { u8 status; int ret; - ret = drm_scdc_readb(adapter, SCDC_SCRAMBLER_STATUS, &status); + ret = drm_scdc_readb(connector->ddc, SCDC_SCRAMBLER_STATUS, &status); if (ret < 0) { - DRM_DEBUG_KMS("Failed to read scrambling status: %d\n", ret); + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] Failed to read scrambling status: %d\n", + connector->base.id, connector->name, ret); return false; } @@ -165,7 +169,7 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status); /** * drm_scdc_set_scrambling - enable scrambling - * @adapter: I2C adapter for DDC channel + * @connector: connector * @enable: bool to indicate if scrambling is to be enabled/disabled * * Writes the TMDS config register over SCDC channel, and: @@ -175,14 +179,17 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status); * Returns: * True if scrambling is set/reset successfully, false otherwise. */ -bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable) +bool drm_scdc_set_scrambling(struct drm_connector *connector, + bool enable) { u8 config; int ret; - ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config); + ret = drm_scdc_readb(connector->ddc, SCDC_TMDS_CONFIG, &config); if (ret < 0) { - DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret); + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] Failed to read TMDS config: %d\n", + connector->base.id, connector->name, ret); return false; } @@ -191,9 +198,11 @@ bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable) else config &= ~SCDC_SCRAMBLING_ENABLE; - ret = drm_scdc_writeb(adapter, SCDC_TMDS_CONFIG, config); + ret = drm_scdc_writeb(connector->ddc, SCDC_TMDS_CONFIG, config); if (ret < 0) { - DRM_DEBUG_KMS("Failed to enable scrambling: %d\n", ret); + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] Failed to enable scrambling: %d\n", + connector->base.id, connector->name, ret); return false; } @@ -203,7 +212,7 @@ EXPORT_SYMBOL(drm_scdc_set_scrambling); /** * drm_scdc_set_high_tmds_clock_ratio - set TMDS clock ratio - * @adapter: I2C adapter for DDC channel + * @connector: connector * @set: ret or reset the high clock ratio * * @@ -230,14 +239,17 @@ EXPORT_SYMBOL(drm_scdc_set_scrambling); * Returns: * True if write is successful, false otherwise. */ -bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set) +bool drm_scdc_set_high_tmds_clock_ratio(struct drm_connector *connector, + bool set) { u8 config; int ret; - ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config); + ret = drm_scdc_readb(connector->ddc, SCDC_TMDS_CONFIG, &config); if (ret < 0) { - DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret); + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] Failed to read TMDS config: %d\n", + connector->base.id, connector->name, ret); return false; } @@ -246,9 +258,11 @@ bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set) else config &= ~SCDC_TMDS_BIT_CLOCK_RATIO_BY_40; - ret = drm_scdc_writeb(adapter, SCDC_TMDS_CONFIG, config); + ret = drm_scdc_writeb(connector->ddc, SCDC_TMDS_CONFIG, config); if (ret < 0) { - DRM_DEBUG_KMS("Failed to set TMDS clock ratio: %d\n", ret); + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] Failed to set TMDS clock ratio: %d\n", + connector->base.id, connector->name, ret); return false; } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 73240cf78c8b..d8a9790f9d36 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3988,8 +3988,8 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder, ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config); if (ret < 0) { - drm_err(&dev_priv->drm, "Failed to read TMDS config: %d\n", - ret); + drm_err(&dev_priv->drm, "[CONNECTOR:%d:%s] Failed to read TMDS config: %d\n", + connector->base.base.id, connector->base.name, ret); return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index c7e9e1fbed37..a690a5616506 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -2646,11 +2646,8 @@ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, bool scrambling) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct drm_scrambling *sink_scrambling = &connector->display_info.hdmi.scdc.scrambling; - struct i2c_adapter *adapter = - intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); if (!sink_scrambling->supported) return true; @@ -2661,9 +2658,8 @@ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, str_yes_no(scrambling), high_tmds_clock_ratio ? 40 : 10); /* Set TMDS bit clock ratio to 1/40 or 1/10, and enable/disable scrambling */ - return drm_scdc_set_high_tmds_clock_ratio(adapter, - high_tmds_clock_ratio) && - drm_scdc_set_scrambling(adapter, scrambling); + return drm_scdc_set_high_tmds_clock_ratio(connector, high_tmds_clock_ratio) && + drm_scdc_set_scrambling(connector, scrambling); } static u8 chv_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port) diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 8af632740673..34af6724914f 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -2140,10 +2140,8 @@ static void tegra_sor_hdmi_disable_scrambling(struct tegra_sor *sor) static void tegra_sor_hdmi_scdc_disable(struct tegra_sor *sor) { - struct i2c_adapter *ddc = sor->output.ddc; - - drm_scdc_set_high_tmds_clock_ratio(ddc, false); - drm_scdc_set_scrambling(ddc, false); + drm_scdc_set_high_tmds_clock_ratio(&sor->output.connector, false); + drm_scdc_set_scrambling(&sor->output.connector, false); tegra_sor_hdmi_disable_scrambling(sor); } @@ -2168,10 +2166,8 @@ static void tegra_sor_hdmi_enable_scrambling(struct tegra_sor *sor) static void tegra_sor_hdmi_scdc_enable(struct tegra_sor *sor) { - struct i2c_adapter *ddc = sor->output.ddc; - - drm_scdc_set_high_tmds_clock_ratio(ddc, true); - drm_scdc_set_scrambling(ddc, true); + drm_scdc_set_high_tmds_clock_ratio(&sor->output.connector, true); + drm_scdc_set_scrambling(&sor->output.connector, true); tegra_sor_hdmi_enable_scrambling(sor); } @@ -2179,9 +2175,8 @@ static void tegra_sor_hdmi_scdc_enable(struct tegra_sor *sor) static void tegra_sor_hdmi_scdc_work(struct work_struct *work) { struct tegra_sor *sor = container_of(work, struct tegra_sor, scdc.work); - struct i2c_adapter *ddc = sor->output.ddc; - if (!drm_scdc_get_scrambling_status(ddc)) { + if (!drm_scdc_get_scrambling_status(&sor->output.connector)) { DRM_DEBUG_KMS("SCDC not scrambled\n"); tegra_sor_hdmi_scdc_enable(sor); } diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 464c3cc8e6fb..06713d8b82b5 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -885,7 +885,8 @@ static void vc4_hdmi_set_infoframes(struct drm_encoder *encoder) static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder) { struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); - struct drm_device *drm = vc4_hdmi->connector.dev; + struct drm_connector *connector = &vc4_hdmi->connector; + struct drm_device *drm = connector->dev; const struct drm_display_mode *mode = &vc4_hdmi->saved_adjusted_mode; unsigned long flags; int idx; @@ -903,8 +904,8 @@ static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder) if (!drm_dev_enter(drm, &idx)) return; - drm_scdc_set_high_tmds_clock_ratio(vc4_hdmi->ddc, true); - drm_scdc_set_scrambling(vc4_hdmi->ddc, true); + drm_scdc_set_high_tmds_clock_ratio(connector, true); + drm_scdc_set_scrambling(connector, true); spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); HDMI_WRITE(HDMI_SCRAMBLER_CTL, HDMI_READ(HDMI_SCRAMBLER_CTL) | @@ -922,7 +923,8 @@ static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder) static void vc4_hdmi_disable_scrambling(struct drm_encoder *encoder) { struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); - struct drm_device *drm = vc4_hdmi->connector.dev; + struct drm_connector *connector = &vc4_hdmi->connector; + struct drm_device *drm = connector->dev; unsigned long flags; int idx; @@ -944,8 +946,8 @@ static void vc4_hdmi_disable_scrambling(struct drm_encoder *encoder) ~VC5_HDMI_SCRAMBLER_CTL_ENABLE); spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); - drm_scdc_set_scrambling(vc4_hdmi->ddc, false); - drm_scdc_set_high_tmds_clock_ratio(vc4_hdmi->ddc, false); + drm_scdc_set_scrambling(connector, false); + drm_scdc_set_high_tmds_clock_ratio(connector, false); drm_dev_exit(idx); } @@ -955,12 +957,13 @@ static void vc4_hdmi_scrambling_wq(struct work_struct *work) struct vc4_hdmi *vc4_hdmi = container_of(to_delayed_work(work), struct vc4_hdmi, scrambling_work); + struct drm_connector *connector = &vc4_hdmi->connector; - if (drm_scdc_get_scrambling_status(vc4_hdmi->ddc)) + if (drm_scdc_get_scrambling_status(connector)) return; - drm_scdc_set_high_tmds_clock_ratio(vc4_hdmi->ddc, true); - drm_scdc_set_scrambling(vc4_hdmi->ddc, true); + drm_scdc_set_high_tmds_clock_ratio(connector, true); + drm_scdc_set_scrambling(connector, true); queue_delayed_work(system_wq, &vc4_hdmi->scrambling_work, msecs_to_jiffies(SCRAMBLING_POLLING_DELAY_MS)); diff --git a/include/drm/display/drm_scdc_helper.h b/include/drm/display/drm_scdc_helper.h index ded01fd948b4..34600476a1b9 100644 --- a/include/drm/display/drm_scdc_helper.h +++ b/include/drm/display/drm_scdc_helper.h @@ -28,6 +28,7 @@ #include +struct drm_connector; struct i2c_adapter; ssize_t drm_scdc_read(struct i2c_adapter *adapter, u8 offset, void *buffer, @@ -71,9 +72,9 @@ static inline int drm_scdc_writeb(struct i2c_adapter *adapter, u8 offset, return drm_scdc_write(adapter, offset, &value, sizeof(value)); } -bool drm_scdc_get_scrambling_status(struct i2c_adapter *adapter); +bool drm_scdc_get_scrambling_status(struct drm_connector *connector); -bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable); -bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set); +bool drm_scdc_set_scrambling(struct drm_connector *connector, bool enable); +bool drm_scdc_set_high_tmds_clock_ratio(struct drm_connector *connector, bool set); #endif -- cgit v1.2.3 From f2c7ca890182d24ac817fa321489346000271c5c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 5 Apr 2023 15:31:05 +0200 Subject: drm/atomic-helper: Don't set deadline for modesets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the crtc is being switched on or off then the semantics of computing the timestampe of the next vblank is somewhat ill-defined. And indeed, the code splats with a warning in the timestamp computation code. Specifically it hits the check to make sure that atomic drivers have full set up the timing constants in the drm_vblank structure, and that's just not the case before the crtc is actually on. For robustness it seems best to just not set deadlines for modesets. v2: Also skip on inactive crtc (Ville) Link: https://lore.kernel.org/dri-devel/dfc21f18-7e1e-48f0-c05a-d659b9c90b91@linaro.org/ Fixes: d39e48ca80c0 ("drm/atomic-helper: Set fence deadline for vblank") Cc: Ville Syrjälä Cc: Rob Clark Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Reported-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov # test patch only Cc: Dmitry Baryshkov Reviewed-by: Ville Syrjälä Reviewed-by: Rob Clark Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230405133105.947834-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_atomic_helper.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index d4d2a2ce40f8..2c2c9caf0be5 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1528,6 +1528,12 @@ static void set_fence_deadline(struct drm_device *dev, for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) { ktime_t v; + if (drm_atomic_crtc_needs_modeset(new_crtc_state)) + continue; + + if (!new_crtc_state->active) + continue; + if (drm_crtc_next_vblank_start(crtc, &v)) continue; -- cgit v1.2.3 From 830f3f27a388220c8f7e3fad1a00feb708cb066e Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 27 Mar 2023 09:54:50 -0600 Subject: accel/qaic: Add documentation for AIC100 accelerator driver The Qualcomm Cloud AI 100 (AIC100) device is an Artificial Intelligence accelerator PCIe card. It contains a number of components both in the SoC and on the card which facilitate running workloads: QSM: management processor NSPs: workload compute units DMA Bridge: dedicated data mover for the workloads MHI: multiplexed communication channels DDR: workload storage and memory The Linux kernel driver for AIC100 is called "QAIC" and is located in the accel subsystem. Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Stanislaw Gruszka Reviewed-by: Jacek Lawrynowicz Acked-by: Oded Gabbay Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/1679932497-30277-2-git-send-email-quic_jhugo@quicinc.com --- Documentation/accel/index.rst | 1 + Documentation/accel/qaic/aic100.rst | 510 ++++++++++++++++++++++++++++++++++++ Documentation/accel/qaic/index.rst | 13 + Documentation/accel/qaic/qaic.rst | 170 ++++++++++++ 4 files changed, 694 insertions(+) create mode 100644 Documentation/accel/qaic/aic100.rst create mode 100644 Documentation/accel/qaic/index.rst create mode 100644 Documentation/accel/qaic/qaic.rst diff --git a/Documentation/accel/index.rst b/Documentation/accel/index.rst index 2b43c9a7f67b..e94a0160b6a0 100644 --- a/Documentation/accel/index.rst +++ b/Documentation/accel/index.rst @@ -8,6 +8,7 @@ Compute Accelerators :maxdepth: 1 introduction + qaic/index .. only:: subproject and html diff --git a/Documentation/accel/qaic/aic100.rst b/Documentation/accel/qaic/aic100.rst new file mode 100644 index 000000000000..c80d0f1307db --- /dev/null +++ b/Documentation/accel/qaic/aic100.rst @@ -0,0 +1,510 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +=============================== + Qualcomm Cloud AI 100 (AIC100) +=============================== + +Overview +======== + +The Qualcomm Cloud AI 100/AIC100 family of products (including SA9000P - part of +Snapdragon Ride) are PCIe adapter cards which contain a dedicated SoC ASIC for +the purpose of efficiently running Artificial Intelligence (AI) Deep Learning +inference workloads. They are AI accelerators. + +The PCIe interface of AIC100 is capable of PCIe Gen4 speeds over eight lanes +(x8). An individual SoC on a card can have up to 16 NSPs for running workloads. +Each SoC has an A53 management CPU. On card, there can be up to 32 GB of DDR. + +Multiple AIC100 cards can be hosted in a single system to scale overall +performance. AIC100 cards are multi-user capable and able to execute workloads +from multiple users in a concurrent manner. + +Hardware Description +==================== + +An AIC100 card consists of an AIC100 SoC, on-card DDR, and a set of misc +peripherals (PMICs, etc). + +An AIC100 card can either be a PCIe HHHL form factor (a traditional PCIe card), +or a Dual M.2 card. Both use PCIe to connect to the host system. + +As a PCIe endpoint/adapter, AIC100 uses the standard VendorID(VID)/ +DeviceID(DID) combination to uniquely identify itself to the host. AIC100 +uses the standard Qualcomm VID (0x17cb). All AIC100 SKUs use the same +AIC100 DID (0xa100). + +AIC100 does not implement FLR (function level reset). + +AIC100 implements MSI but does not implement MSI-X. AIC100 requires 17 MSIs to +operate (1 for MHI, 16 for the DMA Bridge). + +As a PCIe device, AIC100 utilizes BARs to provide host interfaces to the device +hardware. AIC100 provides 3, 64-bit BARs. + +* The first BAR is 4K in size, and exposes the MHI interface to the host. + +* The second BAR is 2M in size, and exposes the DMA Bridge interface to the + host. + +* The third BAR is variable in size based on an individual AIC100's + configuration, but defaults to 64K. This BAR currently has no purpose. + +From the host perspective, AIC100 has several key hardware components - + +* MHI (Modem Host Interface) +* QSM (QAIC Service Manager) +* NSPs (Neural Signal Processor) +* DMA Bridge +* DDR + +MHI +--- + +AIC100 has one MHI interface over PCIe. MHI itself is documented at +Documentation/mhi/index.rst MHI is the mechanism the host uses to communicate +with the QSM. Except for workload data via the DMA Bridge, all interaction with +the device occurs via MHI. + +QSM +--- + +QAIC Service Manager. This is an ARM A53 CPU that runs the primary +firmware of the card and performs on-card management tasks. It also +communicates with the host via MHI. Each AIC100 has one of +these. + +NSP +--- + +Neural Signal Processor. Each AIC100 has up to 16 of these. These are +the processors that run the workloads on AIC100. Each NSP is a Qualcomm Hexagon +(Q6) DSP with HVX and HMX. Each NSP can only run one workload at a time, but +multiple NSPs may be assigned to a single workload. Since each NSP can only run +one workload, AIC100 is limited to 16 concurrent workloads. Workload +"scheduling" is under the purview of the host. AIC100 does not automatically +timeslice. + +DMA Bridge +---------- + +The DMA Bridge is custom DMA engine that manages the flow of data +in and out of workloads. AIC100 has one of these. The DMA Bridge has 16 +channels, each consisting of a set of request/response FIFOs. Each active +workload is assigned a single DMA Bridge channel. The DMA Bridge exposes +hardware registers to manage the FIFOs (head/tail pointers), but requires host +memory to store the FIFOs. + +DDR +--- + +AIC100 has on-card DDR. In total, an AIC100 can have up to 32 GB of DDR. +This DDR is used to store workloads, data for the workloads, and is used by the +QSM for managing the device. NSPs are granted access to sections of the DDR by +the QSM. The host does not have direct access to the DDR, and must make +requests to the QSM to transfer data to the DDR. + +High-level Use Flow +=================== + +AIC100 is a multi-user, programmable accelerator typically used for running +neural networks in inferencing mode to efficiently perform AI operations. +AIC100 is not intended for training neural networks. AIC100 can be utilized +for generic compute workloads. + +Assuming a user wants to utilize AIC100, they would follow these steps: + +1. Compile the workload into an ELF targeting the NSP(s) +2. Make requests to the QSM to load the workload and related artifacts into the + device DDR +3. Make a request to the QSM to activate the workload onto a set of idle NSPs +4. Make requests to the DMA Bridge to send input data to the workload to be + processed, and other requests to receive processed output data from the + workload. +5. Once the workload is no longer required, make a request to the QSM to + deactivate the workload, thus putting the NSPs back into an idle state. +6. Once the workload and related artifacts are no longer needed for future + sessions, make requests to the QSM to unload the data from DDR. This frees + the DDR to be used by other users. + + +Boot Flow +========= + +AIC100 uses a flashless boot flow, derived from Qualcomm MSMs. + +When AIC100 is first powered on, it begins executing PBL (Primary Bootloader) +from ROM. PBL enumerates the PCIe link, and initializes the BHI (Boot Host +Interface) component of MHI. + +Using BHI, the host points PBL to the location of the SBL (Secondary Bootloader) +image. The PBL pulls the image from the host, validates it, and begins +execution of SBL. + +SBL initializes MHI, and uses MHI to notify the host that the device has entered +the SBL stage. SBL performs a number of operations: + +* SBL initializes the majority of hardware (anything PBL left uninitialized), + including DDR. +* SBL offloads the bootlog to the host. +* SBL synchronizes timestamps with the host for future logging. +* SBL uses the Sahara protocol to obtain the runtime firmware images from the + host. + +Once SBL has obtained and validated the runtime firmware, it brings the NSPs out +of reset, and jumps into the QSM. + +The QSM uses MHI to notify the host that the device has entered the QSM stage +(AMSS in MHI terms). At this point, the AIC100 device is fully functional, and +ready to process workloads. + +Userspace components +==================== + +Compiler +-------- + +An open compiler for AIC100 based on upstream LLVM can be found at: +https://github.com/quic/software-kit-for-qualcomm-cloud-ai-100-cc + +Usermode Driver (UMD) +--------------------- + +An open UMD that interfaces with the qaic kernel driver can be found at: +https://github.com/quic/software-kit-for-qualcomm-cloud-ai-100 + +Sahara loader +------------- + +An open implementation of the Sahara protocol called kickstart can be found at: +https://github.com/andersson/qdl + +MHI Channels +============ + +AIC100 defines a number of MHI channels for different purposes. This is a list +of the defined channels, and their uses. + ++----------------+---------+----------+----------------------------------------+ +| Channel name | IDs | EEs | Purpose | ++================+=========+==========+========================================+ +| QAIC_LOOPBACK | 0 & 1 | AMSS | Any data sent to the device on this | +| | | | channel is sent back to the host. | ++----------------+---------+----------+----------------------------------------+ +| QAIC_SAHARA | 2 & 3 | SBL | Used by SBL to obtain the runtime | +| | | | firmware from the host. | ++----------------+---------+----------+----------------------------------------+ +| QAIC_DIAG | 4 & 5 | AMSS | Used to communicate with QSM via the | +| | | | DIAG protocol. | ++----------------+---------+----------+----------------------------------------+ +| QAIC_SSR | 6 & 7 | AMSS | Used to notify the host of subsystem | +| | | | restart events, and to offload SSR | +| | | | crashdumps. | ++----------------+---------+----------+----------------------------------------+ +| QAIC_QDSS | 8 & 9 | AMSS | Used for the Qualcomm Debug Subsystem. | ++----------------+---------+----------+----------------------------------------+ +| QAIC_CONTROL | 10 & 11 | AMSS | Used for the Neural Network Control | +| | | | (NNC) protocol. This is the primary | +| | | | channel between host and QSM for | +| | | | managing workloads. | ++----------------+---------+----------+----------------------------------------+ +| QAIC_LOGGING | 12 & 13 | SBL | Used by the SBL to send the bootlog to | +| | | | the host. | ++----------------+---------+----------+----------------------------------------+ +| QAIC_STATUS | 14 & 15 | AMSS | Used to notify the host of Reliability,| +| | | | Accessibility, Serviceability (RAS) | +| | | | events. | ++----------------+---------+----------+----------------------------------------+ +| QAIC_TELEMETRY | 16 & 17 | AMSS | Used to get/set power/thermal/etc | +| | | | attributes. | ++----------------+---------+----------+----------------------------------------+ +| QAIC_DEBUG | 18 & 19 | AMSS | Not used. | ++----------------+---------+----------+----------------------------------------+ +| QAIC_TIMESYNC | 20 & 21 | SBL/AMSS | Used to synchronize timestamps in the | +| | | | device side logs with the host time | +| | | | source. | ++----------------+---------+----------+----------------------------------------+ + +DMA Bridge +========== + +Overview +-------- + +The DMA Bridge is one of the main interfaces to the host from the device +(the other being MHI). As part of activating a workload to run on NSPs, the QSM +assigns that network a DMA Bridge channel. A workload's DMA Bridge channel +(DBC for short) is solely for the use of that workload and is not shared with +other workloads. + +Each DBC is a pair of FIFOs that manage data in and out of the workload. One +FIFO is the request FIFO. The other FIFO is the response FIFO. + +Each DBC contains 4 registers in hardware: + +* Request FIFO head pointer (offset 0x0). Read only by the host. Indicates the + latest item in the FIFO the device has consumed. +* Request FIFO tail pointer (offset 0x4). Read/write by the host. Host + increments this register to add new items to the FIFO. +* Response FIFO head pointer (offset 0x8). Read/write by the host. Indicates + the latest item in the FIFO the host has consumed. +* Response FIFO tail pointer (offset 0xc). Read only by the host. Device + increments this register to add new items to the FIFO. + +The values in each register are indexes in the FIFO. To get the location of the +FIFO element pointed to by the register: FIFO base address + register * element +size. + +DBC registers are exposed to the host via the second BAR. Each DBC consumes +4KB of space in the BAR. + +The actual FIFOs are backed by host memory. When sending a request to the QSM +to activate a network, the host must donate memory to be used for the FIFOs. +Due to internal mapping limitations of the device, a single contiguous chunk of +memory must be provided per DBC, which hosts both FIFOs. The request FIFO will +consume the beginning of the memory chunk, and the response FIFO will consume +the end of the memory chunk. + +Request FIFO +------------ + +A request FIFO element has the following structure: + +.. code-block:: c + + struct request_elem { + u16 req_id; + u8 seq_id; + u8 pcie_dma_cmd; + u32 reserved; + u64 pcie_dma_source_addr; + u64 pcie_dma_dest_addr; + u32 pcie_dma_len; + u32 reserved; + u64 doorbell_addr; + u8 doorbell_attr; + u8 reserved; + u16 reserved; + u32 doorbell_data; + u32 sem_cmd0; + u32 sem_cmd1; + u32 sem_cmd2; + u32 sem_cmd3; + }; + +Request field descriptions: + +req_id + request ID. A request FIFO element and a response FIFO element with + the same request ID refer to the same command. + +seq_id + sequence ID within a request. Ignored by the DMA Bridge. + +pcie_dma_cmd + describes the DMA element of this request. + + * Bit(7) is the force msi flag, which overrides the DMA Bridge MSI logic + and generates a MSI when this request is complete, and QSM + configures the DMA Bridge to look at this bit. + * Bits(6:5) are reserved. + * Bit(4) is the completion code flag, and indicates that the DMA Bridge + shall generate a response FIFO element when this request is + complete. + * Bit(3) indicates if this request is a linked list transfer(0) or a bulk + transfer(1). + * Bit(2) is reserved. + * Bits(1:0) indicate the type of transfer. No transfer(0), to device(1), + from device(2). Value 3 is illegal. + +pcie_dma_source_addr + source address for a bulk transfer, or the address of the linked list. + +pcie_dma_dest_addr + destination address for a bulk transfer. + +pcie_dma_len + length of the bulk transfer. Note that the size of this field + limits transfers to 4G in size. + +doorbell_addr + address of the doorbell to ring when this request is complete. + +doorbell_attr + doorbell attributes. + + * Bit(7) indicates if a write to a doorbell is to occur. + * Bits(6:2) are reserved. + * Bits(1:0) contain the encoding of the doorbell length. 0 is 32-bit, + 1 is 16-bit, 2 is 8-bit, 3 is reserved. The doorbell address + must be naturally aligned to the specified length. + +doorbell_data + data to write to the doorbell. Only the bits corresponding to + the doorbell length are valid. + +sem_cmdN + semaphore command. + + * Bit(31) indicates this semaphore command is enabled. + * Bit(30) is the to-device DMA fence. Block this request until all + to-device DMA transfers are complete. + * Bit(29) is the from-device DMA fence. Block this request until all + from-device DMA transfers are complete. + * Bits(28:27) are reserved. + * Bits(26:24) are the semaphore command. 0 is NOP. 1 is init with the + specified value. 2 is increment. 3 is decrement. 4 is wait + until the semaphore is equal to the specified value. 5 is wait + until the semaphore is greater or equal to the specified value. + 6 is "P", wait until semaphore is greater than 0, then + decrement by 1. 7 is reserved. + * Bit(23) is reserved. + * Bit(22) is the semaphore sync. 0 is post sync, which means that the + semaphore operation is done after the DMA transfer. 1 is + presync, which gates the DMA transfer. Only one presync is + allowed per request. + * Bit(21) is reserved. + * Bits(20:16) is the index of the semaphore to operate on. + * Bits(15:12) are reserved. + * Bits(11:0) are the semaphore value to use in operations. + +Overall, a request is processed in 4 steps: + +1. If specified, the presync semaphore condition must be true +2. If enabled, the DMA transfer occurs +3. If specified, the postsync semaphore conditions must be true +4. If enabled, the doorbell is written + +By using the semaphores in conjunction with the workload running on the NSPs, +the data pipeline can be synchronized such that the host can queue multiple +requests of data for the workload to process, but the DMA Bridge will only copy +the data into the memory of the workload when the workload is ready to process +the next input. + +Response FIFO +------------- + +Once a request is fully processed, a response FIFO element is generated if +specified in pcie_dma_cmd. The structure of a response FIFO element: + +.. code-block:: c + + struct response_elem { + u16 req_id; + u16 completion_code; + }; + +req_id + matches the req_id of the request that generated this element. + +completion_code + status of this request. 0 is success. Non-zero is an error. + +The DMA Bridge will generate a MSI to the host as a reaction to activity in the +response FIFO of a DBC. The DMA Bridge hardware has an IRQ storm mitigation +algorithm, where it will only generate a MSI when the response FIFO transitions +from empty to non-empty (unless force MSI is enabled and triggered). In +response to this MSI, the host is expected to drain the response FIFO, and must +take care to handle any race conditions between draining the FIFO, and the +device inserting elements into the FIFO. + +Neural Network Control (NNC) Protocol +===================================== + +The NNC protocol is how the host makes requests to the QSM to manage workloads. +It uses the QAIC_CONTROL MHI channel. + +Each NNC request is packaged into a message. Each message is a series of +transactions. A passthrough type transaction can contain elements known as +commands. + +QSM requires NNC messages be little endian encoded and the fields be naturally +aligned. Since there are 64-bit elements in some NNC messages, 64-bit alignment +must be maintained. + +A message contains a header and then a series of transactions. A message may be +at most 4K in size from QSM to the host. From the host to the QSM, a message +can be at most 64K (maximum size of a single MHI packet), but there is a +continuation feature where message N+1 can be marked as a continuation of +message N. This is used for exceedingly large DMA xfer transactions. + +Transaction descriptions +------------------------ + +passthrough + Allows userspace to send an opaque payload directly to the QSM. + This is used for NNC commands. Userspace is responsible for managing + the QSM message requirements in the payload. + +dma_xfer + DMA transfer. Describes an object that the QSM should DMA into the + device via address and size tuples. + +activate + Activate a workload onto NSPs. The host must provide memory to be + used by the DBC. + +deactivate + Deactivate an active workload and return the NSPs to idle. + +status + Query the QSM about it's NNC implementation. Returns the NNC version, + and if CRC is used. + +terminate + Release a user's resources. + +dma_xfer_cont + Continuation of a previous DMA transfer. If a DMA transfer + cannot be specified in a single message (highly fragmented), this + transaction can be used to specify more ranges. + +validate_partition + Query to QSM to determine if a partition identifier is valid. + +Each message is tagged with a user id, and a partition id. The user id allows +QSM to track resources, and release them when the user goes away (eg the process +crashes). A partition id identifies the resource partition that QSM manages, +which this message applies to. + +Messages may have CRCs. Messages should have CRCs applied until the QSM +reports via the status transaction that CRCs are not needed. The QSM on the +SA9000P requires CRCs for black channel safing. + +Subsystem Restart (SSR) +======================= + +SSR is the concept of limiting the impact of an error. An AIC100 device may +have multiple users, each with their own workload running. If the workload of +one user crashes, the fallout of that should be limited to that workload and not +impact other workloads. SSR accomplishes this. + +If a particular workload crashes, QSM notifies the host via the QAIC_SSR MHI +channel. This notification identifies the workload by it's assigned DBC. A +multi-stage recovery process is then used to cleanup both sides, and get the +DBC/NSPs into a working state. + +When SSR occurs, any state in the workload is lost. Any inputs that were in +process, or queued by not yet serviced, are lost. The loaded artifacts will +remain in on-card DDR, but the host will need to re-activate the workload if +it desires to recover the workload. + +Reliability, Accessibility, Serviceability (RAS) +================================================ + +AIC100 is expected to be deployed in server systems where RAS ideology is +applied. Simply put, RAS is the concept of detecting, classifying, and +reporting errors. While PCIe has AER (Advanced Error Reporting) which factors +into RAS, AER does not allow for a device to report details about internal +errors. Therefore, AIC100 implements a custom RAS mechanism. When a RAS event +occurs, QSM will report the event with appropriate details via the QAIC_STATUS +MHI channel. A sysadmin may determine that a particular device needs +additional service based on RAS reports. + +Telemetry +========= + +QSM has the ability to report various physical attributes of the device, and in +some cases, to allow the host to control them. Examples include thermal limits, +thermal readings, and power readings. These items are communicated via the +QAIC_TELEMETRY MHI channel. diff --git a/Documentation/accel/qaic/index.rst b/Documentation/accel/qaic/index.rst new file mode 100644 index 000000000000..ad19b88d1a66 --- /dev/null +++ b/Documentation/accel/qaic/index.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +===================================== + accel/qaic Qualcomm Cloud AI driver +===================================== + +The accel/qaic driver supports the Qualcomm Cloud AI machine learning +accelerator cards. + +.. toctree:: + + qaic + aic100 diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst new file mode 100644 index 000000000000..72a70ab6e3a8 --- /dev/null +++ b/Documentation/accel/qaic/qaic.rst @@ -0,0 +1,170 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +============= + QAIC driver +============= + +The QAIC driver is the Kernel Mode Driver (KMD) for the AIC100 family of AI +accelerator products. + +Interrupts +========== + +While the AIC100 DMA Bridge hardware implements an IRQ storm mitigation +mechanism, it is still possible for an IRQ storm to occur. A storm can happen +if the workload is particularly quick, and the host is responsive. If the host +can drain the response FIFO as quickly as the device can insert elements into +it, then the device will frequently transition the response FIFO from empty to +non-empty and generate MSIs at a rate equivalent to the speed of the +workload's ability to process inputs. The lprnet (license plate reader network) +workload is known to trigger this condition, and can generate in excess of 100k +MSIs per second. It has been observed that most systems cannot tolerate this +for long, and will crash due to some form of watchdog due to the overhead of +the interrupt controller interrupting the host CPU. + +To mitigate this issue, the QAIC driver implements specific IRQ handling. When +QAIC receives an IRQ, it disables that line. This prevents the interrupt +controller from interrupting the CPU. Then AIC drains the FIFO. Once the FIFO +is drained, QAIC implements a "last chance" polling algorithm where QAIC will +sleep for a time to see if the workload will generate more activity. The IRQ +line remains disabled during this time. If no activity is detected, QAIC exits +polling mode and reenables the IRQ line. + +This mitigation in QAIC is very effective. The same lprnet usecase that +generates 100k IRQs per second (per /proc/interrupts) is reduced to roughly 64 +IRQs over 5 minutes while keeping the host system stable, and having the same +workload throughput performance (within run to run noise variation). + + +Neural Network Control (NNC) Protocol +===================================== + +The implementation of NNC is split between the KMD (QAIC) and UMD. In general +QAIC understands how to encode/decode NNC wire protocol, and elements of the +protocol which require kernel space knowledge to process (for example, mapping +host memory to device IOVAs). QAIC understands the structure of a message, and +all of the transactions. QAIC does not understand commands (the payload of a +passthrough transaction). + +QAIC handles and enforces the required little endianness and 64-bit alignment, +to the degree that it can. Since QAIC does not know the contents of a +passthrough transaction, it relies on the UMD to satisfy the requirements. + +The terminate transaction is of particular use to QAIC. QAIC is not aware of +the resources that are loaded onto a device since the majority of that activity +occurs within NNC commands. As a result, QAIC does not have the means to +roll back userspace activity. To ensure that a userspace client's resources +are fully released in the case of a process crash, or a bug, QAIC uses the +terminate command to let QSM know when a user has gone away, and the resources +can be released. + +QSM can report a version number of the NNC protocol it supports. This is in the +form of a Major number and a Minor number. + +Major number updates indicate changes to the NNC protocol which impact the +message format, or transactions (impacts QAIC). + +Minor number updates indicate changes to the NNC protocol which impact the +commands (does not impact QAIC). + +uAPI +==== + +QAIC defines a number of driver specific IOCTLs as part of the userspace API. +This section describes those APIs. + +DRM_IOCTL_QAIC_MANAGE + This IOCTL allows userspace to send a NNC request to the QSM. The call will + block until a response is received, or the request has timed out. + +DRM_IOCTL_QAIC_CREATE_BO + This IOCTL allows userspace to allocate a buffer object (BO) which can send + or receive data from a workload. The call will return a GEM handle that + represents the allocated buffer. The BO is not usable until it has been + sliced (see DRM_IOCTL_QAIC_ATTACH_SLICE_BO). + +DRM_IOCTL_QAIC_MMAP_BO + This IOCTL allows userspace to prepare an allocated BO to be mmap'd into the + userspace process. + +DRM_IOCTL_QAIC_ATTACH_SLICE_BO + This IOCTL allows userspace to slice a BO in preparation for sending the BO + to the device. Slicing is the operation of describing what portions of a BO + get sent where to a workload. This requires a set of DMA transfers for the + DMA Bridge, and as such, locks the BO to a specific DBC. + +DRM_IOCTL_QAIC_EXECUTE_BO + This IOCTL allows userspace to submit a set of sliced BOs to the device. The + call is non-blocking. Success only indicates that the BOs have been queued + to the device, but does not guarantee they have been executed. + +DRM_IOCTL_QAIC_PARTIAL_EXECUTE_BO + This IOCTL operates like DRM_IOCTL_QAIC_EXECUTE_BO, but it allows userspace + to shrink the BOs sent to the device for this specific call. If a BO + typically has N inputs, but only a subset of those is available, this IOCTL + allows userspace to indicate that only the first M bytes of the BO should be + sent to the device to minimize data transfer overhead. This IOCTL dynamically + recomputes the slicing, and therefore has some processing overhead before the + BOs can be queued to the device. + +DRM_IOCTL_QAIC_WAIT_BO + This IOCTL allows userspace to determine when a particular BO has been + processed by the device. The call will block until either the BO has been + processed and can be re-queued to the device, or a timeout occurs. + +DRM_IOCTL_QAIC_PERF_STATS_BO + This IOCTL allows userspace to collect performance statistics on the most + recent execution of a BO. This allows userspace to construct an end to end + timeline of the BO processing for a performance analysis. + +DRM_IOCTL_QAIC_PART_DEV + This IOCTL allows userspace to request a duplicate "shadow device". This extra + accelN device is associated with a specific partition of resources on the + AIC100 device and can be used for limiting a process to some subset of + resources. + +Userspace Client Isolation +========================== + +AIC100 supports multiple clients. Multiple DBCs can be consumed by a single +client, and multiple clients can each consume one or more DBCs. Workloads +may contain sensitive information therefore only the client that owns the +workload should be allowed to interface with the DBC. + +Clients are identified by the instance associated with their open(). A client +may only use memory they allocate, and DBCs that are assigned to their +workloads. Attempts to access resources assigned to other clients will be +rejected. + +Module parameters +================= + +QAIC supports the following module parameters: + +**datapath_polling (bool)** + +Configures QAIC to use a polling thread for datapath events instead of relying +on the device interrupts. Useful for platforms with broken multiMSI. Must be +set at QAIC driver initialization. Default is 0 (off). + +**mhi_timeout_ms (unsigned int)** + +Sets the timeout value for MHI operations in milliseconds (ms). Must be set +at the time the driver detects a device. Default is 2000 (2 seconds). + +**control_resp_timeout_s (unsigned int)** + +Sets the timeout value for QSM responses to NNC messages in seconds (s). Must +be set at the time the driver is sending a request to QSM. Default is 60 (one +minute). + +**wait_exec_default_timeout_ms (unsigned int)** + +Sets the default timeout for the wait_exec ioctl in milliseconds (ms). Must be +set prior to the waic_exec ioctl call. A value specified in the ioctl call +overrides this for that call. Default is 5000 (5 seconds). + +**datapath_poll_interval_us (unsigned int)** + +Sets the polling interval in microseconds (us) when datapath polling is active. +Takes effect at the next polling interval. Default is 100 (100 us). -- cgit v1.2.3 From c501ca23a6a306a7c11631e02a26c8e0a768d64b Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 27 Mar 2023 09:54:51 -0600 Subject: accel/qaic: Add uapi and core driver file Add the QAIC driver uapi file and core driver file that binds to the PCIe device. The core driver file also creates the accel device and manages all the interconnections between the different parts of the driver. The driver can be built as a module. If so, it will be called "qaic.ko". Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Stanislaw Gruszka Reviewed-by: Jacek Lawrynowicz Acked-by: Oded Gabbay Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/1679932497-30277-3-git-send-email-quic_jhugo@quicinc.com --- drivers/accel/qaic/qaic.h | 282 ++++++++++++++++++ drivers/accel/qaic/qaic_drv.c | 647 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/qaic_accel.h | 397 ++++++++++++++++++++++++++ 3 files changed, 1326 insertions(+) create mode 100644 drivers/accel/qaic/qaic.h create mode 100644 drivers/accel/qaic/qaic_drv.c create mode 100644 include/uapi/drm/qaic_accel.h diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h new file mode 100644 index 000000000000..f2bd637a0d4e --- /dev/null +++ b/drivers/accel/qaic/qaic.h @@ -0,0 +1,282 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _QAIC_H_ +#define _QAIC_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define QAIC_DBC_BASE SZ_128K +#define QAIC_DBC_SIZE SZ_4K + +#define QAIC_NO_PARTITION -1 + +#define QAIC_DBC_OFF(i) ((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE) + +#define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base) + +extern bool datapath_polling; + +struct qaic_user { + /* Uniquely identifies this user for the device */ + int handle; + struct kref ref_count; + /* Char device opened by this user */ + struct qaic_drm_device *qddev; + /* Node in list of users that opened this drm device */ + struct list_head node; + /* SRCU used to synchronize this user during cleanup */ + struct srcu_struct qddev_lock; + atomic_t chunk_id; +}; + +struct dma_bridge_chan { + /* Pointer to device strcut maintained by driver */ + struct qaic_device *qdev; + /* ID of this DMA bridge channel(DBC) */ + unsigned int id; + /* Synchronizes access to xfer_list */ + spinlock_t xfer_lock; + /* Base address of request queue */ + void *req_q_base; + /* Base address of response queue */ + void *rsp_q_base; + /* + * Base bus address of request queue. Response queue bus address can be + * calculated by adding request queue size to this variable + */ + dma_addr_t dma_addr; + /* Total size of request and response queue in byte */ + u32 total_size; + /* Capacity of request/response queue */ + u32 nelem; + /* The user that opened this DBC */ + struct qaic_user *usr; + /* + * Request ID of next memory handle that goes in request queue. One + * memory handle can enqueue more than one request elements, all + * this requests that belong to same memory handle have same request ID + */ + u16 next_req_id; + /* true: DBC is in use; false: DBC not in use */ + bool in_use; + /* + * Base address of device registers. Used to read/write request and + * response queue's head and tail pointer of this DBC. + */ + void __iomem *dbc_base; + /* Head of list where each node is a memory handle queued in request queue */ + struct list_head xfer_list; + /* Synchronizes DBC readers during cleanup */ + struct srcu_struct ch_lock; + /* + * When this DBC is released, any thread waiting on this wait queue is + * woken up + */ + wait_queue_head_t dbc_release; + /* Head of list where each node is a bo associated with this DBC */ + struct list_head bo_lists; + /* The irq line for this DBC. Used for polling */ + unsigned int irq; + /* Polling work item to simulate interrupts */ + struct work_struct poll_work; +}; + +struct qaic_device { + /* Pointer to base PCI device struct of our physical device */ + struct pci_dev *pdev; + /* Req. ID of request that will be queued next in MHI control device */ + u32 next_seq_num; + /* Base address of bar 0 */ + void __iomem *bar_0; + /* Base address of bar 2 */ + void __iomem *bar_2; + /* Controller structure for MHI devices */ + struct mhi_controller *mhi_cntrl; + /* MHI control channel device */ + struct mhi_device *cntl_ch; + /* List of requests queued in MHI control device */ + struct list_head cntl_xfer_list; + /* Synchronizes MHI control device transactions and its xfer list */ + struct mutex cntl_mutex; + /* Array of DBC struct of this device */ + struct dma_bridge_chan *dbc; + /* Work queue for tasks related to MHI control device */ + struct workqueue_struct *cntl_wq; + /* Synchronizes all the users of device during cleanup */ + struct srcu_struct dev_lock; + /* true: Device under reset; false: Device not under reset */ + bool in_reset; + /* + * true: A tx MHI transaction has failed and a rx buffer is still queued + * in control device. Such a buffer is considered lost rx buffer + * false: No rx buffer is lost in control device + */ + bool cntl_lost_buf; + /* Maximum number of DBC supported by this device */ + u32 num_dbc; + /* Reference to the drm_device for this device when it is created */ + struct qaic_drm_device *qddev; + /* Generate the CRC of a control message */ + u32 (*gen_crc)(void *msg); + /* Validate the CRC of a control message */ + bool (*valid_crc)(void *msg); +}; + +struct qaic_drm_device { + /* Pointer to the root device struct driven by this driver */ + struct qaic_device *qdev; + /* + * The physical device can be partition in number of logical devices. + * And each logical device is given a partition id. This member stores + * that id. QAIC_NO_PARTITION is a sentinel used to mark that this drm + * device is the actual physical device + */ + s32 partition_id; + /* Pointer to the drm device struct of this drm device */ + struct drm_device *ddev; + /* Head in list of users who have opened this drm device */ + struct list_head users; + /* Synchronizes access to users list */ + struct mutex users_mutex; +}; + +struct qaic_bo { + struct drm_gem_object base; + /* Scatter/gather table for allocate/imported BO */ + struct sg_table *sgt; + /* BO size requested by user. GEM object might be bigger in size. */ + u64 size; + /* Head in list of slices of this BO */ + struct list_head slices; + /* Total nents, for all slices of this BO */ + int total_slice_nents; + /* + * Direction of transfer. It can assume only two value DMA_TO_DEVICE and + * DMA_FROM_DEVICE. + */ + int dir; + /* The pointer of the DBC which operates on this BO */ + struct dma_bridge_chan *dbc; + /* Number of slice that belongs to this buffer */ + u32 nr_slice; + /* Number of slice that have been transferred by DMA engine */ + u32 nr_slice_xfer_done; + /* true = BO is queued for execution, true = BO is not queued */ + bool queued; + /* + * If true then user has attached slicing information to this BO by + * calling DRM_IOCTL_QAIC_ATTACH_SLICE_BO ioctl. + */ + bool sliced; + /* Request ID of this BO if it is queued for execution */ + u16 req_id; + /* Handle assigned to this BO */ + u32 handle; + /* Wait on this for completion of DMA transfer of this BO */ + struct completion xfer_done; + /* + * Node in linked list where head is dbc->xfer_list. + * This link list contain BO's that are queued for DMA transfer. + */ + struct list_head xfer_list; + /* + * Node in linked list where head is dbc->bo_lists. + * This link list contain BO's that are associated with the DBC it is + * linked to. + */ + struct list_head bo_list; + struct { + /* + * Latest timestamp(ns) at which kernel received a request to + * execute this BO + */ + u64 req_received_ts; + /* + * Latest timestamp(ns) at which kernel enqueued requests of + * this BO for execution in DMA queue + */ + u64 req_submit_ts; + /* + * Latest timestamp(ns) at which kernel received a completion + * interrupt for requests of this BO + */ + u64 req_processed_ts; + /* + * Number of elements already enqueued in DMA queue before + * enqueuing requests of this BO + */ + u32 queue_level_before; + } perf_stats; + +}; + +struct bo_slice { + /* Mapped pages */ + struct sg_table *sgt; + /* Number of requests required to queue in DMA queue */ + int nents; + /* See enum dma_data_direction */ + int dir; + /* Actual requests that will be copied in DMA queue */ + struct dbc_req *reqs; + struct kref ref_count; + /* true: No DMA transfer required */ + bool no_xfer; + /* Pointer to the parent BO handle */ + struct qaic_bo *bo; + /* Node in list of slices maintained by parent BO */ + struct list_head slice; + /* Size of this slice in bytes */ + u64 size; + /* Offset of this slice in buffer */ + u64 offset; +}; + +int get_dbc_req_elem_size(void); +int get_dbc_rsp_elem_size(void); +int get_cntl_version(struct qaic_device *qdev, struct qaic_user *usr, u16 *major, u16 *minor); +int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void qaic_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result); + +void qaic_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result); + +int qaic_control_open(struct qaic_device *qdev); +void qaic_control_close(struct qaic_device *qdev); +void qaic_release_usr(struct qaic_device *qdev, struct qaic_user *usr); + +irqreturn_t dbc_irq_threaded_fn(int irq, void *data); +irqreturn_t dbc_irq_handler(int irq, void *data); +int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr); +void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr); +void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id); +void release_dbc(struct qaic_device *qdev, u32 dbc_id); + +void wake_all_cntl(struct qaic_device *qdev); +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset); + +struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); + +int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void irq_polling_work(struct work_struct *work); + +#endif /* _QAIC_H_ */ diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c new file mode 100644 index 000000000000..1106ad88a5b6 --- /dev/null +++ b/drivers/accel/qaic/qaic_drv.c @@ -0,0 +1,647 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mhi_controller.h" +#include "mhi_qaic_ctrl.h" +#include "qaic.h" + +MODULE_IMPORT_NS(DMA_BUF); + +#define PCI_DEV_AIC100 0xa100 +#define QAIC_NAME "qaic" +#define QAIC_DESC "Qualcomm Cloud AI Accelerators" +#define CNTL_MAJOR 5 +#define CNTL_MINOR 0 + +bool datapath_polling; +module_param(datapath_polling, bool, 0400); +MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode"); +static bool link_up; +static DEFINE_IDA(qaic_usrs); + +static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id); +static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id); + +static void free_usr(struct kref *kref) +{ + struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count); + + cleanup_srcu_struct(&usr->qddev_lock); + ida_free(&qaic_usrs, usr->handle); + kfree(usr); +} + +static int qaic_open(struct drm_device *dev, struct drm_file *file) +{ + struct qaic_drm_device *qddev = dev->dev_private; + struct qaic_device *qdev = qddev->qdev; + struct qaic_user *usr; + int rcu_id; + int ret; + + rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->in_reset) { + ret = -ENODEV; + goto dev_unlock; + } + + usr = kmalloc(sizeof(*usr), GFP_KERNEL); + if (!usr) { + ret = -ENOMEM; + goto dev_unlock; + } + + usr->handle = ida_alloc(&qaic_usrs, GFP_KERNEL); + if (usr->handle < 0) { + ret = usr->handle; + goto free_usr; + } + usr->qddev = qddev; + atomic_set(&usr->chunk_id, 0); + init_srcu_struct(&usr->qddev_lock); + kref_init(&usr->ref_count); + + ret = mutex_lock_interruptible(&qddev->users_mutex); + if (ret) + goto cleanup_usr; + + list_add(&usr->node, &qddev->users); + mutex_unlock(&qddev->users_mutex); + + file->driver_priv = usr; + + srcu_read_unlock(&qdev->dev_lock, rcu_id); + return 0; + +cleanup_usr: + cleanup_srcu_struct(&usr->qddev_lock); +free_usr: + kfree(usr); +dev_unlock: + srcu_read_unlock(&qdev->dev_lock, rcu_id); + return ret; +} + +static void qaic_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct qaic_user *usr = file->driver_priv; + struct qaic_drm_device *qddev; + struct qaic_device *qdev; + int qdev_rcu_id; + int usr_rcu_id; + int i; + + qddev = usr->qddev; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (qddev) { + qdev = qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (!qdev->in_reset) { + qaic_release_usr(qdev, usr); + for (i = 0; i < qdev->num_dbc; ++i) + if (qdev->dbc[i].usr && qdev->dbc[i].usr->handle == usr->handle) + release_dbc(qdev, i); + } + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); + + mutex_lock(&qddev->users_mutex); + if (!list_empty(&usr->node)) + list_del_init(&usr->node); + mutex_unlock(&qddev->users_mutex); + } + + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + kref_put(&usr->ref_count, free_usr); + + file->driver_priv = NULL; +} + +DEFINE_DRM_ACCEL_FOPS(qaic_accel_fops); + +static const struct drm_ioctl_desc qaic_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(QAIC_MANAGE, qaic_manage_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_CREATE_BO, qaic_create_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_MMAP_BO, qaic_mmap_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_ATTACH_SLICE_BO, qaic_attach_slice_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_EXECUTE_BO, qaic_execute_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_PARTIAL_EXECUTE_BO, qaic_partial_execute_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_WAIT_BO, qaic_wait_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_PERF_STATS_BO, qaic_perf_stats_bo_ioctl, 0), +}; + +static const struct drm_driver qaic_accel_driver = { + .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL, + + .name = QAIC_NAME, + .desc = QAIC_DESC, + .date = "20190618", + + .fops = &qaic_accel_fops, + .open = qaic_open, + .postclose = qaic_postclose, + + .ioctls = qaic_drm_ioctls, + .num_ioctls = ARRAY_SIZE(qaic_drm_ioctls), + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = qaic_gem_prime_import, +}; + +static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id) +{ + struct qaic_drm_device *qddev; + struct drm_device *ddev; + struct device *pdev; + int ret; + + /* Hold off implementing partitions until the uapi is determined */ + if (partition_id != QAIC_NO_PARTITION) + return -EINVAL; + + pdev = &qdev->pdev->dev; + + qddev = kzalloc(sizeof(*qddev), GFP_KERNEL); + if (!qddev) + return -ENOMEM; + + ddev = drm_dev_alloc(&qaic_accel_driver, pdev); + if (IS_ERR(ddev)) { + ret = PTR_ERR(ddev); + goto ddev_fail; + } + + ddev->dev_private = qddev; + qddev->ddev = ddev; + + qddev->qdev = qdev; + qddev->partition_id = partition_id; + INIT_LIST_HEAD(&qddev->users); + mutex_init(&qddev->users_mutex); + + qdev->qddev = qddev; + + ret = drm_dev_register(ddev, 0); + if (ret) { + pci_dbg(qdev->pdev, "%s: drm_dev_register failed %d\n", __func__, ret); + goto drm_reg_fail; + } + + return 0; + +drm_reg_fail: + mutex_destroy(&qddev->users_mutex); + qdev->qddev = NULL; + drm_dev_put(ddev); +ddev_fail: + kfree(qddev); + return ret; +} + +static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) +{ + struct qaic_drm_device *qddev; + struct qaic_user *usr; + + qddev = qdev->qddev; + + /* + * Existing users get unresolvable errors till they close FDs. + * Need to sync carefully with users calling close(). The + * list of users can be modified elsewhere when the lock isn't + * held here, but the sync'ing the srcu with the mutex held + * could deadlock. Grab the mutex so that the list will be + * unmodified. The user we get will exist as long as the + * lock is held. Signal that the qcdev is going away, and + * grab a reference to the user so they don't go away for + * synchronize_srcu(). Then release the mutex to avoid + * deadlock and make sure the user has observed the signal. + * With the lock released, we cannot maintain any state of the + * user list. + */ + mutex_lock(&qddev->users_mutex); + while (!list_empty(&qddev->users)) { + usr = list_first_entry(&qddev->users, struct qaic_user, node); + list_del_init(&usr->node); + kref_get(&usr->ref_count); + usr->qddev = NULL; + mutex_unlock(&qddev->users_mutex); + synchronize_srcu(&usr->qddev_lock); + kref_put(&usr->ref_count, free_usr); + mutex_lock(&qddev->users_mutex); + } + mutex_unlock(&qddev->users_mutex); + + if (qddev->ddev) { + drm_dev_unregister(qddev->ddev); + drm_dev_put(qddev->ddev); + } + + kfree(qddev); +} + +static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct qaic_device *qdev; + u16 major, minor; + int ret; + + /* + * Invoking this function indicates that the control channel to the + * device is available. We use that as a signal to indicate that + * the device side firmware has booted. The device side firmware + * manages the device resources, so we need to communicate with it + * via the control channel in order to utilize the device. Therefore + * we wait until this signal to create the drm dev that userspace will + * use to control the device, because without the device side firmware, + * userspace can't do anything useful. + */ + + qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); + + qdev->in_reset = false; + + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->cntl_ch = mhi_dev; + + ret = qaic_control_open(qdev); + if (ret) { + pci_dbg(qdev->pdev, "%s: control_open failed %d\n", __func__, ret); + return ret; + } + + ret = get_cntl_version(qdev, NULL, &major, &minor); + if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) { + pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not supported. Supported version is (%d.%d). Ret: %d\n", + __func__, major, minor, CNTL_MAJOR, CNTL_MINOR, ret); + ret = -EINVAL; + goto close_control; + } + + ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION); + + return ret; + +close_control: + qaic_control_close(qdev); + return ret; +} + +static void qaic_mhi_remove(struct mhi_device *mhi_dev) +{ +/* This is redundant since we have already observed the device crash */ +} + +static void qaic_notify_reset(struct qaic_device *qdev) +{ + int i; + + qdev->in_reset = true; + /* wake up any waiters to avoid waiting for timeouts at sync */ + wake_all_cntl(qdev); + for (i = 0; i < qdev->num_dbc; ++i) + wakeup_dbc(qdev, i); + synchronize_srcu(&qdev->dev_lock); +} + +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset) +{ + int i; + + qaic_notify_reset(qdev); + + /* remove drmdevs to prevent new users from coming in */ + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); + + /* start tearing things down */ + for (i = 0; i < qdev->num_dbc; ++i) + release_dbc(qdev, i); + + if (exit_reset) + qdev->in_reset = false; +} + +static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct qaic_device *qdev; + int i; + + qdev = devm_kzalloc(&pdev->dev, sizeof(*qdev), GFP_KERNEL); + if (!qdev) + return NULL; + + if (id->device == PCI_DEV_AIC100) { + qdev->num_dbc = 16; + qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL); + if (!qdev->dbc) + return NULL; + } + + qdev->cntl_wq = alloc_workqueue("qaic_cntl", WQ_UNBOUND, 0); + if (!qdev->cntl_wq) + return NULL; + + pci_set_drvdata(pdev, qdev); + qdev->pdev = pdev; + + mutex_init(&qdev->cntl_mutex); + INIT_LIST_HEAD(&qdev->cntl_xfer_list); + init_srcu_struct(&qdev->dev_lock); + + for (i = 0; i < qdev->num_dbc; ++i) { + spin_lock_init(&qdev->dbc[i].xfer_lock); + qdev->dbc[i].qdev = qdev; + qdev->dbc[i].id = i; + INIT_LIST_HEAD(&qdev->dbc[i].xfer_list); + init_srcu_struct(&qdev->dbc[i].ch_lock); + init_waitqueue_head(&qdev->dbc[i].dbc_release); + INIT_LIST_HEAD(&qdev->dbc[i].bo_lists); + } + + return qdev; +} + +static void cleanup_qdev(struct qaic_device *qdev) +{ + int i; + + for (i = 0; i < qdev->num_dbc; ++i) + cleanup_srcu_struct(&qdev->dbc[i].ch_lock); + cleanup_srcu_struct(&qdev->dev_lock); + pci_set_drvdata(qdev->pdev, NULL); + destroy_workqueue(qdev->cntl_wq); +} + +static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev) +{ + int bars; + int ret; + + bars = pci_select_bars(pdev, IORESOURCE_MEM); + + /* make sure the device has the expected BARs */ + if (bars != (BIT(0) | BIT(2) | BIT(4))) { + pci_dbg(pdev, "%s: expected BARs 0, 2, and 4 not found in device. Found 0x%x\n", + __func__, bars); + return -EINVAL; + } + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + return ret; + ret = dma_set_max_seg_size(&pdev->dev, UINT_MAX); + if (ret) + return ret; + + qdev->bar_0 = devm_ioremap_resource(&pdev->dev, &pdev->resource[0]); + if (IS_ERR(qdev->bar_0)) + return PTR_ERR(qdev->bar_0); + + qdev->bar_2 = devm_ioremap_resource(&pdev->dev, &pdev->resource[2]); + if (IS_ERR(qdev->bar_2)) + return PTR_ERR(qdev->bar_2); + + /* Managed release since we use pcim_enable_device above */ + pci_set_master(pdev); + + return 0; +} + +static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev) +{ + int mhi_irq; + int ret; + int i; + + /* Managed release since we use pcim_enable_device */ + ret = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI); + if (ret < 0) + return ret; + + if (ret < 32) { + pci_err(pdev, "%s: Requested 32 MSIs. Obtained %d MSIs which is less than the 32 required.\n", + __func__, ret); + return -ENODEV; + } + + mhi_irq = pci_irq_vector(pdev, 0); + if (mhi_irq < 0) + return mhi_irq; + + for (i = 0; i < qdev->num_dbc; ++i) { + ret = devm_request_threaded_irq(&pdev->dev, pci_irq_vector(pdev, i + 1), + dbc_irq_handler, dbc_irq_threaded_fn, IRQF_SHARED, + "qaic_dbc", &qdev->dbc[i]); + if (ret) + return ret; + + if (datapath_polling) { + qdev->dbc[i].irq = pci_irq_vector(pdev, i + 1); + disable_irq_nosync(qdev->dbc[i].irq); + INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work); + } + } + + return mhi_irq; +} + +static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct qaic_device *qdev; + int mhi_irq; + int ret; + int i; + + qdev = create_qdev(pdev, id); + if (!qdev) + return -ENOMEM; + + ret = init_pci(qdev, pdev); + if (ret) + goto cleanup_qdev; + + for (i = 0; i < qdev->num_dbc; ++i) + qdev->dbc[i].dbc_base = qdev->bar_2 + QAIC_DBC_OFF(i); + + mhi_irq = init_msi(qdev, pdev); + if (mhi_irq < 0) { + ret = mhi_irq; + goto cleanup_qdev; + } + + qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq); + if (IS_ERR(qdev->mhi_cntrl)) { + ret = PTR_ERR(qdev->mhi_cntrl); + goto cleanup_qdev; + } + + return 0; + +cleanup_qdev: + cleanup_qdev(qdev); + return ret; +} + +static void qaic_pci_remove(struct pci_dev *pdev) +{ + struct qaic_device *qdev = pci_get_drvdata(pdev); + + if (!qdev) + return; + + qaic_dev_reset_clean_local_state(qdev, false); + qaic_mhi_free_controller(qdev->mhi_cntrl, link_up); + cleanup_qdev(qdev); +} + +static void qaic_pci_shutdown(struct pci_dev *pdev) +{ + /* see qaic_exit for what link_up is doing */ + link_up = true; + qaic_pci_remove(pdev); +} + +static pci_ers_result_t qaic_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t error) +{ + return PCI_ERS_RESULT_NEED_RESET; +} + +static void qaic_pci_reset_prepare(struct pci_dev *pdev) +{ + struct qaic_device *qdev = pci_get_drvdata(pdev); + + qaic_notify_reset(qdev); + qaic_mhi_start_reset(qdev->mhi_cntrl); + qaic_dev_reset_clean_local_state(qdev, false); +} + +static void qaic_pci_reset_done(struct pci_dev *pdev) +{ + struct qaic_device *qdev = pci_get_drvdata(pdev); + + qdev->in_reset = false; + qaic_mhi_reset_done(qdev->mhi_cntrl); +} + +static const struct mhi_device_id qaic_mhi_match_table[] = { + { .chan = "QAIC_CONTROL", }, + {}, +}; + +static struct mhi_driver qaic_mhi_driver = { + .id_table = qaic_mhi_match_table, + .remove = qaic_mhi_remove, + .probe = qaic_mhi_probe, + .ul_xfer_cb = qaic_mhi_ul_xfer_cb, + .dl_xfer_cb = qaic_mhi_dl_xfer_cb, + .driver = { + .name = "qaic_mhi", + }, +}; + +static const struct pci_device_id qaic_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC100), }, + { } +}; +MODULE_DEVICE_TABLE(pci, qaic_ids); + +static const struct pci_error_handlers qaic_pci_err_handler = { + .error_detected = qaic_pci_error_detected, + .reset_prepare = qaic_pci_reset_prepare, + .reset_done = qaic_pci_reset_done, +}; + +static struct pci_driver qaic_pci_driver = { + .name = QAIC_NAME, + .id_table = qaic_ids, + .probe = qaic_pci_probe, + .remove = qaic_pci_remove, + .shutdown = qaic_pci_shutdown, + .err_handler = &qaic_pci_err_handler, +}; + +static int __init qaic_init(void) +{ + int ret; + + ret = mhi_driver_register(&qaic_mhi_driver); + if (ret) { + pr_debug("qaic: mhi_driver_register failed %d\n", ret); + return ret; + } + + ret = pci_register_driver(&qaic_pci_driver); + if (ret) { + pr_debug("qaic: pci_register_driver failed %d\n", ret); + goto free_mhi; + } + + ret = mhi_qaic_ctrl_init(); + if (ret) { + pr_debug("qaic: mhi_qaic_ctrl_init failed %d\n", ret); + goto free_pci; + } + + return 0; + +free_pci: + pci_unregister_driver(&qaic_pci_driver); +free_mhi: + mhi_driver_unregister(&qaic_mhi_driver); + return ret; +} + +static void __exit qaic_exit(void) +{ + /* + * We assume that qaic_pci_remove() is called due to a hotplug event + * which would mean that the link is down, and thus + * qaic_mhi_free_controller() should not try to access the device during + * cleanup. + * We call pci_unregister_driver() below, which also triggers + * qaic_pci_remove(), but since this is module exit, we expect the link + * to the device to be up, in which case qaic_mhi_free_controller() + * should try to access the device during cleanup to put the device in + * a sane state. + * For that reason, we set link_up here to let qaic_mhi_free_controller + * know the expected link state. Since the module is going to be + * removed at the end of this, we don't need to worry about + * reinitializing the link_up state after the cleanup is done. + */ + link_up = true; + mhi_qaic_ctrl_deinit(); + pci_unregister_driver(&qaic_pci_driver); + mhi_driver_unregister(&qaic_mhi_driver); +} + +module_init(qaic_init); +module_exit(qaic_exit); + +MODULE_AUTHOR(QAIC_DESC " Kernel Driver Team"); +MODULE_DESCRIPTION(QAIC_DESC " Accel Driver"); +MODULE_LICENSE("GPL"); diff --git a/include/uapi/drm/qaic_accel.h b/include/uapi/drm/qaic_accel.h new file mode 100644 index 000000000000..2d348744a853 --- /dev/null +++ b/include/uapi/drm/qaic_accel.h @@ -0,0 +1,397 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + * + * Copyright (c) 2019-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef QAIC_ACCEL_H_ +#define QAIC_ACCEL_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* The length(4K) includes len and count fields of qaic_manage_msg */ +#define QAIC_MANAGE_MAX_MSG_LENGTH SZ_4K + +/* semaphore flags */ +#define QAIC_SEM_INSYNCFENCE 2 +#define QAIC_SEM_OUTSYNCFENCE 1 + +/* Semaphore commands */ +#define QAIC_SEM_NOP 0 +#define QAIC_SEM_INIT 1 +#define QAIC_SEM_INC 2 +#define QAIC_SEM_DEC 3 +#define QAIC_SEM_WAIT_EQUAL 4 +#define QAIC_SEM_WAIT_GT_EQ 5 /* Greater than or equal */ +#define QAIC_SEM_WAIT_GT_0 6 /* Greater than 0 */ + +#define QAIC_TRANS_UNDEFINED 0 +#define QAIC_TRANS_PASSTHROUGH_FROM_USR 1 +#define QAIC_TRANS_PASSTHROUGH_TO_USR 2 +#define QAIC_TRANS_PASSTHROUGH_FROM_DEV 3 +#define QAIC_TRANS_PASSTHROUGH_TO_DEV 4 +#define QAIC_TRANS_DMA_XFER_FROM_USR 5 +#define QAIC_TRANS_DMA_XFER_TO_DEV 6 +#define QAIC_TRANS_ACTIVATE_FROM_USR 7 +#define QAIC_TRANS_ACTIVATE_FROM_DEV 8 +#define QAIC_TRANS_ACTIVATE_TO_DEV 9 +#define QAIC_TRANS_DEACTIVATE_FROM_USR 10 +#define QAIC_TRANS_DEACTIVATE_FROM_DEV 11 +#define QAIC_TRANS_STATUS_FROM_USR 12 +#define QAIC_TRANS_STATUS_TO_USR 13 +#define QAIC_TRANS_STATUS_FROM_DEV 14 +#define QAIC_TRANS_STATUS_TO_DEV 15 +#define QAIC_TRANS_TERMINATE_FROM_DEV 16 +#define QAIC_TRANS_TERMINATE_TO_DEV 17 +#define QAIC_TRANS_DMA_XFER_CONT 18 +#define QAIC_TRANS_VALIDATE_PARTITION_FROM_DEV 19 +#define QAIC_TRANS_VALIDATE_PARTITION_TO_DEV 20 + +/** + * struct qaic_manage_trans_hdr - Header for a transaction in a manage message. + * @type: In. Identifies this transaction. See QAIC_TRANS_* defines. + * @len: In. Length of this transaction, including this header. + */ +struct qaic_manage_trans_hdr { + __u32 type; + __u32 len; +}; + +/** + * struct qaic_manage_trans_passthrough - Defines a passthrough transaction. + * @hdr: In. Header to identify this transaction. + * @data: In. Payload of this ransaction. Opaque to the driver. Userspace must + * encode in little endian and align/pad to 64-bit. + */ +struct qaic_manage_trans_passthrough { + struct qaic_manage_trans_hdr hdr; + __u8 data[]; +}; + +/** + * struct qaic_manage_trans_dma_xfer - Defines a DMA transfer transaction. + * @hdr: In. Header to identify this transaction. + * @tag: In. Identified this transfer in other transactions. Opaque to the + * driver. + * @pad: Structure padding. + * @addr: In. Address of the data to DMA to the device. + * @size: In. Length of the data to DMA to the device. + */ +struct qaic_manage_trans_dma_xfer { + struct qaic_manage_trans_hdr hdr; + __u32 tag; + __u32 pad; + __u64 addr; + __u64 size; +}; + +/** + * struct qaic_manage_trans_activate_to_dev - Defines an activate request. + * @hdr: In. Header to identify this transaction. + * @queue_size: In. Number of elements for DBC request and response queues. + * @eventfd: Unused. + * @options: In. Device specific options for this activate. + * @pad: Structure padding. Must be 0. + */ +struct qaic_manage_trans_activate_to_dev { + struct qaic_manage_trans_hdr hdr; + __u32 queue_size; + __u32 eventfd; + __u32 options; + __u32 pad; +}; + +/** + * struct qaic_manage_trans_activate_from_dev - Defines an activate response. + * @hdr: Out. Header to identify this transaction. + * @status: Out. Return code of the request from the device. + * @dbc_id: Out. Id of the assigned DBC for successful request. + * @options: Out. Device specific options for this activate. + */ +struct qaic_manage_trans_activate_from_dev { + struct qaic_manage_trans_hdr hdr; + __u32 status; + __u32 dbc_id; + __u64 options; +}; + +/** + * struct qaic_manage_trans_deactivate - Defines a deactivate request. + * @hdr: In. Header to identify this transaction. + * @dbc_id: In. Id of assigned DBC. + * @pad: Structure padding. Must be 0. + */ +struct qaic_manage_trans_deactivate { + struct qaic_manage_trans_hdr hdr; + __u32 dbc_id; + __u32 pad; +}; + +/** + * struct qaic_manage_trans_status_to_dev - Defines a status request. + * @hdr: In. Header to identify this transaction. + */ +struct qaic_manage_trans_status_to_dev { + struct qaic_manage_trans_hdr hdr; +}; + +/** + * struct qaic_manage_trans_status_from_dev - Defines a status response. + * @hdr: Out. Header to identify this transaction. + * @major: Out. NNC protocol version major number. + * @minor: Out. NNC protocol version minor number. + * @status: Out. Return code from device. + * @status_flags: Out. Flags from device. Bit 0 indicates if CRCs are required. + */ +struct qaic_manage_trans_status_from_dev { + struct qaic_manage_trans_hdr hdr; + __u16 major; + __u16 minor; + __u32 status; + __u64 status_flags; +}; + +/** + * struct qaic_manage_msg - Defines a message to the device. + * @len: In. Length of all the transactions contained within this message. + * @count: In. Number of transactions in this message. + * @data: In. Address to an array where the transactions can be found. + */ +struct qaic_manage_msg { + __u32 len; + __u32 count; + __u64 data; +}; + +/** + * struct qaic_create_bo - Defines a request to create a buffer object. + * @size: In. Size of the buffer in bytes. + * @handle: Out. GEM handle for the BO. + * @pad: Structure padding. Must be 0. + */ +struct qaic_create_bo { + __u64 size; + __u32 handle; + __u32 pad; +}; + +/** + * struct qaic_mmap_bo - Defines a request to prepare a BO for mmap(). + * @handle: In. Handle of the GEM BO to prepare for mmap(). + * @pad: Structure padding. Must be 0. + * @offset: Out. Offset value to provide to mmap(). + */ +struct qaic_mmap_bo { + __u32 handle; + __u32 pad; + __u64 offset; +}; + +/** + * struct qaic_sem - Defines a semaphore command for a BO slice. + * @val: In. Only lower 12 bits are valid. + * @index: In. Only lower 5 bits are valid. + * @presync: In. 1 if presync operation, 0 if postsync. + * @cmd: In. One of QAIC_SEM_*. + * @flags: In. Bitfield. See QAIC_SEM_INSYNCFENCE and QAIC_SEM_OUTSYNCFENCE + * @pad: Structure padding. Must be 0. + */ +struct qaic_sem { + __u16 val; + __u8 index; + __u8 presync; + __u8 cmd; + __u8 flags; + __u16 pad; +}; + +/** + * struct qaic_attach_slice_entry - Defines a single BO slice. + * @size: In. Size of this slice in bytes. + * @sem0: In. Semaphore command 0. Must be 0 is not valid. + * @sem1: In. Semaphore command 1. Must be 0 is not valid. + * @sem2: In. Semaphore command 2. Must be 0 is not valid. + * @sem3: In. Semaphore command 3. Must be 0 is not valid. + * @dev_addr: In. Device address this slice pushes to or pulls from. + * @db_addr: In. Address of the doorbell to ring. + * @db_data: In. Data to write to the doorbell. + * @db_len: In. Size of the doorbell data in bits - 32, 16, or 8. 0 is for + * inactive doorbells. + * @offset: In. Start of this slice as an offset from the start of the BO. + */ +struct qaic_attach_slice_entry { + __u64 size; + struct qaic_sem sem0; + struct qaic_sem sem1; + struct qaic_sem sem2; + struct qaic_sem sem3; + __u64 dev_addr; + __u64 db_addr; + __u32 db_data; + __u32 db_len; + __u64 offset; +}; + +/** + * struct qaic_attach_slice_hdr - Defines metadata for a set of BO slices. + * @count: In. Number of slices for this BO. + * @dbc_id: In. Associate the sliced BO with this DBC. + * @handle: In. GEM handle of the BO to slice. + * @dir: In. Direction of data flow. 1 = DMA_TO_DEVICE, 2 = DMA_FROM_DEVICE + * @size: In. Total length of the BO. + * If BO is imported (DMABUF/PRIME) then this size + * should not exceed the size of DMABUF provided. + * If BO is allocated using DRM_IOCTL_QAIC_CREATE_BO + * then this size should be exactly same as the size + * provided during DRM_IOCTL_QAIC_CREATE_BO. + * @dev_addr: In. Device address this slice pushes to or pulls from. + * @db_addr: In. Address of the doorbell to ring. + * @db_data: In. Data to write to the doorbell. + * @db_len: In. Size of the doorbell data in bits - 32, 16, or 8. 0 is for + * inactive doorbells. + * @offset: In. Start of this slice as an offset from the start of the BO. + */ +struct qaic_attach_slice_hdr { + __u32 count; + __u32 dbc_id; + __u32 handle; + __u32 dir; + __u64 size; +}; + +/** + * struct qaic_attach_slice - Defines a set of BO slices. + * @hdr: In. Metadata of the set of slices. + * @data: In. Pointer to an array containing the slice definitions. + */ +struct qaic_attach_slice { + struct qaic_attach_slice_hdr hdr; + __u64 data; +}; + +/** + * struct qaic_execute_entry - Defines a BO to submit to the device. + * @handle: In. GEM handle of the BO to commit to the device. + * @dir: In. Direction of data. 1 = to device, 2 = from device. + */ +struct qaic_execute_entry { + __u32 handle; + __u32 dir; +}; + +/** + * struct qaic_partial_execute_entry - Defines a BO to resize and submit. + * @handle: In. GEM handle of the BO to commit to the device. + * @dir: In. Direction of data. 1 = to device, 2 = from device. + * @resize: In. New size of the BO. Must be <= the original BO size. 0 is + * short for no resize. + */ +struct qaic_partial_execute_entry { + __u32 handle; + __u32 dir; + __u64 resize; +}; + +/** + * struct qaic_execute_hdr - Defines metadata for BO submission. + * @count: In. Number of BOs to submit. + * @dbc_id: In. DBC to submit the BOs on. + */ +struct qaic_execute_hdr { + __u32 count; + __u32 dbc_id; +}; + +/** + * struct qaic_execute - Defines a list of BOs to submit to the device. + * @hdr: In. BO list metadata. + * @data: In. Pointer to an array of BOs to submit. + */ +struct qaic_execute { + struct qaic_execute_hdr hdr; + __u64 data; +}; + +/** + * struct qaic_wait - Defines a blocking wait for BO execution. + * @handle: In. GEM handle of the BO to wait on. + * @timeout: In. Maximum time in ms to wait for the BO. + * @dbc_id: In. DBC the BO is submitted to. + * @pad: Structure padding. Must be 0. + */ +struct qaic_wait { + __u32 handle; + __u32 timeout; + __u32 dbc_id; + __u32 pad; +}; + +/** + * struct qaic_perf_stats_hdr - Defines metadata for getting BO perf info. + * @count: In. Number of BOs requested. + * @pad: Structure padding. Must be 0. + * @dbc_id: In. DBC the BO are associated with. + */ +struct qaic_perf_stats_hdr { + __u16 count; + __u16 pad; + __u32 dbc_id; +}; + +/** + * struct qaic_perf_stats - Defines a request for getting BO perf info. + * @hdr: In. Request metadata + * @data: In. Pointer to array of stats structures that will receive the data. + */ +struct qaic_perf_stats { + struct qaic_perf_stats_hdr hdr; + __u64 data; +}; + +/** + * struct qaic_perf_stats_entry - Defines a BO perf info. + * @handle: In. GEM handle of the BO to get perf stats for. + * @queue_level_before: Out. Number of elements in the queue before this BO + * was submitted. + * @num_queue_element: Out. Number of elements added to the queue to submit + * this BO. + * @submit_latency_us: Out. Time taken by the driver to submit this BO. + * @device_latency_us: Out. Time taken by the device to execute this BO. + * @pad: Structure padding. Must be 0. + */ +struct qaic_perf_stats_entry { + __u32 handle; + __u32 queue_level_before; + __u32 num_queue_element; + __u32 submit_latency_us; + __u32 device_latency_us; + __u32 pad; +}; + +#define DRM_QAIC_MANAGE 0x00 +#define DRM_QAIC_CREATE_BO 0x01 +#define DRM_QAIC_MMAP_BO 0x02 +#define DRM_QAIC_ATTACH_SLICE_BO 0x03 +#define DRM_QAIC_EXECUTE_BO 0x04 +#define DRM_QAIC_PARTIAL_EXECUTE_BO 0x05 +#define DRM_QAIC_WAIT_BO 0x06 +#define DRM_QAIC_PERF_STATS_BO 0x07 + +#define DRM_IOCTL_QAIC_MANAGE DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_MANAGE, struct qaic_manage_msg) +#define DRM_IOCTL_QAIC_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_CREATE_BO, struct qaic_create_bo) +#define DRM_IOCTL_QAIC_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_MMAP_BO, struct qaic_mmap_bo) +#define DRM_IOCTL_QAIC_ATTACH_SLICE_BO DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_ATTACH_SLICE_BO, struct qaic_attach_slice) +#define DRM_IOCTL_QAIC_EXECUTE_BO DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_EXECUTE_BO, struct qaic_execute) +#define DRM_IOCTL_QAIC_PARTIAL_EXECUTE_BO DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_PARTIAL_EXECUTE_BO, struct qaic_execute) +#define DRM_IOCTL_QAIC_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_WAIT_BO, struct qaic_wait) +#define DRM_IOCTL_QAIC_PERF_STATS_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_PERF_STATS_BO, struct qaic_perf_stats) + +#if defined(__cplusplus) +} +#endif + +#endif /* QAIC_ACCEL_H_ */ -- cgit v1.2.3 From a36bf7af868b4b84f026a752671df66f0809bd3d Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 27 Mar 2023 09:54:52 -0600 Subject: accel/qaic: Add MHI controller An AIC100 device contains a MHI interface with a number of different channels for controlling different aspects of the device. The MHI controller works with the MHI bus to enable and drive that interface. AIC100 uses the BHI protocol in PBL to load SBL. The MHI controller expects the SBL to be located at /lib/firmware/qcom/aic100/sbl.bin and expects the MHI bus to manage the process of loading and sending SBL to the device. Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Stanislaw Gruszka Reviewed-by: Jacek Lawrynowicz Acked-by: Oded Gabbay Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/1679932497-30277-4-git-send-email-quic_jhugo@quicinc.com --- drivers/accel/qaic/mhi_controller.c | 563 ++++++++++++++++++++++++++++++++++++ drivers/accel/qaic/mhi_controller.h | 16 + 2 files changed, 579 insertions(+) create mode 100644 drivers/accel/qaic/mhi_controller.c create mode 100644 drivers/accel/qaic/mhi_controller.h diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c new file mode 100644 index 000000000000..5036e58e7235 --- /dev/null +++ b/drivers/accel/qaic/mhi_controller.c @@ -0,0 +1,563 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#include +#include +#include +#include +#include +#include +#include + +#include "mhi_controller.h" +#include "qaic.h" + +#define MAX_RESET_TIME_SEC 25 + +static unsigned int mhi_timeout_ms = 2000; /* 2 sec default */ +module_param(mhi_timeout_ms, uint, 0600); +MODULE_PARM_DESC(mhi_timeout_ms, "MHI controller timeout value"); + +static struct mhi_channel_config aic100_channels[] = { + { + .name = "QAIC_LOOPBACK", + .num = 0, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOOPBACK", + .num = 1, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SAHARA", + .num = 2, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SAHARA", + .num = 3, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_DIAG", + .num = 4, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_DIAG", + .num = 5, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SSR", + .num = 6, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_SSR", + .num = 7, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_QDSS", + .num = 8, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_QDSS", + .num = 9, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_CONTROL", + .num = 10, + .num_elements = 128, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_CONTROL", + .num = 11, + .num_elements = 128, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOGGING", + .num = 12, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_LOGGING", + .num = 13, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_SBL, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_STATUS", + .num = 14, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_STATUS", + .num = 15, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TELEMETRY", + .num = 16, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TELEMETRY", + .num = 17, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_DEBUG", + .num = 18, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_DEBUG", + .num = 19, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "QAIC_TIMESYNC", + .num = 20, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_SBL | MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .num = 21, + .name = "QAIC_TIMESYNC", + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_SBL | MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, +}; + +static struct mhi_event_config aic100_events[] = { + { + .num_elements = 32, + .irq_moderation_ms = 0, + .irq = 0, + .channel = U32_MAX, + .priority = 1, + .mode = MHI_DB_BRST_DISABLE, + .data_type = MHI_ER_CTRL, + .hardware_event = false, + .client_managed = false, + .offload_channel = false, + }, +}; + +static struct mhi_controller_config aic100_config = { + .max_channels = 128, + .timeout_ms = 0, /* controlled by mhi_timeout */ + .buf_len = 0, + .num_channels = ARRAY_SIZE(aic100_channels), + .ch_cfg = aic100_channels, + .num_events = ARRAY_SIZE(aic100_events), + .event_cfg = aic100_events, + .use_bounce_buf = false, + .m2_no_db = false, +}; + +static int mhi_read_reg(struct mhi_controller *mhi_cntrl, void __iomem *addr, u32 *out) +{ + u32 tmp = readl_relaxed(addr); + + if (tmp == U32_MAX) + return -EIO; + + *out = tmp; + + return 0; +} + +static void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *addr, u32 val) +{ + writel_relaxed(val, addr); +} + +static int mhi_runtime_get(struct mhi_controller *mhi_cntrl) +{ + return 0; +} + +static void mhi_runtime_put(struct mhi_controller *mhi_cntrl) +{ +} + +static void mhi_status_cb(struct mhi_controller *mhi_cntrl, enum mhi_callback reason) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_cntrl->cntrl_dev)); + + /* this event occurs in atomic context */ + if (reason == MHI_CB_FATAL_ERROR) + pci_err(qdev->pdev, "Fatal error received from device. Attempting to recover\n"); + /* this event occurs in non-atomic context */ + if (reason == MHI_CB_SYS_ERROR) + qaic_dev_reset_clean_local_state(qdev, true); +} + +static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl) +{ + u8 time_sec = 1; + int current_ee; + int ret; + + /* Reset the device to bring the device in PBL EE */ + mhi_soc_reset(mhi_cntrl); + + /* + * Keep checking the execution environment(EE) after every 1 second + * interval. + */ + do { + msleep(1000); + current_ee = mhi_get_exec_env(mhi_cntrl); + } while (current_ee != MHI_EE_PBL && time_sec++ <= MAX_RESET_TIME_SEC); + + /* If the device is in PBL EE retry power up */ + if (current_ee == MHI_EE_PBL) + ret = mhi_async_power_up(mhi_cntrl); + else + ret = -EIO; + + return ret; +} + +struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar, + int mhi_irq) +{ + struct mhi_controller *mhi_cntrl; + int ret; + + mhi_cntrl = devm_kzalloc(&pci_dev->dev, sizeof(*mhi_cntrl), GFP_KERNEL); + if (!mhi_cntrl) + return ERR_PTR(-ENOMEM); + + mhi_cntrl->cntrl_dev = &pci_dev->dev; + + /* + * Covers the entire possible physical ram region. Remote side is + * going to calculate a size of this range, so subtract 1 to prevent + * rollover. + */ + mhi_cntrl->iova_start = 0; + mhi_cntrl->iova_stop = PHYS_ADDR_MAX - 1; + mhi_cntrl->status_cb = mhi_status_cb; + mhi_cntrl->runtime_get = mhi_runtime_get; + mhi_cntrl->runtime_put = mhi_runtime_put; + mhi_cntrl->read_reg = mhi_read_reg; + mhi_cntrl->write_reg = mhi_write_reg; + mhi_cntrl->regs = mhi_bar; + mhi_cntrl->reg_len = SZ_4K; + mhi_cntrl->nr_irqs = 1; + mhi_cntrl->irq = devm_kmalloc(&pci_dev->dev, sizeof(*mhi_cntrl->irq), GFP_KERNEL); + + if (!mhi_cntrl->irq) + return ERR_PTR(-ENOMEM); + + mhi_cntrl->irq[0] = mhi_irq; + mhi_cntrl->fw_image = "qcom/aic100/sbl.bin"; + + /* use latest configured timeout */ + aic100_config.timeout_ms = mhi_timeout_ms; + ret = mhi_register_controller(mhi_cntrl, &aic100_config); + if (ret) { + pci_err(pci_dev, "mhi_register_controller failed %d\n", ret); + return ERR_PTR(ret); + } + + ret = mhi_prepare_for_power_up(mhi_cntrl); + if (ret) { + pci_err(pci_dev, "mhi_prepare_for_power_up failed %d\n", ret); + goto prepare_power_up_fail; + } + + ret = mhi_async_power_up(mhi_cntrl); + /* + * If EIO is returned it is possible that device is in SBL EE, which is + * undesired. SOC reset the device and try to power up again. + */ + if (ret == -EIO && MHI_EE_SBL == mhi_get_exec_env(mhi_cntrl)) { + pci_err(pci_dev, "Found device in SBL at MHI init. Attempting a reset.\n"); + ret = mhi_reset_and_async_power_up(mhi_cntrl); + } + + if (ret) { + pci_err(pci_dev, "mhi_async_power_up failed %d\n", ret); + goto power_up_fail; + } + + return mhi_cntrl; + +power_up_fail: + mhi_unprepare_after_power_down(mhi_cntrl); +prepare_power_up_fail: + mhi_unregister_controller(mhi_cntrl); + return ERR_PTR(ret); +} + +void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up) +{ + mhi_power_down(mhi_cntrl, link_up); + mhi_unprepare_after_power_down(mhi_cntrl); + mhi_unregister_controller(mhi_cntrl); +} + +void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl) +{ + mhi_power_down(mhi_cntrl, true); +} + +void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl) +{ + struct pci_dev *pci_dev = container_of(mhi_cntrl->cntrl_dev, struct pci_dev, dev); + int ret; + + ret = mhi_async_power_up(mhi_cntrl); + if (ret) + pci_err(pci_dev, "mhi_async_power_up failed after reset %d\n", ret); +} diff --git a/drivers/accel/qaic/mhi_controller.h b/drivers/accel/qaic/mhi_controller.h new file mode 100644 index 000000000000..2ae45d768e24 --- /dev/null +++ b/drivers/accel/qaic/mhi_controller.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) 2019-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef MHICONTROLLERQAIC_H_ +#define MHICONTROLLERQAIC_H_ + +struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar, + int mhi_irq); +void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up); +void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl); +void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl); + +#endif /* MHICONTROLLERQAIC_H_ */ -- cgit v1.2.3 From 129776ac2e38231fa9c02ce20e116c99de291666 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 27 Mar 2023 09:54:53 -0600 Subject: accel/qaic: Add control path Add the control path component that talks to the management processor (QSM) to load workloads onto the AIC100 device. This implements the KMD portion of the NNC protocol over the QAIC_CONTROL MHI channel and the DRM_IOCTL_QAIC_MANAGE IOCTL to userspace. With this functionality, QAIC clients are able to load, run, and cleanup their workloads on the device but not interact with the workloads (run inferences). Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Stanislaw Gruszka Reviewed-by: Jacek Lawrynowicz Acked-by: Oded Gabbay Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/1679932497-30277-5-git-send-email-quic_jhugo@quicinc.com --- drivers/accel/qaic/qaic_control.c | 1526 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1526 insertions(+) create mode 100644 drivers/accel/qaic/qaic_control.c diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c new file mode 100644 index 000000000000..9f216eb6f76e --- /dev/null +++ b/drivers/accel/qaic/qaic_control.c @@ -0,0 +1,1526 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qaic.h" + +#define MANAGE_MAGIC_NUMBER ((__force __le32)0x43494151) /* "QAIC" in little endian */ +#define QAIC_DBC_Q_GAP SZ_256 +#define QAIC_DBC_Q_BUF_ALIGN SZ_4K +#define QAIC_MANAGE_EXT_MSG_LENGTH SZ_64K /* Max DMA message length */ +#define QAIC_WRAPPER_MAX_SIZE SZ_4K +#define QAIC_MHI_RETRY_WAIT_MS 100 +#define QAIC_MHI_RETRY_MAX 20 + +static unsigned int control_resp_timeout_s = 60; /* 60 sec default */ +module_param(control_resp_timeout_s, uint, 0600); +MODULE_PARM_DESC(control_resp_timeout_s, "Timeout for NNC responses from QSM"); + +struct manage_msg { + u32 len; + u32 count; + u8 data[]; +}; + +/* + * wire encoding structures for the manage protocol. + * All fields are little endian on the wire + */ +struct wire_msg_hdr { + __le32 crc32; /* crc of everything following this field in the message */ + __le32 magic_number; + __le32 sequence_number; + __le32 len; /* length of this message */ + __le32 count; /* number of transactions in this message */ + __le32 handle; /* unique id to track the resources consumed */ + __le32 partition_id; /* partition id for the request (signed) */ + __le32 padding; /* must be 0 */ +} __packed; + +struct wire_msg { + struct wire_msg_hdr hdr; + u8 data[]; +} __packed; + +struct wire_trans_hdr { + __le32 type; + __le32 len; +} __packed; + +/* Each message sent from driver to device are organized in a list of wrapper_msg */ +struct wrapper_msg { + struct list_head list; + struct kref ref_count; + u32 len; /* length of data to transfer */ + struct wrapper_list *head; + union { + struct wire_msg msg; + struct wire_trans_hdr trans; + }; +}; + +struct wrapper_list { + struct list_head list; + spinlock_t lock; /* Protects the list state during additions and removals */ +}; + +struct wire_trans_passthrough { + struct wire_trans_hdr hdr; + u8 data[]; +} __packed; + +struct wire_addr_size_pair { + __le64 addr; + __le64 size; +} __packed; + +struct wire_trans_dma_xfer { + struct wire_trans_hdr hdr; + __le32 tag; + __le32 count; + __le32 dma_chunk_id; + __le32 padding; + struct wire_addr_size_pair data[]; +} __packed; + +/* Initiated by device to continue the DMA xfer of a large piece of data */ +struct wire_trans_dma_xfer_cont { + struct wire_trans_hdr hdr; + __le32 dma_chunk_id; + __le32 padding; + __le64 xferred_size; +} __packed; + +struct wire_trans_activate_to_dev { + struct wire_trans_hdr hdr; + __le64 req_q_addr; + __le64 rsp_q_addr; + __le32 req_q_size; + __le32 rsp_q_size; + __le32 buf_len; + __le32 options; /* unused, but BIT(16) has meaning to the device */ +} __packed; + +struct wire_trans_activate_from_dev { + struct wire_trans_hdr hdr; + __le32 status; + __le32 dbc_id; + __le64 options; /* unused */ +} __packed; + +struct wire_trans_deactivate_from_dev { + struct wire_trans_hdr hdr; + __le32 status; + __le32 dbc_id; +} __packed; + +struct wire_trans_terminate_to_dev { + struct wire_trans_hdr hdr; + __le32 handle; + __le32 padding; +} __packed; + +struct wire_trans_terminate_from_dev { + struct wire_trans_hdr hdr; + __le32 status; + __le32 padding; +} __packed; + +struct wire_trans_status_to_dev { + struct wire_trans_hdr hdr; +} __packed; + +struct wire_trans_status_from_dev { + struct wire_trans_hdr hdr; + __le16 major; + __le16 minor; + __le32 status; + __le64 status_flags; +} __packed; + +struct wire_trans_validate_part_to_dev { + struct wire_trans_hdr hdr; + __le32 part_id; + __le32 padding; +} __packed; + +struct wire_trans_validate_part_from_dev { + struct wire_trans_hdr hdr; + __le32 status; + __le32 padding; +} __packed; + +struct xfer_queue_elem { + /* + * Node in list of ongoing transfer request on control channel. + * Maintained by root device struct. + */ + struct list_head list; + /* Sequence number of this transfer request */ + u32 seq_num; + /* This is used to wait on until completion of transfer request */ + struct completion xfer_done; + /* Received data from device */ + void *buf; +}; + +struct dma_xfer { + /* Node in list of DMA transfers which is used for cleanup */ + struct list_head list; + /* SG table of memory used for DMA */ + struct sg_table *sgt; + /* Array pages used for DMA */ + struct page **page_list; + /* Number of pages used for DMA */ + unsigned long nr_pages; +}; + +struct ioctl_resources { + /* List of all DMA transfers which is used later for cleanup */ + struct list_head dma_xfers; + /* Base address of request queue which belongs to a DBC */ + void *buf; + /* + * Base bus address of request queue which belongs to a DBC. Response + * queue base bus address can be calculated by adding size of request + * queue to base bus address of request queue. + */ + dma_addr_t dma_addr; + /* Total size of request queue and response queue in byte */ + u32 total_size; + /* Total number of elements that can be queued in each of request and response queue */ + u32 nelem; + /* Base address of response queue which belongs to a DBC */ + void *rsp_q_base; + /* Status of the NNC message received */ + u32 status; + /* DBC id of the DBC received from device */ + u32 dbc_id; + /* + * DMA transfer request messages can be big in size and it may not be + * possible to send them in one shot. In such cases the messages are + * broken into chunks, this field stores ID of such chunks. + */ + u32 dma_chunk_id; + /* Total number of bytes transferred for a DMA xfer request */ + u64 xferred_dma_size; + /* Header of transaction message received from user. Used during DMA xfer request. */ + void *trans_hdr; +}; + +struct resp_work { + struct work_struct work; + struct qaic_device *qdev; + void *buf; +}; + +/* + * Since we're working with little endian messages, its useful to be able to + * increment without filling a whole line with conversions back and forth just + * to add one(1) to a message count. + */ +static __le32 incr_le32(__le32 val) +{ + return cpu_to_le32(le32_to_cpu(val) + 1); +} + +static u32 gen_crc(void *msg) +{ + struct wrapper_list *wrappers = msg; + struct wrapper_msg *w; + u32 crc = ~0; + + list_for_each_entry(w, &wrappers->list, list) + crc = crc32(crc, &w->msg, w->len); + + return crc ^ ~0; +} + +static u32 gen_crc_stub(void *msg) +{ + return 0; +} + +static bool valid_crc(void *msg) +{ + struct wire_msg_hdr *hdr = msg; + bool ret; + u32 crc; + + /* + * The output of this algorithm is always converted to the native + * endianness. + */ + crc = le32_to_cpu(hdr->crc32); + hdr->crc32 = 0; + ret = (crc32(~0, msg, le32_to_cpu(hdr->len)) ^ ~0) == crc; + hdr->crc32 = cpu_to_le32(crc); + return ret; +} + +static bool valid_crc_stub(void *msg) +{ + return true; +} + +static void free_wrapper(struct kref *ref) +{ + struct wrapper_msg *wrapper = container_of(ref, struct wrapper_msg, ref_count); + + list_del(&wrapper->list); + kfree(wrapper); +} + +static void save_dbc_buf(struct qaic_device *qdev, struct ioctl_resources *resources, + struct qaic_user *usr) +{ + u32 dbc_id = resources->dbc_id; + + if (resources->buf) { + wait_event_interruptible(qdev->dbc[dbc_id].dbc_release, !qdev->dbc[dbc_id].in_use); + qdev->dbc[dbc_id].req_q_base = resources->buf; + qdev->dbc[dbc_id].rsp_q_base = resources->rsp_q_base; + qdev->dbc[dbc_id].dma_addr = resources->dma_addr; + qdev->dbc[dbc_id].total_size = resources->total_size; + qdev->dbc[dbc_id].nelem = resources->nelem; + enable_dbc(qdev, dbc_id, usr); + qdev->dbc[dbc_id].in_use = true; + resources->buf = NULL; + } +} + +static void free_dbc_buf(struct qaic_device *qdev, struct ioctl_resources *resources) +{ + if (resources->buf) + dma_free_coherent(&qdev->pdev->dev, resources->total_size, resources->buf, + resources->dma_addr); + resources->buf = NULL; +} + +static void free_dma_xfers(struct qaic_device *qdev, struct ioctl_resources *resources) +{ + struct dma_xfer *xfer; + struct dma_xfer *x; + int i; + + list_for_each_entry_safe(xfer, x, &resources->dma_xfers, list) { + dma_unmap_sgtable(&qdev->pdev->dev, xfer->sgt, DMA_TO_DEVICE, 0); + sg_free_table(xfer->sgt); + kfree(xfer->sgt); + for (i = 0; i < xfer->nr_pages; ++i) + put_page(xfer->page_list[i]); + kfree(xfer->page_list); + list_del(&xfer->list); + kfree(xfer); + } +} + +static struct wrapper_msg *add_wrapper(struct wrapper_list *wrappers, u32 size) +{ + struct wrapper_msg *w = kzalloc(size, GFP_KERNEL); + + if (!w) + return NULL; + list_add_tail(&w->list, &wrappers->list); + kref_init(&w->ref_count); + w->head = wrappers; + return w; +} + +static int encode_passthrough(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers, + u32 *user_len) +{ + struct qaic_manage_trans_passthrough *in_trans = trans; + struct wire_trans_passthrough *out_trans; + struct wrapper_msg *trans_wrapper; + struct wrapper_msg *wrapper; + struct wire_msg *msg; + u32 msg_hdr_len; + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + msg_hdr_len = le32_to_cpu(msg->hdr.len); + + if (in_trans->hdr.len % 8 != 0) + return -EINVAL; + + if (msg_hdr_len + in_trans->hdr.len > QAIC_MANAGE_EXT_MSG_LENGTH) + return -ENOSPC; + + trans_wrapper = add_wrapper(wrappers, + offsetof(struct wrapper_msg, trans) + in_trans->hdr.len); + if (!trans_wrapper) + return -ENOMEM; + trans_wrapper->len = in_trans->hdr.len; + out_trans = (struct wire_trans_passthrough *)&trans_wrapper->trans; + + memcpy(out_trans->data, in_trans->data, in_trans->hdr.len - sizeof(in_trans->hdr)); + msg->hdr.len = cpu_to_le32(msg_hdr_len + in_trans->hdr.len); + msg->hdr.count = incr_le32(msg->hdr.count); + *user_len += in_trans->hdr.len; + out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_PASSTHROUGH_TO_DEV); + out_trans->hdr.len = cpu_to_le32(in_trans->hdr.len); + + return 0; +} + +/* returns error code for failure, 0 if enough pages alloc'd, 1 if dma_cont is needed */ +static int find_and_map_user_pages(struct qaic_device *qdev, + struct qaic_manage_trans_dma_xfer *in_trans, + struct ioctl_resources *resources, struct dma_xfer *xfer) +{ + unsigned long need_pages; + struct page **page_list; + unsigned long nr_pages; + struct sg_table *sgt; + u64 xfer_start_addr; + int ret; + int i; + + xfer_start_addr = in_trans->addr + resources->xferred_dma_size; + + need_pages = DIV_ROUND_UP(in_trans->size + offset_in_page(xfer_start_addr) - + resources->xferred_dma_size, PAGE_SIZE); + + nr_pages = need_pages; + + while (1) { + page_list = kmalloc_array(nr_pages, sizeof(*page_list), GFP_KERNEL | __GFP_NOWARN); + if (!page_list) { + nr_pages = nr_pages / 2; + if (!nr_pages) + return -ENOMEM; + } else { + break; + } + } + + ret = get_user_pages_fast(xfer_start_addr, nr_pages, 0, page_list); + if (ret < 0 || ret != nr_pages) { + ret = -EFAULT; + goto free_page_list; + } + + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + ret = -ENOMEM; + goto put_pages; + } + + ret = sg_alloc_table_from_pages(sgt, page_list, nr_pages, + offset_in_page(xfer_start_addr), + in_trans->size - resources->xferred_dma_size, GFP_KERNEL); + if (ret) { + ret = -ENOMEM; + goto free_sgt; + } + + ret = dma_map_sgtable(&qdev->pdev->dev, sgt, DMA_TO_DEVICE, 0); + if (ret) + goto free_table; + + xfer->sgt = sgt; + xfer->page_list = page_list; + xfer->nr_pages = nr_pages; + + return need_pages > nr_pages ? 1 : 0; + +free_table: + sg_free_table(sgt); +free_sgt: + kfree(sgt); +put_pages: + for (i = 0; i < nr_pages; ++i) + put_page(page_list[i]); +free_page_list: + kfree(page_list); + return ret; +} + +/* returns error code for failure, 0 if everything was encoded, 1 if dma_cont is needed */ +static int encode_addr_size_pairs(struct dma_xfer *xfer, struct wrapper_list *wrappers, + struct ioctl_resources *resources, u32 msg_hdr_len, u32 *size, + struct wire_trans_dma_xfer **out_trans) +{ + struct wrapper_msg *trans_wrapper; + struct sg_table *sgt = xfer->sgt; + struct wire_addr_size_pair *asp; + struct scatterlist *sg; + struct wrapper_msg *w; + unsigned int dma_len; + u64 dma_chunk_len; + void *boundary; + int nents_dma; + int nents; + int i; + + nents = sgt->nents; + nents_dma = nents; + *size = QAIC_MANAGE_EXT_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans); + for_each_sgtable_sg(sgt, sg, i) { + *size -= sizeof(*asp); + /* Save 1K for possible follow-up transactions. */ + if (*size < SZ_1K) { + nents_dma = i; + break; + } + } + + trans_wrapper = add_wrapper(wrappers, QAIC_WRAPPER_MAX_SIZE); + if (!trans_wrapper) + return -ENOMEM; + *out_trans = (struct wire_trans_dma_xfer *)&trans_wrapper->trans; + + asp = (*out_trans)->data; + boundary = (void *)trans_wrapper + QAIC_WRAPPER_MAX_SIZE; + *size = 0; + + dma_len = 0; + w = trans_wrapper; + dma_chunk_len = 0; + for_each_sg(sgt->sgl, sg, nents_dma, i) { + asp->size = cpu_to_le64(dma_len); + dma_chunk_len += dma_len; + if (dma_len) { + asp++; + if ((void *)asp + sizeof(*asp) > boundary) { + w->len = (void *)asp - (void *)&w->msg; + *size += w->len; + w = add_wrapper(wrappers, QAIC_WRAPPER_MAX_SIZE); + if (!w) + return -ENOMEM; + boundary = (void *)w + QAIC_WRAPPER_MAX_SIZE; + asp = (struct wire_addr_size_pair *)&w->msg; + } + } + asp->addr = cpu_to_le64(sg_dma_address(sg)); + dma_len = sg_dma_len(sg); + } + /* finalize the last segment */ + asp->size = cpu_to_le64(dma_len); + w->len = (void *)asp + sizeof(*asp) - (void *)&w->msg; + *size += w->len; + dma_chunk_len += dma_len; + resources->xferred_dma_size += dma_chunk_len; + + return nents_dma < nents ? 1 : 0; +} + +static void cleanup_xfer(struct qaic_device *qdev, struct dma_xfer *xfer) +{ + int i; + + dma_unmap_sgtable(&qdev->pdev->dev, xfer->sgt, DMA_TO_DEVICE, 0); + sg_free_table(xfer->sgt); + kfree(xfer->sgt); + for (i = 0; i < xfer->nr_pages; ++i) + put_page(xfer->page_list[i]); + kfree(xfer->page_list); +} + +static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers, + u32 *user_len, struct ioctl_resources *resources, struct qaic_user *usr) +{ + struct qaic_manage_trans_dma_xfer *in_trans = trans; + struct wire_trans_dma_xfer *out_trans; + struct wrapper_msg *wrapper; + struct dma_xfer *xfer; + struct wire_msg *msg; + bool need_cont_dma; + u32 msg_hdr_len; + u32 size; + int ret; + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + msg_hdr_len = le32_to_cpu(msg->hdr.len); + + if (msg_hdr_len > (UINT_MAX - QAIC_MANAGE_EXT_MSG_LENGTH)) + return -EINVAL; + + /* There should be enough space to hold at least one ASP entry. */ + if (msg_hdr_len + sizeof(*out_trans) + sizeof(struct wire_addr_size_pair) > + QAIC_MANAGE_EXT_MSG_LENGTH) + return -ENOMEM; + + if (in_trans->addr + in_trans->size < in_trans->addr || !in_trans->size) + return -EINVAL; + + xfer = kmalloc(sizeof(*xfer), GFP_KERNEL); + if (!xfer) + return -ENOMEM; + + ret = find_and_map_user_pages(qdev, in_trans, resources, xfer); + if (ret < 0) + goto free_xfer; + + need_cont_dma = (bool)ret; + + ret = encode_addr_size_pairs(xfer, wrappers, resources, msg_hdr_len, &size, &out_trans); + if (ret < 0) + goto cleanup_xfer; + + need_cont_dma = need_cont_dma || (bool)ret; + + msg->hdr.len = cpu_to_le32(msg_hdr_len + size); + msg->hdr.count = incr_le32(msg->hdr.count); + + out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_DMA_XFER_TO_DEV); + out_trans->hdr.len = cpu_to_le32(size); + out_trans->tag = cpu_to_le32(in_trans->tag); + out_trans->count = cpu_to_le32((size - sizeof(*out_trans)) / + sizeof(struct wire_addr_size_pair)); + + *user_len += in_trans->hdr.len; + + if (resources->dma_chunk_id) { + out_trans->dma_chunk_id = cpu_to_le32(resources->dma_chunk_id); + } else if (need_cont_dma) { + while (resources->dma_chunk_id == 0) + resources->dma_chunk_id = atomic_inc_return(&usr->chunk_id); + + out_trans->dma_chunk_id = cpu_to_le32(resources->dma_chunk_id); + } + resources->trans_hdr = trans; + + list_add(&xfer->list, &resources->dma_xfers); + return 0; + +cleanup_xfer: + cleanup_xfer(qdev, xfer); +free_xfer: + kfree(xfer); + return ret; +} + +static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers, + u32 *user_len, struct ioctl_resources *resources) +{ + struct qaic_manage_trans_activate_to_dev *in_trans = trans; + struct wire_trans_activate_to_dev *out_trans; + struct wrapper_msg *trans_wrapper; + struct wrapper_msg *wrapper; + struct wire_msg *msg; + dma_addr_t dma_addr; + u32 msg_hdr_len; + void *buf; + u32 nelem; + u32 size; + int ret; + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + msg_hdr_len = le32_to_cpu(msg->hdr.len); + + if (msg_hdr_len + sizeof(*out_trans) > QAIC_MANAGE_MAX_MSG_LENGTH) + return -ENOSPC; + + if (!in_trans->queue_size) + return -EINVAL; + + if (in_trans->pad) + return -EINVAL; + + nelem = in_trans->queue_size; + size = (get_dbc_req_elem_size() + get_dbc_rsp_elem_size()) * nelem; + if (size / nelem != get_dbc_req_elem_size() + get_dbc_rsp_elem_size()) + return -EINVAL; + + if (size + QAIC_DBC_Q_GAP + QAIC_DBC_Q_BUF_ALIGN < size) + return -EINVAL; + + size = ALIGN((size + QAIC_DBC_Q_GAP), QAIC_DBC_Q_BUF_ALIGN); + + buf = dma_alloc_coherent(&qdev->pdev->dev, size, &dma_addr, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + trans_wrapper = add_wrapper(wrappers, + offsetof(struct wrapper_msg, trans) + sizeof(*out_trans)); + if (!trans_wrapper) { + ret = -ENOMEM; + goto free_dma; + } + trans_wrapper->len = sizeof(*out_trans); + out_trans = (struct wire_trans_activate_to_dev *)&trans_wrapper->trans; + + out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_ACTIVATE_TO_DEV); + out_trans->hdr.len = cpu_to_le32(sizeof(*out_trans)); + out_trans->buf_len = cpu_to_le32(size); + out_trans->req_q_addr = cpu_to_le64(dma_addr); + out_trans->req_q_size = cpu_to_le32(nelem); + out_trans->rsp_q_addr = cpu_to_le64(dma_addr + size - nelem * get_dbc_rsp_elem_size()); + out_trans->rsp_q_size = cpu_to_le32(nelem); + out_trans->options = cpu_to_le32(in_trans->options); + + *user_len += in_trans->hdr.len; + msg->hdr.len = cpu_to_le32(msg_hdr_len + sizeof(*out_trans)); + msg->hdr.count = incr_le32(msg->hdr.count); + + resources->buf = buf; + resources->dma_addr = dma_addr; + resources->total_size = size; + resources->nelem = nelem; + resources->rsp_q_base = buf + size - nelem * get_dbc_rsp_elem_size(); + return 0; + +free_dma: + dma_free_coherent(&qdev->pdev->dev, size, buf, dma_addr); + return ret; +} + +static int encode_deactivate(struct qaic_device *qdev, void *trans, + u32 *user_len, struct qaic_user *usr) +{ + struct qaic_manage_trans_deactivate *in_trans = trans; + + if (in_trans->dbc_id >= qdev->num_dbc || in_trans->pad) + return -EINVAL; + + *user_len += in_trans->hdr.len; + + return disable_dbc(qdev, in_trans->dbc_id, usr); +} + +static int encode_status(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers, + u32 *user_len) +{ + struct qaic_manage_trans_status_to_dev *in_trans = trans; + struct wire_trans_status_to_dev *out_trans; + struct wrapper_msg *trans_wrapper; + struct wrapper_msg *wrapper; + struct wire_msg *msg; + u32 msg_hdr_len; + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + msg_hdr_len = le32_to_cpu(msg->hdr.len); + + if (msg_hdr_len + in_trans->hdr.len > QAIC_MANAGE_MAX_MSG_LENGTH) + return -ENOSPC; + + trans_wrapper = add_wrapper(wrappers, sizeof(*trans_wrapper)); + if (!trans_wrapper) + return -ENOMEM; + + trans_wrapper->len = sizeof(*out_trans); + out_trans = (struct wire_trans_status_to_dev *)&trans_wrapper->trans; + + out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_STATUS_TO_DEV); + out_trans->hdr.len = cpu_to_le32(in_trans->hdr.len); + msg->hdr.len = cpu_to_le32(msg_hdr_len + in_trans->hdr.len); + msg->hdr.count = incr_le32(msg->hdr.count); + *user_len += in_trans->hdr.len; + + return 0; +} + +static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg, + struct wrapper_list *wrappers, struct ioctl_resources *resources, + struct qaic_user *usr) +{ + struct qaic_manage_trans_hdr *trans_hdr; + struct wrapper_msg *wrapper; + struct wire_msg *msg; + u32 user_len = 0; + int ret; + int i; + + if (!user_msg->count) { + ret = -EINVAL; + goto out; + } + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + + msg->hdr.len = cpu_to_le32(sizeof(msg->hdr)); + + if (resources->dma_chunk_id) { + ret = encode_dma(qdev, resources->trans_hdr, wrappers, &user_len, resources, usr); + msg->hdr.count = cpu_to_le32(1); + goto out; + } + + for (i = 0; i < user_msg->count; ++i) { + if (user_len >= user_msg->len) { + ret = -EINVAL; + break; + } + trans_hdr = (struct qaic_manage_trans_hdr *)(user_msg->data + user_len); + if (user_len + trans_hdr->len > user_msg->len) { + ret = -EINVAL; + break; + } + + switch (trans_hdr->type) { + case QAIC_TRANS_PASSTHROUGH_FROM_USR: + ret = encode_passthrough(qdev, trans_hdr, wrappers, &user_len); + break; + case QAIC_TRANS_DMA_XFER_FROM_USR: + ret = encode_dma(qdev, trans_hdr, wrappers, &user_len, resources, usr); + break; + case QAIC_TRANS_ACTIVATE_FROM_USR: + ret = encode_activate(qdev, trans_hdr, wrappers, &user_len, resources); + break; + case QAIC_TRANS_DEACTIVATE_FROM_USR: + ret = encode_deactivate(qdev, trans_hdr, &user_len, usr); + break; + case QAIC_TRANS_STATUS_FROM_USR: + ret = encode_status(qdev, trans_hdr, wrappers, &user_len); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + break; + } + + if (user_len != user_msg->len) + ret = -EINVAL; +out: + if (ret) { + free_dma_xfers(qdev, resources); + free_dbc_buf(qdev, resources); + return ret; + } + + return 0; +} + +static int decode_passthrough(struct qaic_device *qdev, void *trans, struct manage_msg *user_msg, + u32 *msg_len) +{ + struct qaic_manage_trans_passthrough *out_trans; + struct wire_trans_passthrough *in_trans = trans; + u32 len; + + out_trans = (void *)user_msg->data + user_msg->len; + + len = le32_to_cpu(in_trans->hdr.len); + if (len % 8 != 0) + return -EINVAL; + + if (user_msg->len + len > QAIC_MANAGE_MAX_MSG_LENGTH) + return -ENOSPC; + + memcpy(out_trans->data, in_trans->data, len - sizeof(in_trans->hdr)); + user_msg->len += len; + *msg_len += len; + out_trans->hdr.type = le32_to_cpu(in_trans->hdr.type); + out_trans->hdr.len = len; + + return 0; +} + +static int decode_activate(struct qaic_device *qdev, void *trans, struct manage_msg *user_msg, + u32 *msg_len, struct ioctl_resources *resources, struct qaic_user *usr) +{ + struct qaic_manage_trans_activate_from_dev *out_trans; + struct wire_trans_activate_from_dev *in_trans = trans; + u32 len; + + out_trans = (void *)user_msg->data + user_msg->len; + + len = le32_to_cpu(in_trans->hdr.len); + if (user_msg->len + len > QAIC_MANAGE_MAX_MSG_LENGTH) + return -ENOSPC; + + user_msg->len += len; + *msg_len += len; + out_trans->hdr.type = le32_to_cpu(in_trans->hdr.type); + out_trans->hdr.len = len; + out_trans->status = le32_to_cpu(in_trans->status); + out_trans->dbc_id = le32_to_cpu(in_trans->dbc_id); + out_trans->options = le64_to_cpu(in_trans->options); + + if (!resources->buf) + /* how did we get an activate response without a request? */ + return -EINVAL; + + if (out_trans->dbc_id >= qdev->num_dbc) + /* + * The device assigned an invalid resource, which should never + * happen. Return an error so the user can try to recover. + */ + return -ENODEV; + + if (out_trans->status) + /* + * Allocating resources failed on device side. This is not an + * expected behaviour, user is expected to handle this situation. + */ + return -ECANCELED; + + resources->status = out_trans->status; + resources->dbc_id = out_trans->dbc_id; + save_dbc_buf(qdev, resources, usr); + + return 0; +} + +static int decode_deactivate(struct qaic_device *qdev, void *trans, u32 *msg_len, + struct qaic_user *usr) +{ + struct wire_trans_deactivate_from_dev *in_trans = trans; + u32 dbc_id = le32_to_cpu(in_trans->dbc_id); + u32 status = le32_to_cpu(in_trans->status); + + if (dbc_id >= qdev->num_dbc) + /* + * The device assigned an invalid resource, which should never + * happen. Inject an error so the user can try to recover. + */ + return -ENODEV; + + if (status) { + /* + * Releasing resources failed on the device side, which puts + * us in a bind since they may still be in use, so enable the + * dbc. User is expected to retry deactivation. + */ + enable_dbc(qdev, dbc_id, usr); + return -ECANCELED; + } + + release_dbc(qdev, dbc_id); + *msg_len += sizeof(*in_trans); + + return 0; +} + +static int decode_status(struct qaic_device *qdev, void *trans, struct manage_msg *user_msg, + u32 *user_len, struct wire_msg *msg) +{ + struct qaic_manage_trans_status_from_dev *out_trans; + struct wire_trans_status_from_dev *in_trans = trans; + u32 len; + + out_trans = (void *)user_msg->data + user_msg->len; + + len = le32_to_cpu(in_trans->hdr.len); + if (user_msg->len + len > QAIC_MANAGE_MAX_MSG_LENGTH) + return -ENOSPC; + + out_trans->hdr.type = QAIC_TRANS_STATUS_FROM_DEV; + out_trans->hdr.len = len; + out_trans->major = le16_to_cpu(in_trans->major); + out_trans->minor = le16_to_cpu(in_trans->minor); + out_trans->status_flags = le64_to_cpu(in_trans->status_flags); + out_trans->status = le32_to_cpu(in_trans->status); + *user_len += le32_to_cpu(in_trans->hdr.len); + user_msg->len += len; + + if (out_trans->status) + return -ECANCELED; + if (out_trans->status_flags & BIT(0) && !valid_crc(msg)) + return -EPIPE; + + return 0; +} + +static int decode_message(struct qaic_device *qdev, struct manage_msg *user_msg, + struct wire_msg *msg, struct ioctl_resources *resources, + struct qaic_user *usr) +{ + u32 msg_hdr_len = le32_to_cpu(msg->hdr.len); + struct wire_trans_hdr *trans_hdr; + u32 msg_len = 0; + int ret; + int i; + + if (msg_hdr_len > QAIC_MANAGE_MAX_MSG_LENGTH) + return -EINVAL; + + user_msg->len = 0; + user_msg->count = le32_to_cpu(msg->hdr.count); + + for (i = 0; i < user_msg->count; ++i) { + trans_hdr = (struct wire_trans_hdr *)(msg->data + msg_len); + if (msg_len + le32_to_cpu(trans_hdr->len) > msg_hdr_len) + return -EINVAL; + + switch (le32_to_cpu(trans_hdr->type)) { + case QAIC_TRANS_PASSTHROUGH_FROM_DEV: + ret = decode_passthrough(qdev, trans_hdr, user_msg, &msg_len); + break; + case QAIC_TRANS_ACTIVATE_FROM_DEV: + ret = decode_activate(qdev, trans_hdr, user_msg, &msg_len, resources, usr); + break; + case QAIC_TRANS_DEACTIVATE_FROM_DEV: + ret = decode_deactivate(qdev, trans_hdr, &msg_len, usr); + break; + case QAIC_TRANS_STATUS_FROM_DEV: + ret = decode_status(qdev, trans_hdr, user_msg, &msg_len, msg); + break; + default: + return -EINVAL; + } + + if (ret) + return ret; + } + + if (msg_len != (msg_hdr_len - sizeof(msg->hdr))) + return -EINVAL; + + return 0; +} + +static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u32 seq_num, + bool ignore_signal) +{ + struct xfer_queue_elem elem; + struct wire_msg *out_buf; + struct wrapper_msg *w; + int retry_count; + long ret; + + if (qdev->in_reset) { + mutex_unlock(&qdev->cntl_mutex); + return ERR_PTR(-ENODEV); + } + + elem.seq_num = seq_num; + elem.buf = NULL; + init_completion(&elem.xfer_done); + if (likely(!qdev->cntl_lost_buf)) { + /* + * The max size of request to device is QAIC_MANAGE_EXT_MSG_LENGTH. + * The max size of response from device is QAIC_MANAGE_MAX_MSG_LENGTH. + */ + out_buf = kmalloc(QAIC_MANAGE_MAX_MSG_LENGTH, GFP_KERNEL); + if (!out_buf) { + mutex_unlock(&qdev->cntl_mutex); + return ERR_PTR(-ENOMEM); + } + + ret = mhi_queue_buf(qdev->cntl_ch, DMA_FROM_DEVICE, out_buf, + QAIC_MANAGE_MAX_MSG_LENGTH, MHI_EOT); + if (ret) { + mutex_unlock(&qdev->cntl_mutex); + return ERR_PTR(ret); + } + } else { + /* + * we lost a buffer because we queued a recv buf, but then + * queuing the corresponding tx buf failed. To try to avoid + * a memory leak, lets reclaim it and use it for this + * transaction. + */ + qdev->cntl_lost_buf = false; + } + + list_for_each_entry(w, &wrappers->list, list) { + kref_get(&w->ref_count); + retry_count = 0; +retry: + ret = mhi_queue_buf(qdev->cntl_ch, DMA_TO_DEVICE, &w->msg, w->len, + list_is_last(&w->list, &wrappers->list) ? MHI_EOT : MHI_CHAIN); + if (ret) { + if (ret == -EAGAIN && retry_count++ < QAIC_MHI_RETRY_MAX) { + msleep_interruptible(QAIC_MHI_RETRY_WAIT_MS); + if (!signal_pending(current)) + goto retry; + } + + qdev->cntl_lost_buf = true; + kref_put(&w->ref_count, free_wrapper); + mutex_unlock(&qdev->cntl_mutex); + return ERR_PTR(ret); + } + } + + list_add_tail(&elem.list, &qdev->cntl_xfer_list); + mutex_unlock(&qdev->cntl_mutex); + + if (ignore_signal) + ret = wait_for_completion_timeout(&elem.xfer_done, control_resp_timeout_s * HZ); + else + ret = wait_for_completion_interruptible_timeout(&elem.xfer_done, + control_resp_timeout_s * HZ); + /* + * not using _interruptable because we have to cleanup or we'll + * likely cause memory corruption + */ + mutex_lock(&qdev->cntl_mutex); + if (!list_empty(&elem.list)) + list_del(&elem.list); + if (!ret && !elem.buf) + ret = -ETIMEDOUT; + else if (ret > 0 && !elem.buf) + ret = -EIO; + mutex_unlock(&qdev->cntl_mutex); + + if (ret < 0) { + kfree(elem.buf); + return ERR_PTR(ret); + } else if (!qdev->valid_crc(elem.buf)) { + kfree(elem.buf); + return ERR_PTR(-EPIPE); + } + + return elem.buf; +} + +/* Add a transaction to abort the outstanding DMA continuation */ +static int abort_dma_cont(struct qaic_device *qdev, struct wrapper_list *wrappers, u32 dma_chunk_id) +{ + struct wire_trans_dma_xfer *out_trans; + u32 size = sizeof(*out_trans); + struct wrapper_msg *wrapper; + struct wrapper_msg *w; + struct wire_msg *msg; + + wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list); + msg = &wrapper->msg; + + /* Remove all but the first wrapper which has the msg header */ + list_for_each_entry_safe(wrapper, w, &wrappers->list, list) + if (!list_is_first(&wrapper->list, &wrappers->list)) + kref_put(&wrapper->ref_count, free_wrapper); + + wrapper = add_wrapper(wrappers, offsetof(struct wrapper_msg, trans) + sizeof(*out_trans)); + + if (!wrapper) + return -ENOMEM; + + out_trans = (struct wire_trans_dma_xfer *)&wrapper->trans; + out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_DMA_XFER_TO_DEV); + out_trans->hdr.len = cpu_to_le32(size); + out_trans->tag = cpu_to_le32(0); + out_trans->count = cpu_to_le32(0); + out_trans->dma_chunk_id = cpu_to_le32(dma_chunk_id); + + msg->hdr.len = cpu_to_le32(size + sizeof(*msg)); + msg->hdr.count = cpu_to_le32(1); + wrapper->len = size; + + return 0; +} + +static struct wrapper_list *alloc_wrapper_list(void) +{ + struct wrapper_list *wrappers; + + wrappers = kmalloc(sizeof(*wrappers), GFP_KERNEL); + if (!wrappers) + return NULL; + INIT_LIST_HEAD(&wrappers->list); + spin_lock_init(&wrappers->lock); + + return wrappers; +} + +static int qaic_manage_msg_xfer(struct qaic_device *qdev, struct qaic_user *usr, + struct manage_msg *user_msg, struct ioctl_resources *resources, + struct wire_msg **rsp) +{ + struct wrapper_list *wrappers; + struct wrapper_msg *wrapper; + struct wrapper_msg *w; + bool all_done = false; + struct wire_msg *msg; + int ret; + + wrappers = alloc_wrapper_list(); + if (!wrappers) + return -ENOMEM; + + wrapper = add_wrapper(wrappers, sizeof(*wrapper)); + if (!wrapper) { + kfree(wrappers); + return -ENOMEM; + } + + msg = &wrapper->msg; + wrapper->len = sizeof(*msg); + + ret = encode_message(qdev, user_msg, wrappers, resources, usr); + if (ret && resources->dma_chunk_id) + ret = abort_dma_cont(qdev, wrappers, resources->dma_chunk_id); + if (ret) + goto encode_failed; + + ret = mutex_lock_interruptible(&qdev->cntl_mutex); + if (ret) + goto lock_failed; + + msg->hdr.magic_number = MANAGE_MAGIC_NUMBER; + msg->hdr.sequence_number = cpu_to_le32(qdev->next_seq_num++); + + if (usr) { + msg->hdr.handle = cpu_to_le32(usr->handle); + msg->hdr.partition_id = cpu_to_le32(usr->qddev->partition_id); + } else { + msg->hdr.handle = 0; + msg->hdr.partition_id = cpu_to_le32(QAIC_NO_PARTITION); + } + + msg->hdr.padding = cpu_to_le32(0); + msg->hdr.crc32 = cpu_to_le32(qdev->gen_crc(wrappers)); + + /* msg_xfer releases the mutex */ + *rsp = msg_xfer(qdev, wrappers, qdev->next_seq_num - 1, false); + if (IS_ERR(*rsp)) + ret = PTR_ERR(*rsp); + +lock_failed: + free_dma_xfers(qdev, resources); +encode_failed: + spin_lock(&wrappers->lock); + list_for_each_entry_safe(wrapper, w, &wrappers->list, list) + kref_put(&wrapper->ref_count, free_wrapper); + all_done = list_empty(&wrappers->list); + spin_unlock(&wrappers->lock); + if (all_done) + kfree(wrappers); + + return ret; +} + +static int qaic_manage(struct qaic_device *qdev, struct qaic_user *usr, struct manage_msg *user_msg) +{ + struct wire_trans_dma_xfer_cont *dma_cont = NULL; + struct ioctl_resources resources; + struct wire_msg *rsp = NULL; + int ret; + + memset(&resources, 0, sizeof(struct ioctl_resources)); + + INIT_LIST_HEAD(&resources.dma_xfers); + + if (user_msg->len > QAIC_MANAGE_MAX_MSG_LENGTH || + user_msg->count > QAIC_MANAGE_MAX_MSG_LENGTH / sizeof(struct qaic_manage_trans_hdr)) + return -EINVAL; + +dma_xfer_continue: + ret = qaic_manage_msg_xfer(qdev, usr, user_msg, &resources, &rsp); + if (ret) + return ret; + /* dma_cont should be the only transaction if present */ + if (le32_to_cpu(rsp->hdr.count) == 1) { + dma_cont = (struct wire_trans_dma_xfer_cont *)rsp->data; + if (le32_to_cpu(dma_cont->hdr.type) != QAIC_TRANS_DMA_XFER_CONT) + dma_cont = NULL; + } + if (dma_cont) { + if (le32_to_cpu(dma_cont->dma_chunk_id) == resources.dma_chunk_id && + le64_to_cpu(dma_cont->xferred_size) == resources.xferred_dma_size) { + kfree(rsp); + goto dma_xfer_continue; + } + + ret = -EINVAL; + goto dma_cont_failed; + } + + ret = decode_message(qdev, user_msg, rsp, &resources, usr); + +dma_cont_failed: + free_dbc_buf(qdev, &resources); + kfree(rsp); + return ret; +} + +int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_manage_msg *user_msg; + struct qaic_device *qdev; + struct manage_msg *msg; + struct qaic_user *usr; + u8 __user *user_data; + int qdev_rcu_id; + int usr_rcu_id; + int ret; + + usr = file_priv->driver_priv; + + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return -ENODEV; + } + + qdev = usr->qddev->qdev; + + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->in_reset) { + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return -ENODEV; + } + + user_msg = data; + + if (user_msg->len > QAIC_MANAGE_MAX_MSG_LENGTH) { + ret = -EINVAL; + goto out; + } + + msg = kzalloc(QAIC_MANAGE_MAX_MSG_LENGTH + sizeof(*msg), GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + msg->len = user_msg->len; + msg->count = user_msg->count; + + user_data = u64_to_user_ptr(user_msg->data); + + if (copy_from_user(msg->data, user_data, user_msg->len)) { + ret = -EFAULT; + goto free_msg; + } + + ret = qaic_manage(qdev, usr, msg); + + /* + * If the qaic_manage() is successful then we copy the message onto + * userspace memory but we have an exception for -ECANCELED. + * For -ECANCELED, it means that device has NACKed the message with a + * status error code which userspace would like to know. + */ + if (ret == -ECANCELED || !ret) { + if (copy_to_user(user_data, msg->data, msg->len)) { + ret = -EFAULT; + } else { + user_msg->len = msg->len; + user_msg->count = msg->count; + } + } + +free_msg: + kfree(msg); +out: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +int get_cntl_version(struct qaic_device *qdev, struct qaic_user *usr, u16 *major, u16 *minor) +{ + struct qaic_manage_trans_status_from_dev *status_result; + struct qaic_manage_trans_status_to_dev *status_query; + struct manage_msg *user_msg; + int ret; + + user_msg = kmalloc(sizeof(*user_msg) + sizeof(*status_result), GFP_KERNEL); + if (!user_msg) { + ret = -ENOMEM; + goto out; + } + user_msg->len = sizeof(*status_query); + user_msg->count = 1; + + status_query = (struct qaic_manage_trans_status_to_dev *)user_msg->data; + status_query->hdr.type = QAIC_TRANS_STATUS_FROM_USR; + status_query->hdr.len = sizeof(status_query->hdr); + + ret = qaic_manage(qdev, usr, user_msg); + if (ret) + goto kfree_user_msg; + status_result = (struct qaic_manage_trans_status_from_dev *)user_msg->data; + *major = status_result->major; + *minor = status_result->minor; + + if (status_result->status_flags & BIT(0)) { /* device is using CRC */ + /* By default qdev->gen_crc is programmed to generate CRC */ + qdev->valid_crc = valid_crc; + } else { + /* By default qdev->valid_crc is programmed to bypass CRC */ + qdev->gen_crc = gen_crc_stub; + } + +kfree_user_msg: + kfree(user_msg); +out: + return ret; +} + +static void resp_worker(struct work_struct *work) +{ + struct resp_work *resp = container_of(work, struct resp_work, work); + struct qaic_device *qdev = resp->qdev; + struct wire_msg *msg = resp->buf; + struct xfer_queue_elem *elem; + struct xfer_queue_elem *i; + bool found = false; + + mutex_lock(&qdev->cntl_mutex); + list_for_each_entry_safe(elem, i, &qdev->cntl_xfer_list, list) { + if (elem->seq_num == le32_to_cpu(msg->hdr.sequence_number)) { + found = true; + list_del_init(&elem->list); + elem->buf = msg; + complete_all(&elem->xfer_done); + break; + } + } + mutex_unlock(&qdev->cntl_mutex); + + if (!found) + /* request must have timed out, drop packet */ + kfree(msg); + + kfree(resp); +} + +static void free_wrapper_from_list(struct wrapper_list *wrappers, struct wrapper_msg *wrapper) +{ + bool all_done = false; + + spin_lock(&wrappers->lock); + kref_put(&wrapper->ref_count, free_wrapper); + all_done = list_empty(&wrappers->list); + spin_unlock(&wrappers->lock); + + if (all_done) + kfree(wrappers); +} + +void qaic_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct wire_msg *msg = mhi_result->buf_addr; + struct wrapper_msg *wrapper = container_of(msg, struct wrapper_msg, msg); + + free_wrapper_from_list(wrapper->head, wrapper); +} + +void qaic_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); + struct wire_msg *msg = mhi_result->buf_addr; + struct resp_work *resp; + + if (mhi_result->transaction_status || msg->hdr.magic_number != MANAGE_MAGIC_NUMBER) { + kfree(msg); + return; + } + + resp = kmalloc(sizeof(*resp), GFP_ATOMIC); + if (!resp) { + kfree(msg); + return; + } + + INIT_WORK(&resp->work, resp_worker); + resp->qdev = qdev; + resp->buf = msg; + queue_work(qdev->cntl_wq, &resp->work); +} + +int qaic_control_open(struct qaic_device *qdev) +{ + if (!qdev->cntl_ch) + return -ENODEV; + + qdev->cntl_lost_buf = false; + /* + * By default qaic should assume that device has CRC enabled. + * Qaic comes to know if device has CRC enabled or disabled during the + * device status transaction, which is the first transaction performed + * on control channel. + * + * So CRC validation of first device status transaction response is + * ignored (by calling valid_crc_stub) and is done later during decoding + * if device has CRC enabled. + * Now that qaic knows whether device has CRC enabled or not it acts + * accordingly. + */ + qdev->gen_crc = gen_crc; + qdev->valid_crc = valid_crc_stub; + + return mhi_prepare_for_transfer(qdev->cntl_ch); +} + +void qaic_control_close(struct qaic_device *qdev) +{ + mhi_unprepare_from_transfer(qdev->cntl_ch); +} + +void qaic_release_usr(struct qaic_device *qdev, struct qaic_user *usr) +{ + struct wire_trans_terminate_to_dev *trans; + struct wrapper_list *wrappers; + struct wrapper_msg *wrapper; + struct wire_msg *msg; + struct wire_msg *rsp; + + wrappers = alloc_wrapper_list(); + if (!wrappers) + return; + + wrapper = add_wrapper(wrappers, sizeof(*wrapper) + sizeof(*msg) + sizeof(*trans)); + if (!wrapper) + return; + + msg = &wrapper->msg; + + trans = (struct wire_trans_terminate_to_dev *)msg->data; + + trans->hdr.type = cpu_to_le32(QAIC_TRANS_TERMINATE_TO_DEV); + trans->hdr.len = cpu_to_le32(sizeof(*trans)); + trans->handle = cpu_to_le32(usr->handle); + + mutex_lock(&qdev->cntl_mutex); + wrapper->len = sizeof(msg->hdr) + sizeof(*trans); + msg->hdr.magic_number = MANAGE_MAGIC_NUMBER; + msg->hdr.sequence_number = cpu_to_le32(qdev->next_seq_num++); + msg->hdr.len = cpu_to_le32(wrapper->len); + msg->hdr.count = cpu_to_le32(1); + msg->hdr.handle = cpu_to_le32(usr->handle); + msg->hdr.padding = cpu_to_le32(0); + msg->hdr.crc32 = cpu_to_le32(qdev->gen_crc(wrappers)); + + /* + * msg_xfer releases the mutex + * We don't care about the return of msg_xfer since we will not do + * anything different based on what happens. + * We ignore pending signals since one will be set if the user is + * killed, and we need give the device a chance to cleanup, otherwise + * DMA may still be in progress when we return. + */ + rsp = msg_xfer(qdev, wrappers, qdev->next_seq_num - 1, true); + if (!IS_ERR(rsp)) + kfree(rsp); + free_wrapper_from_list(wrappers, wrapper); +} + +void wake_all_cntl(struct qaic_device *qdev) +{ + struct xfer_queue_elem *elem; + struct xfer_queue_elem *i; + + mutex_lock(&qdev->cntl_mutex); + list_for_each_entry_safe(elem, i, &qdev->cntl_xfer_list, list) { + list_del_init(&elem->list); + complete_all(&elem->xfer_done); + } + mutex_unlock(&qdev->cntl_mutex); +} -- cgit v1.2.3 From ff13be8303336ead5621712f2c55012d738878b5 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 27 Mar 2023 09:54:54 -0600 Subject: accel/qaic: Add datapath Add the datapath component that manages BOs and submits them to running workloads on the qaic device via the dma_bridge hardware. This allows QAIC clients to interact with their workloads (run inferences) via the following ioctls along with mmap(): DRM_IOCTL_QAIC_CREATE_BO DRM_IOCTL_QAIC_MMAP_BO DRM_IOCTL_QAIC_ATTACH_SLICE_BO DRM_IOCTL_QAIC_EXECUTE_BO DRM_IOCTL_QAIC_PARTIAL_EXECUTE_BO DRM_IOCTL_QAIC_WAIT_BO DRM_IOCTL_QAIC_PERF_STATS_BO Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Stanislaw Gruszka Reviewed-by: Jacek Lawrynowicz Acked-by: Oded Gabbay Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/1679932497-30277-6-git-send-email-quic_jhugo@quicinc.com --- drivers/accel/qaic/qaic_data.c | 1902 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1902 insertions(+) create mode 100644 drivers/accel/qaic/qaic_data.c diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c new file mode 100644 index 000000000000..c0a574cd1b35 --- /dev/null +++ b/drivers/accel/qaic/qaic_data.c @@ -0,0 +1,1902 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qaic.h" + +#define SEM_VAL_MASK GENMASK_ULL(11, 0) +#define SEM_INDEX_MASK GENMASK_ULL(4, 0) +#define BULK_XFER BIT(3) +#define GEN_COMPLETION BIT(4) +#define INBOUND_XFER 1 +#define OUTBOUND_XFER 2 +#define REQHP_OFF 0x0 /* we read this */ +#define REQTP_OFF 0x4 /* we write this */ +#define RSPHP_OFF 0x8 /* we write this */ +#define RSPTP_OFF 0xc /* we read this */ + +#define ENCODE_SEM(val, index, sync, cmd, flags) \ + ({ \ + FIELD_PREP(GENMASK(11, 0), (val)) | \ + FIELD_PREP(GENMASK(20, 16), (index)) | \ + FIELD_PREP(BIT(22), (sync)) | \ + FIELD_PREP(GENMASK(26, 24), (cmd)) | \ + FIELD_PREP(GENMASK(30, 29), (flags)) | \ + FIELD_PREP(BIT(31), (cmd) ? 1 : 0); \ + }) +#define NUM_EVENTS 128 +#define NUM_DELAYS 10 + +static unsigned int wait_exec_default_timeout_ms = 5000; /* 5 sec default */ +module_param(wait_exec_default_timeout_ms, uint, 0600); +MODULE_PARM_DESC(wait_exec_default_timeout_ms, "Default timeout for DRM_IOCTL_QAIC_WAIT_BO"); + +static unsigned int datapath_poll_interval_us = 100; /* 100 usec default */ +module_param(datapath_poll_interval_us, uint, 0600); +MODULE_PARM_DESC(datapath_poll_interval_us, + "Amount of time to sleep between activity when datapath polling is enabled"); + +struct dbc_req { + /* + * A request ID is assigned to each memory handle going in DMA queue. + * As a single memory handle can enqueue multiple elements in DMA queue + * all of them will have the same request ID. + */ + __le16 req_id; + /* Future use */ + __u8 seq_id; + /* + * Special encoded variable + * 7 0 - Do not force to generate MSI after DMA is completed + * 1 - Force to generate MSI after DMA is completed + * 6:5 Reserved + * 4 1 - Generate completion element in the response queue + * 0 - No Completion Code + * 3 0 - DMA request is a Link list transfer + * 1 - DMA request is a Bulk transfer + * 2 Reserved + * 1:0 00 - No DMA transfer involved + * 01 - DMA transfer is part of inbound transfer + * 10 - DMA transfer has outbound transfer + * 11 - NA + */ + __u8 cmd; + __le32 resv; + /* Source address for the transfer */ + __le64 src_addr; + /* Destination address for the transfer */ + __le64 dest_addr; + /* Length of transfer request */ + __le32 len; + __le32 resv2; + /* Doorbell address */ + __le64 db_addr; + /* + * Special encoded variable + * 7 1 - Doorbell(db) write + * 0 - No doorbell write + * 6:2 Reserved + * 1:0 00 - 32 bit access, db address must be aligned to 32bit-boundary + * 01 - 16 bit access, db address must be aligned to 16bit-boundary + * 10 - 8 bit access, db address must be aligned to 8bit-boundary + * 11 - Reserved + */ + __u8 db_len; + __u8 resv3; + __le16 resv4; + /* 32 bit data written to doorbell address */ + __le32 db_data; + /* + * Special encoded variable + * All the fields of sem_cmdX are passed from user and all are ORed + * together to form sem_cmd. + * 0:11 Semaphore value + * 15:12 Reserved + * 20:16 Semaphore index + * 21 Reserved + * 22 Semaphore Sync + * 23 Reserved + * 26:24 Semaphore command + * 28:27 Reserved + * 29 Semaphore DMA out bound sync fence + * 30 Semaphore DMA in bound sync fence + * 31 Enable semaphore command + */ + __le32 sem_cmd0; + __le32 sem_cmd1; + __le32 sem_cmd2; + __le32 sem_cmd3; +} __packed; + +struct dbc_rsp { + /* Request ID of the memory handle whose DMA transaction is completed */ + __le16 req_id; + /* Status of the DMA transaction. 0 : Success otherwise failure */ + __le16 status; +} __packed; + +inline int get_dbc_req_elem_size(void) +{ + return sizeof(struct dbc_req); +} + +inline int get_dbc_rsp_elem_size(void) +{ + return sizeof(struct dbc_rsp); +} + +static void free_slice(struct kref *kref) +{ + struct bo_slice *slice = container_of(kref, struct bo_slice, ref_count); + + list_del(&slice->slice); + drm_gem_object_put(&slice->bo->base); + sg_free_table(slice->sgt); + kfree(slice->sgt); + kfree(slice->reqs); + kfree(slice); +} + +static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out, + struct sg_table *sgt_in, u64 size, u64 offset) +{ + int total_len, len, nents, offf = 0, offl = 0; + struct scatterlist *sg, *sgn, *sgf, *sgl; + struct sg_table *sgt; + int ret, j; + + /* find out number of relevant nents needed for this mem */ + total_len = 0; + sgf = NULL; + sgl = NULL; + nents = 0; + + size = size ? size : PAGE_SIZE; + for (sg = sgt_in->sgl; sg; sg = sg_next(sg)) { + len = sg_dma_len(sg); + + if (!len) + continue; + if (offset >= total_len && offset < total_len + len) { + sgf = sg; + offf = offset - total_len; + } + if (sgf) + nents++; + if (offset + size >= total_len && + offset + size <= total_len + len) { + sgl = sg; + offl = offset + size - total_len; + break; + } + total_len += len; + } + + if (!sgf || !sgl) { + ret = -EINVAL; + goto out; + } + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + ret = -ENOMEM; + goto out; + } + + ret = sg_alloc_table(sgt, nents, GFP_KERNEL); + if (ret) + goto free_sgt; + + /* copy relevant sg node and fix page and length */ + sgn = sgf; + for_each_sgtable_sg(sgt, sg, j) { + memcpy(sg, sgn, sizeof(*sg)); + if (sgn == sgf) { + sg_dma_address(sg) += offf; + sg_dma_len(sg) -= offf; + sg_set_page(sg, sg_page(sgn), sg_dma_len(sg), offf); + } else { + offf = 0; + } + if (sgn == sgl) { + sg_dma_len(sg) = offl - offf; + sg_set_page(sg, sg_page(sgn), offl - offf, offf); + sg_mark_end(sg); + break; + } + sgn = sg_next(sgn); + } + + *sgt_out = sgt; + return ret; + +free_sgt: + kfree(sgt); +out: + *sgt_out = NULL; + return ret; +} + +static int encode_reqs(struct qaic_device *qdev, struct bo_slice *slice, + struct qaic_attach_slice_entry *req) +{ + __le64 db_addr = cpu_to_le64(req->db_addr); + __le32 db_data = cpu_to_le32(req->db_data); + struct scatterlist *sg; + __u8 cmd = BULK_XFER; + int presync_sem; + u64 dev_addr; + __u8 db_len; + int i; + + if (!slice->no_xfer) + cmd |= (slice->dir == DMA_TO_DEVICE ? INBOUND_XFER : OUTBOUND_XFER); + + if (req->db_len && !IS_ALIGNED(req->db_addr, req->db_len / 8)) + return -EINVAL; + + presync_sem = req->sem0.presync + req->sem1.presync + req->sem2.presync + req->sem3.presync; + if (presync_sem > 1) + return -EINVAL; + + presync_sem = req->sem0.presync << 0 | req->sem1.presync << 1 | + req->sem2.presync << 2 | req->sem3.presync << 3; + + switch (req->db_len) { + case 32: + db_len = BIT(7); + break; + case 16: + db_len = BIT(7) | 1; + break; + case 8: + db_len = BIT(7) | 2; + break; + case 0: + db_len = 0; /* doorbell is not active for this command */ + break; + default: + return -EINVAL; /* should never hit this */ + } + + /* + * When we end up splitting up a single request (ie a buf slice) into + * multiple DMA requests, we have to manage the sync data carefully. + * There can only be one presync sem. That needs to be on every xfer + * so that the DMA engine doesn't transfer data before the receiver is + * ready. We only do the doorbell and postsync sems after the xfer. + * To guarantee previous xfers for the request are complete, we use a + * fence. + */ + dev_addr = req->dev_addr; + for_each_sgtable_sg(slice->sgt, sg, i) { + slice->reqs[i].cmd = cmd; + slice->reqs[i].src_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ? + sg_dma_address(sg) : dev_addr); + slice->reqs[i].dest_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ? + dev_addr : sg_dma_address(sg)); + /* + * sg_dma_len(sg) returns size of a DMA segment, maximum DMA + * segment size is set to UINT_MAX by qaic and hence return + * values of sg_dma_len(sg) can never exceed u32 range. So, + * by down sizing we are not corrupting the value. + */ + slice->reqs[i].len = cpu_to_le32((u32)sg_dma_len(sg)); + switch (presync_sem) { + case BIT(0): + slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val, + req->sem0.index, + req->sem0.presync, + req->sem0.cmd, + req->sem0.flags)); + break; + case BIT(1): + slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val, + req->sem1.index, + req->sem1.presync, + req->sem1.cmd, + req->sem1.flags)); + break; + case BIT(2): + slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val, + req->sem2.index, + req->sem2.presync, + req->sem2.cmd, + req->sem2.flags)); + break; + case BIT(3): + slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val, + req->sem3.index, + req->sem3.presync, + req->sem3.cmd, + req->sem3.flags)); + break; + } + dev_addr += sg_dma_len(sg); + } + /* add post transfer stuff to last segment */ + i--; + slice->reqs[i].cmd |= GEN_COMPLETION; + slice->reqs[i].db_addr = db_addr; + slice->reqs[i].db_len = db_len; + slice->reqs[i].db_data = db_data; + /* + * Add a fence if we have more than one request going to the hardware + * representing the entirety of the user request, and the user request + * has no presync condition. + * Fences are expensive, so we try to avoid them. We rely on the + * hardware behavior to avoid needing one when there is a presync + * condition. When a presync exists, all requests for that same + * presync will be queued into a fifo. Thus, since we queue the + * post xfer activity only on the last request we queue, the hardware + * will ensure that the last queued request is processed last, thus + * making sure the post xfer activity happens at the right time without + * a fence. + */ + if (i && !presync_sem) + req->sem0.flags |= (slice->dir == DMA_TO_DEVICE ? + QAIC_SEM_INSYNCFENCE : QAIC_SEM_OUTSYNCFENCE); + slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val, req->sem0.index, + req->sem0.presync, req->sem0.cmd, + req->sem0.flags)); + slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val, req->sem1.index, + req->sem1.presync, req->sem1.cmd, + req->sem1.flags)); + slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val, req->sem2.index, + req->sem2.presync, req->sem2.cmd, + req->sem2.flags)); + slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val, req->sem3.index, + req->sem3.presync, req->sem3.cmd, + req->sem3.flags)); + + return 0; +} + +static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo, + struct qaic_attach_slice_entry *slice_ent) +{ + struct sg_table *sgt = NULL; + struct bo_slice *slice; + int ret; + + ret = clone_range_of_sgt_for_slice(qdev, &sgt, bo->sgt, slice_ent->size, slice_ent->offset); + if (ret) + goto out; + + slice = kmalloc(sizeof(*slice), GFP_KERNEL); + if (!slice) { + ret = -ENOMEM; + goto free_sgt; + } + + slice->reqs = kcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL); + if (!slice->reqs) { + ret = -ENOMEM; + goto free_slice; + } + + slice->no_xfer = !slice_ent->size; + slice->sgt = sgt; + slice->nents = sgt->nents; + slice->dir = bo->dir; + slice->bo = bo; + slice->size = slice_ent->size; + slice->offset = slice_ent->offset; + + ret = encode_reqs(qdev, slice, slice_ent); + if (ret) + goto free_req; + + bo->total_slice_nents += sgt->nents; + kref_init(&slice->ref_count); + drm_gem_object_get(&bo->base); + list_add_tail(&slice->slice, &bo->slices); + + return 0; + +free_req: + kfree(slice->reqs); +free_slice: + kfree(slice); +free_sgt: + sg_free_table(sgt); + kfree(sgt); +out: + return ret; +} + +static int create_sgt(struct qaic_device *qdev, struct sg_table **sgt_out, u64 size) +{ + struct scatterlist *sg; + struct sg_table *sgt; + struct page **pages; + int *pages_order; + int buf_extra; + int max_order; + int nr_pages; + int ret = 0; + int i, j, k; + int order; + + if (size) { + nr_pages = DIV_ROUND_UP(size, PAGE_SIZE); + /* + * calculate how much extra we are going to allocate, to remove + * later + */ + buf_extra = (PAGE_SIZE - size % PAGE_SIZE) % PAGE_SIZE; + max_order = min(MAX_ORDER - 1, get_order(size)); + } else { + /* allocate a single page for book keeping */ + nr_pages = 1; + buf_extra = 0; + max_order = 0; + } + + pages = kvmalloc_array(nr_pages, sizeof(*pages) + sizeof(*pages_order), GFP_KERNEL); + if (!pages) { + ret = -ENOMEM; + goto out; + } + pages_order = (void *)pages + sizeof(*pages) * nr_pages; + + /* + * Allocate requested memory using alloc_pages. It is possible to allocate + * the requested memory in multiple chunks by calling alloc_pages + * multiple times. Use SG table to handle multiple allocated pages. + */ + i = 0; + while (nr_pages > 0) { + order = min(get_order(nr_pages * PAGE_SIZE), max_order); + while (1) { + pages[i] = alloc_pages(GFP_KERNEL | GFP_HIGHUSER | + __GFP_NOWARN | __GFP_ZERO | + (order ? __GFP_NORETRY : __GFP_RETRY_MAYFAIL), + order); + if (pages[i]) + break; + if (!order--) { + ret = -ENOMEM; + goto free_partial_alloc; + } + } + + max_order = order; + pages_order[i] = order; + + nr_pages -= 1 << order; + if (nr_pages <= 0) + /* account for over allocation */ + buf_extra += abs(nr_pages) * PAGE_SIZE; + i++; + } + + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + ret = -ENOMEM; + goto free_partial_alloc; + } + + if (sg_alloc_table(sgt, i, GFP_KERNEL)) { + ret = -ENOMEM; + goto free_sgt; + } + + /* Populate the SG table with the allocated memory pages */ + sg = sgt->sgl; + for (k = 0; k < i; k++, sg = sg_next(sg)) { + /* Last entry requires special handling */ + if (k < i - 1) { + sg_set_page(sg, pages[k], PAGE_SIZE << pages_order[k], 0); + } else { + sg_set_page(sg, pages[k], (PAGE_SIZE << pages_order[k]) - buf_extra, 0); + sg_mark_end(sg); + } + } + + kvfree(pages); + *sgt_out = sgt; + return ret; + +free_sgt: + kfree(sgt); +free_partial_alloc: + for (j = 0; j < i; j++) + __free_pages(pages[j], pages_order[j]); + kvfree(pages); +out: + *sgt_out = NULL; + return ret; +} + +static bool invalid_sem(struct qaic_sem *sem) +{ + if (sem->val & ~SEM_VAL_MASK || sem->index & ~SEM_INDEX_MASK || + !(sem->presync == 0 || sem->presync == 1) || sem->pad || + sem->flags & ~(QAIC_SEM_INSYNCFENCE | QAIC_SEM_OUTSYNCFENCE) || + sem->cmd > QAIC_SEM_WAIT_GT_0) + return true; + return false; +} + +static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent, + u32 count, u64 total_size) +{ + int i; + + for (i = 0; i < count; i++) { + if (!(slice_ent[i].db_len == 32 || slice_ent[i].db_len == 16 || + slice_ent[i].db_len == 8 || slice_ent[i].db_len == 0) || + invalid_sem(&slice_ent[i].sem0) || invalid_sem(&slice_ent[i].sem1) || + invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3)) + return -EINVAL; + + if (slice_ent[i].offset + slice_ent[i].size > total_size) + return -EINVAL; + } + + return 0; +} + +static void qaic_free_sgt(struct sg_table *sgt) +{ + struct scatterlist *sg; + + for (sg = sgt->sgl; sg; sg = sg_next(sg)) + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + sg_free_table(sgt); + kfree(sgt); +} + +static void qaic_gem_print_info(struct drm_printer *p, unsigned int indent, + const struct drm_gem_object *obj) +{ + struct qaic_bo *bo = to_qaic_bo(obj); + + drm_printf_indent(p, indent, "user requested size=%llu\n", bo->size); +} + +static const struct vm_operations_struct drm_vm_ops = { + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct qaic_bo *bo = to_qaic_bo(obj); + unsigned long offset = 0; + struct scatterlist *sg; + int ret; + + if (obj->import_attach) + return -EINVAL; + + for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) { + if (sg_page(sg)) { + ret = remap_pfn_range(vma, vma->vm_start + offset, page_to_pfn(sg_page(sg)), + sg->length, vma->vm_page_prot); + if (ret) + goto out; + offset += sg->length; + } + } + +out: + return ret; +} + +static void qaic_free_object(struct drm_gem_object *obj) +{ + struct qaic_bo *bo = to_qaic_bo(obj); + + if (obj->import_attach) { + /* DMABUF/PRIME Path */ + dma_buf_detach(obj->import_attach->dmabuf, obj->import_attach); + dma_buf_put(obj->import_attach->dmabuf); + } else { + /* Private buffer allocation path */ + qaic_free_sgt(bo->sgt); + } + + drm_gem_object_release(obj); + kfree(bo); +} + +static const struct drm_gem_object_funcs qaic_gem_funcs = { + .free = qaic_free_object, + .print_info = qaic_gem_print_info, + .mmap = qaic_gem_object_mmap, + .vm_ops = &drm_vm_ops, +}; + +static struct qaic_bo *qaic_alloc_init_bo(void) +{ + struct qaic_bo *bo; + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&bo->slices); + init_completion(&bo->xfer_done); + complete_all(&bo->xfer_done); + + return bo; +} + +int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_create_bo *args = data; + int usr_rcu_id, qdev_rcu_id; + struct drm_gem_object *obj; + struct qaic_device *qdev; + struct qaic_user *usr; + struct qaic_bo *bo; + size_t size; + int ret; + + if (args->pad) + return -EINVAL; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->in_reset) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + size = PAGE_ALIGN(args->size); + if (size == 0) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + bo = qaic_alloc_init_bo(); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto unlock_dev_srcu; + } + obj = &bo->base; + + drm_gem_private_object_init(dev, obj, size); + + obj->funcs = &qaic_gem_funcs; + ret = create_sgt(qdev, &bo->sgt, size); + if (ret) + goto free_bo; + + bo->size = args->size; + + ret = drm_gem_handle_create(file_priv, obj, &args->handle); + if (ret) + goto free_sgt; + + bo->handle = args->handle; + drm_gem_object_put(obj); + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + + return 0; + +free_sgt: + qaic_free_sgt(bo->sgt); +free_bo: + kfree(bo); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_mmap_bo *args = data; + int usr_rcu_id, qdev_rcu_id; + struct drm_gem_object *obj; + struct qaic_device *qdev; + struct qaic_user *usr; + int ret; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->in_reset) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + obj = drm_gem_object_lookup(file_priv, args->handle); + if (!obj) { + ret = -ENOENT; + goto unlock_dev_srcu; + } + + ret = drm_gem_create_mmap_offset(obj); + if (ret == 0) + args->offset = drm_vma_node_offset_addr(&obj->vma_node); + + drm_gem_object_put(obj); + +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) +{ + struct dma_buf_attachment *attach; + struct drm_gem_object *obj; + struct qaic_bo *bo; + size_t size; + int ret; + + bo = qaic_alloc_init_bo(); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto out; + } + + obj = &bo->base; + get_dma_buf(dma_buf); + + attach = dma_buf_attach(dma_buf, dev->dev); + if (IS_ERR(attach)) { + ret = PTR_ERR(attach); + goto attach_fail; + } + + size = PAGE_ALIGN(attach->dmabuf->size); + if (size == 0) { + ret = -EINVAL; + goto size_align_fail; + } + + drm_gem_private_object_init(dev, obj, size); + /* + * skipping dma_buf_map_attachment() as we do not know the direction + * just yet. Once the direction is known in the subsequent IOCTL to + * attach slicing, we can do it then. + */ + + obj->funcs = &qaic_gem_funcs; + obj->import_attach = attach; + obj->resv = dma_buf->resv; + + return obj; + +size_align_fail: + dma_buf_detach(dma_buf, attach); +attach_fail: + dma_buf_put(dma_buf); + kfree(bo); +out: + return ERR_PTR(ret); +} + +static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_hdr *hdr) +{ + struct drm_gem_object *obj = &bo->base; + struct sg_table *sgt; + int ret; + + if (obj->import_attach->dmabuf->size < hdr->size) + return -EINVAL; + + sgt = dma_buf_map_attachment(obj->import_attach, hdr->dir); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + return ret; + } + + bo->sgt = sgt; + bo->size = hdr->size; + + return 0; +} + +static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo, + struct qaic_attach_slice_hdr *hdr) +{ + int ret; + + if (bo->size != hdr->size) + return -EINVAL; + + ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0); + if (ret) + return -EFAULT; + + return 0; +} + +static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo, + struct qaic_attach_slice_hdr *hdr) +{ + int ret; + + if (bo->base.import_attach) + ret = qaic_prepare_import_bo(bo, hdr); + else + ret = qaic_prepare_export_bo(qdev, bo, hdr); + + if (ret == 0) + bo->dir = hdr->dir; + + return ret; +} + +static void qaic_unprepare_import_bo(struct qaic_bo *bo) +{ + dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, bo->dir); + bo->sgt = NULL; + bo->size = 0; +} + +static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo) +{ + dma_unmap_sgtable(&qdev->pdev->dev, bo->sgt, bo->dir, 0); +} + +static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo) +{ + if (bo->base.import_attach) + qaic_unprepare_import_bo(bo); + else + qaic_unprepare_export_bo(qdev, bo); + + bo->dir = 0; +} + +static void qaic_free_slices_bo(struct qaic_bo *bo) +{ + struct bo_slice *slice, *temp; + + list_for_each_entry_safe(slice, temp, &bo->slices, slice) + kref_put(&slice->ref_count, free_slice); +} + +static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo, + struct qaic_attach_slice_hdr *hdr, + struct qaic_attach_slice_entry *slice_ent) +{ + int ret, i; + + for (i = 0; i < hdr->count; i++) { + ret = qaic_map_one_slice(qdev, bo, &slice_ent[i]); + if (ret) { + qaic_free_slices_bo(bo); + return ret; + } + } + + if (bo->total_slice_nents > qdev->dbc[hdr->dbc_id].nelem) { + qaic_free_slices_bo(bo); + return -ENOSPC; + } + + bo->sliced = true; + bo->nr_slice = hdr->count; + list_add_tail(&bo->bo_list, &qdev->dbc[hdr->dbc_id].bo_lists); + + return 0; +} + +int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_attach_slice_entry *slice_ent; + struct qaic_attach_slice *args = data; + struct dma_bridge_chan *dbc; + int usr_rcu_id, qdev_rcu_id; + struct drm_gem_object *obj; + struct qaic_device *qdev; + unsigned long arg_size; + struct qaic_user *usr; + u8 __user *user_data; + struct qaic_bo *bo; + int ret; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->in_reset) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + if (args->hdr.count == 0) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + arg_size = args->hdr.count * sizeof(*slice_ent); + if (arg_size / args->hdr.count != sizeof(*slice_ent)) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + if (args->hdr.dbc_id >= qdev->num_dbc) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + if (args->hdr.size == 0) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE)) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + dbc = &qdev->dbc[args->hdr.dbc_id]; + if (dbc->usr != usr) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + if (args->data == 0) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + user_data = u64_to_user_ptr(args->data); + + slice_ent = kzalloc(arg_size, GFP_KERNEL); + if (!slice_ent) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + ret = copy_from_user(slice_ent, user_data, arg_size); + if (ret) { + ret = -EFAULT; + goto free_slice_ent; + } + + ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, args->hdr.size); + if (ret) + goto free_slice_ent; + + obj = drm_gem_object_lookup(file_priv, args->hdr.handle); + if (!obj) { + ret = -ENOENT; + goto free_slice_ent; + } + + bo = to_qaic_bo(obj); + + ret = qaic_prepare_bo(qdev, bo, &args->hdr); + if (ret) + goto put_bo; + + ret = qaic_attach_slicing_bo(qdev, bo, &args->hdr, slice_ent); + if (ret) + goto unprepare_bo; + + if (args->hdr.dir == DMA_TO_DEVICE) + dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir); + + bo->dbc = dbc; + drm_gem_object_put(obj); + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + + return 0; + +unprepare_bo: + qaic_unprepare_bo(qdev, bo); +put_bo: + drm_gem_object_put(obj); +free_slice_ent: + kfree(slice_ent); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, u32 dbc_id, + u32 head, u32 *ptail) +{ + struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id]; + struct dbc_req *reqs = slice->reqs; + u32 tail = *ptail; + u32 avail; + + avail = head - tail; + if (head <= tail) + avail += dbc->nelem; + + --avail; + + if (avail < slice->nents) + return -EAGAIN; + + if (tail + slice->nents > dbc->nelem) { + avail = dbc->nelem - tail; + avail = min_t(u32, avail, slice->nents); + memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs, + sizeof(*reqs) * avail); + reqs += avail; + avail = slice->nents - avail; + if (avail) + memcpy(dbc->req_q_base, reqs, sizeof(*reqs) * avail); + } else { + memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs, + sizeof(*reqs) * slice->nents); + } + + *ptail = (tail + slice->nents) % dbc->nelem; + + return 0; +} + +/* + * Based on the value of resize we may only need to transmit first_n + * entries and the last entry, with last_bytes to send from the last entry. + * Note that first_n could be 0. + */ +static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, + u64 resize, u32 dbc_id, u32 head, u32 *ptail) +{ + struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id]; + struct dbc_req *reqs = slice->reqs; + struct dbc_req *last_req; + u32 tail = *ptail; + u64 total_bytes; + u64 last_bytes; + u32 first_n; + u32 avail; + int ret; + int i; + + avail = head - tail; + if (head <= tail) + avail += dbc->nelem; + + --avail; + + total_bytes = 0; + for (i = 0; i < slice->nents; i++) { + total_bytes += le32_to_cpu(reqs[i].len); + if (total_bytes >= resize) + break; + } + + if (total_bytes < resize) { + /* User space should have used the full buffer path. */ + ret = -EINVAL; + return ret; + } + + first_n = i; + last_bytes = i ? resize + le32_to_cpu(reqs[i].len) - total_bytes : resize; + + if (avail < (first_n + 1)) + return -EAGAIN; + + if (first_n) { + if (tail + first_n > dbc->nelem) { + avail = dbc->nelem - tail; + avail = min_t(u32, avail, first_n); + memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs, + sizeof(*reqs) * avail); + last_req = reqs + avail; + avail = first_n - avail; + if (avail) + memcpy(dbc->req_q_base, last_req, sizeof(*reqs) * avail); + } else { + memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs, + sizeof(*reqs) * first_n); + } + } + + /* Copy over the last entry. Here we need to adjust len to the left over + * size, and set src and dst to the entry it is copied to. + */ + last_req = dbc->req_q_base + (tail + first_n) % dbc->nelem * get_dbc_req_elem_size(); + memcpy(last_req, reqs + slice->nents - 1, sizeof(*reqs)); + + /* + * last_bytes holds size of a DMA segment, maximum DMA segment size is + * set to UINT_MAX by qaic and hence last_bytes can never exceed u32 + * range. So, by down sizing we are not corrupting the value. + */ + last_req->len = cpu_to_le32((u32)last_bytes); + last_req->src_addr = reqs[first_n].src_addr; + last_req->dest_addr = reqs[first_n].dest_addr; + + *ptail = (tail + first_n + 1) % dbc->nelem; + + return 0; +} + +static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *file_priv, + struct qaic_execute_entry *exec, unsigned int count, + bool is_partial, struct dma_bridge_chan *dbc, u32 head, + u32 *tail) +{ + struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec; + struct drm_gem_object *obj; + struct bo_slice *slice; + unsigned long flags; + struct qaic_bo *bo; + bool queued; + int i, j; + int ret; + + for (i = 0; i < count; i++) { + /* + * ref count will be decremented when the transfer of this + * buffer is complete. It is inside dbc_irq_threaded_fn(). + */ + obj = drm_gem_object_lookup(file_priv, + is_partial ? pexec[i].handle : exec[i].handle); + if (!obj) { + ret = -ENOENT; + goto failed_to_send_bo; + } + + bo = to_qaic_bo(obj); + + if (!bo->sliced) { + ret = -EINVAL; + goto failed_to_send_bo; + } + + if (is_partial && pexec[i].resize > bo->size) { + ret = -EINVAL; + goto failed_to_send_bo; + } + + spin_lock_irqsave(&dbc->xfer_lock, flags); + queued = bo->queued; + bo->queued = true; + if (queued) { + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + ret = -EINVAL; + goto failed_to_send_bo; + } + + bo->req_id = dbc->next_req_id++; + + list_for_each_entry(slice, &bo->slices, slice) { + /* + * If this slice does not fall under the given + * resize then skip this slice and continue the loop + */ + if (is_partial && pexec[i].resize && pexec[i].resize <= slice->offset) + continue; + + for (j = 0; j < slice->nents; j++) + slice->reqs[j].req_id = cpu_to_le16(bo->req_id); + + /* + * If it is a partial execute ioctl call then check if + * resize has cut this slice short then do a partial copy + * else do complete copy + */ + if (is_partial && pexec[i].resize && + pexec[i].resize < slice->offset + slice->size) + ret = copy_partial_exec_reqs(qdev, slice, + pexec[i].resize - slice->offset, + dbc->id, head, tail); + else + ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail); + if (ret) { + bo->queued = false; + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + goto failed_to_send_bo; + } + } + reinit_completion(&bo->xfer_done); + list_add_tail(&bo->xfer_list, &dbc->xfer_list); + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + dma_sync_sgtable_for_device(&qdev->pdev->dev, bo->sgt, bo->dir); + } + + return 0; + +failed_to_send_bo: + if (likely(obj)) + drm_gem_object_put(obj); + for (j = 0; j < i; j++) { + spin_lock_irqsave(&dbc->xfer_lock, flags); + bo = list_last_entry(&dbc->xfer_list, struct qaic_bo, xfer_list); + obj = &bo->base; + bo->queued = false; + list_del(&bo->xfer_list); + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); + drm_gem_object_put(obj); + } + return ret; +} + +static void update_profiling_data(struct drm_file *file_priv, + struct qaic_execute_entry *exec, unsigned int count, + bool is_partial, u64 received_ts, u64 submit_ts, u32 queue_level) +{ + struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec; + struct drm_gem_object *obj; + struct qaic_bo *bo; + int i; + + for (i = 0; i < count; i++) { + /* + * Since we already committed the BO to hardware, the only way + * this should fail is a pending signal. We can't cancel the + * submit to hardware, so we have to just skip the profiling + * data. In case the signal is not fatal to the process, we + * return success so that the user doesn't try to resubmit. + */ + obj = drm_gem_object_lookup(file_priv, + is_partial ? pexec[i].handle : exec[i].handle); + if (!obj) + break; + bo = to_qaic_bo(obj); + bo->perf_stats.req_received_ts = received_ts; + bo->perf_stats.req_submit_ts = submit_ts; + bo->perf_stats.queue_level_before = queue_level; + queue_level += bo->total_slice_nents; + drm_gem_object_put(obj); + } +} + +static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv, + bool is_partial) +{ + struct qaic_partial_execute_entry *pexec; + struct qaic_execute *args = data; + struct qaic_execute_entry *exec; + struct dma_bridge_chan *dbc; + int usr_rcu_id, qdev_rcu_id; + struct qaic_device *qdev; + struct qaic_user *usr; + u8 __user *user_data; + unsigned long n; + u64 received_ts; + u32 queue_level; + u64 submit_ts; + int rcu_id; + u32 head; + u32 tail; + u64 size; + int ret; + + received_ts = ktime_get_ns(); + + size = is_partial ? sizeof(*pexec) : sizeof(*exec); + + n = (unsigned long)size * args->hdr.count; + if (args->hdr.count == 0 || n / args->hdr.count != size) + return -EINVAL; + + user_data = u64_to_user_ptr(args->data); + + exec = kcalloc(args->hdr.count, size, GFP_KERNEL); + pexec = (struct qaic_partial_execute_entry *)exec; + if (!exec) + return -ENOMEM; + + if (copy_from_user(exec, user_data, n)) { + ret = -EFAULT; + goto free_exec; + } + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->in_reset) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + if (args->hdr.dbc_id >= qdev->num_dbc) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + dbc = &qdev->dbc[args->hdr.dbc_id]; + + rcu_id = srcu_read_lock(&dbc->ch_lock); + if (!dbc->usr || dbc->usr->handle != usr->handle) { + ret = -EPERM; + goto release_ch_rcu; + } + + head = readl(dbc->dbc_base + REQHP_OFF); + tail = readl(dbc->dbc_base + REQTP_OFF); + + if (head == U32_MAX || tail == U32_MAX) { + /* PCI link error */ + ret = -ENODEV; + goto release_ch_rcu; + } + + queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail); + + ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc, + head, &tail); + if (ret) + goto release_ch_rcu; + + /* Finalize commit to hardware */ + submit_ts = ktime_get_ns(); + writel(tail, dbc->dbc_base + REQTP_OFF); + + update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts, + submit_ts, queue_level); + + if (datapath_polling) + schedule_work(&dbc->poll_work); + +release_ch_rcu: + srcu_read_unlock(&dbc->ch_lock, rcu_id); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); +free_exec: + kfree(exec); + return ret; +} + +int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + return __qaic_execute_bo_ioctl(dev, data, file_priv, false); +} + +int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + return __qaic_execute_bo_ioctl(dev, data, file_priv, true); +} + +/* + * Our interrupt handling is a bit more complicated than a simple ideal, but + * sadly necessary. + * + * Each dbc has a completion queue. Entries in the queue correspond to DMA + * requests which the device has processed. The hardware already has a built + * in irq mitigation. When the device puts an entry into the queue, it will + * only trigger an interrupt if the queue was empty. Therefore, when adding + * the Nth event to a non-empty queue, the hardware doesn't trigger an + * interrupt. This means the host doesn't get additional interrupts signaling + * the same thing - the queue has something to process. + * This behavior can be overridden in the DMA request. + * This means that when the host receives an interrupt, it is required to + * drain the queue. + * + * This behavior is what NAPI attempts to accomplish, although we can't use + * NAPI as we don't have a netdev. We use threaded irqs instead. + * + * However, there is a situation where the host drains the queue fast enough + * that every event causes an interrupt. Typically this is not a problem as + * the rate of events would be low. However, that is not the case with + * lprnet for example. On an Intel Xeon D-2191 where we run 8 instances of + * lprnet, the host receives roughly 80k interrupts per second from the device + * (per /proc/interrupts). While NAPI documentation indicates the host should + * just chug along, sadly that behavior causes instability in some hosts. + * + * Therefore, we implement an interrupt disable scheme similar to NAPI. The + * key difference is that we will delay after draining the queue for a small + * time to allow additional events to come in via polling. Using the above + * lprnet workload, this reduces the number of interrupts processed from + * ~80k/sec to about 64 in 5 minutes and appears to solve the system + * instability. + */ +irqreturn_t dbc_irq_handler(int irq, void *data) +{ + struct dma_bridge_chan *dbc = data; + int rcu_id; + u32 head; + u32 tail; + + rcu_id = srcu_read_lock(&dbc->ch_lock); + + if (!dbc->usr) { + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_HANDLED; + } + + head = readl(dbc->dbc_base + RSPHP_OFF); + if (head == U32_MAX) { /* PCI link error */ + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_NONE; + } + + tail = readl(dbc->dbc_base + RSPTP_OFF); + if (tail == U32_MAX) { /* PCI link error */ + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_NONE; + } + + if (head == tail) { /* queue empty */ + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_NONE; + } + + disable_irq_nosync(irq); + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_WAKE_THREAD; +} + +void irq_polling_work(struct work_struct *work) +{ + struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work); + unsigned long flags; + int rcu_id; + u32 head; + u32 tail; + + rcu_id = srcu_read_lock(&dbc->ch_lock); + + while (1) { + if (dbc->qdev->in_reset) { + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + if (!dbc->usr) { + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + spin_lock_irqsave(&dbc->xfer_lock, flags); + if (list_empty(&dbc->xfer_list)) { + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + + head = readl(dbc->dbc_base + RSPHP_OFF); + if (head == U32_MAX) { /* PCI link error */ + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + + tail = readl(dbc->dbc_base + RSPTP_OFF); + if (tail == U32_MAX) { /* PCI link error */ + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + + if (head != tail) { + irq_wake_thread(dbc->irq, dbc); + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return; + } + + cond_resched(); + usleep_range(datapath_poll_interval_us, 2 * datapath_poll_interval_us); + } +} + +irqreturn_t dbc_irq_threaded_fn(int irq, void *data) +{ + struct dma_bridge_chan *dbc = data; + int event_count = NUM_EVENTS; + int delay_count = NUM_DELAYS; + struct qaic_device *qdev; + struct qaic_bo *bo, *i; + struct dbc_rsp *rsp; + unsigned long flags; + int rcu_id; + u16 status; + u16 req_id; + u32 head; + u32 tail; + + rcu_id = srcu_read_lock(&dbc->ch_lock); + + head = readl(dbc->dbc_base + RSPHP_OFF); + if (head == U32_MAX) /* PCI link error */ + goto error_out; + + qdev = dbc->qdev; +read_fifo: + + if (!event_count) { + event_count = NUM_EVENTS; + cond_resched(); + } + + /* + * if this channel isn't assigned or gets unassigned during processing + * we have nothing further to do + */ + if (!dbc->usr) + goto error_out; + + tail = readl(dbc->dbc_base + RSPTP_OFF); + if (tail == U32_MAX) /* PCI link error */ + goto error_out; + + if (head == tail) { /* queue empty */ + if (delay_count) { + --delay_count; + usleep_range(100, 200); + goto read_fifo; /* check for a new event */ + } + goto normal_out; + } + + delay_count = NUM_DELAYS; + while (head != tail) { + if (!event_count) + break; + --event_count; + rsp = dbc->rsp_q_base + head * sizeof(*rsp); + req_id = le16_to_cpu(rsp->req_id); + status = le16_to_cpu(rsp->status); + if (status) + pci_dbg(qdev->pdev, "req_id %d failed with status %d\n", req_id, status); + spin_lock_irqsave(&dbc->xfer_lock, flags); + /* + * A BO can receive multiple interrupts, since a BO can be + * divided into multiple slices and a buffer receives as many + * interrupts as slices. So until it receives interrupts for + * all the slices we cannot mark that buffer complete. + */ + list_for_each_entry_safe(bo, i, &dbc->xfer_list, xfer_list) { + if (bo->req_id == req_id) + bo->nr_slice_xfer_done++; + else + continue; + + if (bo->nr_slice_xfer_done < bo->nr_slice) + break; + + /* + * At this point we have received all the interrupts for + * BO, which means BO execution is complete. + */ + dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); + bo->nr_slice_xfer_done = 0; + bo->queued = false; + list_del(&bo->xfer_list); + bo->perf_stats.req_processed_ts = ktime_get_ns(); + complete_all(&bo->xfer_done); + drm_gem_object_put(&bo->base); + break; + } + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + head = (head + 1) % dbc->nelem; + } + + /* + * Update the head pointer of response queue and let the device know + * that we have consumed elements from the queue. + */ + writel(head, dbc->dbc_base + RSPHP_OFF); + + /* elements might have been put in the queue while we were processing */ + goto read_fifo; + +normal_out: + if (likely(!datapath_polling)) + enable_irq(irq); + else + schedule_work(&dbc->poll_work); + /* checking the fifo and enabling irqs is a race, missed event check */ + tail = readl(dbc->dbc_base + RSPTP_OFF); + if (tail != U32_MAX && head != tail) { + if (likely(!datapath_polling)) + disable_irq_nosync(irq); + goto read_fifo; + } + srcu_read_unlock(&dbc->ch_lock, rcu_id); + return IRQ_HANDLED; + +error_out: + srcu_read_unlock(&dbc->ch_lock, rcu_id); + if (likely(!datapath_polling)) + enable_irq(irq); + else + schedule_work(&dbc->poll_work); + + return IRQ_HANDLED; +} + +int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_wait *args = data; + int usr_rcu_id, qdev_rcu_id; + struct dma_bridge_chan *dbc; + struct drm_gem_object *obj; + struct qaic_device *qdev; + unsigned long timeout; + struct qaic_user *usr; + struct qaic_bo *bo; + int rcu_id; + int ret; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->in_reset) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + if (args->pad != 0) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + if (args->dbc_id >= qdev->num_dbc) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + dbc = &qdev->dbc[args->dbc_id]; + + rcu_id = srcu_read_lock(&dbc->ch_lock); + if (dbc->usr != usr) { + ret = -EPERM; + goto unlock_ch_srcu; + } + + obj = drm_gem_object_lookup(file_priv, args->handle); + if (!obj) { + ret = -ENOENT; + goto unlock_ch_srcu; + } + + bo = to_qaic_bo(obj); + timeout = args->timeout ? args->timeout : wait_exec_default_timeout_ms; + timeout = msecs_to_jiffies(timeout); + ret = wait_for_completion_interruptible_timeout(&bo->xfer_done, timeout); + if (!ret) { + ret = -ETIMEDOUT; + goto put_obj; + } + if (ret > 0) + ret = 0; + + if (!dbc->usr) + ret = -EPERM; + +put_obj: + drm_gem_object_put(obj); +unlock_ch_srcu: + srcu_read_unlock(&dbc->ch_lock, rcu_id); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_perf_stats_entry *ent = NULL; + struct qaic_perf_stats *args = data; + int usr_rcu_id, qdev_rcu_id; + struct drm_gem_object *obj; + struct qaic_device *qdev; + struct qaic_user *usr; + struct qaic_bo *bo; + int ret, i; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->in_reset) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + if (args->hdr.dbc_id >= qdev->num_dbc) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + ent = kcalloc(args->hdr.count, sizeof(*ent), GFP_KERNEL); + if (!ent) { + ret = -EINVAL; + goto unlock_dev_srcu; + } + + ret = copy_from_user(ent, u64_to_user_ptr(args->data), args->hdr.count * sizeof(*ent)); + if (ret) { + ret = -EFAULT; + goto free_ent; + } + + for (i = 0; i < args->hdr.count; i++) { + obj = drm_gem_object_lookup(file_priv, ent[i].handle); + if (!obj) { + ret = -ENOENT; + goto free_ent; + } + bo = to_qaic_bo(obj); + /* + * perf stats ioctl is called before wait ioctl is complete then + * the latency information is invalid. + */ + if (bo->perf_stats.req_processed_ts < bo->perf_stats.req_submit_ts) { + ent[i].device_latency_us = 0; + } else { + ent[i].device_latency_us = div_u64((bo->perf_stats.req_processed_ts - + bo->perf_stats.req_submit_ts), 1000); + } + ent[i].submit_latency_us = div_u64((bo->perf_stats.req_submit_ts - + bo->perf_stats.req_received_ts), 1000); + ent[i].queue_level_before = bo->perf_stats.queue_level_before; + ent[i].num_queue_element = bo->total_slice_nents; + drm_gem_object_put(obj); + } + + if (copy_to_user(u64_to_user_ptr(args->data), ent, args->hdr.count * sizeof(*ent))) + ret = -EFAULT; + +free_ent: + kfree(ent); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + +static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc) +{ + unsigned long flags; + struct qaic_bo *bo; + + spin_lock_irqsave(&dbc->xfer_lock, flags); + while (!list_empty(&dbc->xfer_list)) { + bo = list_first_entry(&dbc->xfer_list, typeof(*bo), xfer_list); + bo->queued = false; + list_del(&bo->xfer_list); + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); + complete_all(&bo->xfer_done); + drm_gem_object_put(&bo->base); + spin_lock_irqsave(&dbc->xfer_lock, flags); + } + spin_unlock_irqrestore(&dbc->xfer_lock, flags); +} + +int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr) +{ + if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle) + return -EPERM; + + qdev->dbc[dbc_id].usr = NULL; + synchronize_srcu(&qdev->dbc[dbc_id].ch_lock); + return 0; +} + +/** + * enable_dbc - Enable the DBC. DBCs are disabled by removing the context of + * user. Add user context back to DBC to enable it. This function trusts the + * DBC ID passed and expects the DBC to be disabled. + * @qdev: Qranium device handle + * @dbc_id: ID of the DBC + * @usr: User context + */ +void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr) +{ + qdev->dbc[dbc_id].usr = usr; +} + +void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id) +{ + struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id]; + + dbc->usr = NULL; + empty_xfer_list(qdev, dbc); + synchronize_srcu(&dbc->ch_lock); +} + +void release_dbc(struct qaic_device *qdev, u32 dbc_id) +{ + struct bo_slice *slice, *slice_temp; + struct qaic_bo *bo, *bo_temp; + struct dma_bridge_chan *dbc; + + dbc = &qdev->dbc[dbc_id]; + if (!dbc->in_use) + return; + + wakeup_dbc(qdev, dbc_id); + + dma_free_coherent(&qdev->pdev->dev, dbc->total_size, dbc->req_q_base, dbc->dma_addr); + dbc->total_size = 0; + dbc->req_q_base = NULL; + dbc->dma_addr = 0; + dbc->nelem = 0; + dbc->usr = NULL; + + list_for_each_entry_safe(bo, bo_temp, &dbc->bo_lists, bo_list) { + list_for_each_entry_safe(slice, slice_temp, &bo->slices, slice) + kref_put(&slice->ref_count, free_slice); + bo->sliced = false; + INIT_LIST_HEAD(&bo->slices); + bo->total_slice_nents = 0; + bo->dir = 0; + bo->dbc = NULL; + bo->nr_slice = 0; + bo->nr_slice_xfer_done = 0; + bo->queued = false; + bo->req_id = 0; + init_completion(&bo->xfer_done); + complete_all(&bo->xfer_done); + list_del(&bo->bo_list); + bo->perf_stats.req_received_ts = 0; + bo->perf_stats.req_submit_ts = 0; + bo->perf_stats.req_processed_ts = 0; + bo->perf_stats.queue_level_before = 0; + } + + dbc->in_use = false; + wake_up(&dbc->dbc_release); +} -- cgit v1.2.3 From 566fc96198b4bb07ca6806386956669881225271 Mon Sep 17 00:00:00 2001 From: Pranjal Ramajor Asha Kanojiya Date: Mon, 27 Mar 2023 09:54:55 -0600 Subject: accel/qaic: Add mhi_qaic_cntl Some of the MHI channels for an AIC100 device need to be routed to userspace so that userspace can communicate directly with QSM. The MHI bus does not support this, and while the WWAN subsystem does (for the same reasons), AIC100 is not a WWAN device. Also, MHI is not something that other accelerators are expected to share, thus an accel subsystem function that meets this usecase is unlikely. Create a QAIC specific MHI userspace shim that exposes these channels. Start with QAIC_SAHARA which is required to boot AIC100 and is consumed by the kickstart application as documented in aic100.rst Each AIC100 instance (currently, up to 16) in a system will create a chardev for QAIC_SAHARA. This chardev will be found as /dev/_QAIC_SAHARA For example - /dev/mhi0_QAIC_SAHARA Signed-off-by: Pranjal Ramajor Asha Kanojiya Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Stanislaw Gruszka Reviewed-by: Jacek Lawrynowicz Acked-by: Oded Gabbay Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/1679932497-30277-7-git-send-email-quic_jhugo@quicinc.com --- drivers/accel/qaic/mhi_qaic_ctrl.c | 569 +++++++++++++++++++++++++++++++++++++ drivers/accel/qaic/mhi_qaic_ctrl.h | 12 + 2 files changed, 581 insertions(+) create mode 100644 drivers/accel/qaic/mhi_qaic_ctrl.c create mode 100644 drivers/accel/qaic/mhi_qaic_ctrl.h diff --git a/drivers/accel/qaic/mhi_qaic_ctrl.c b/drivers/accel/qaic/mhi_qaic_ctrl.c new file mode 100644 index 000000000000..0c7e571f1f12 --- /dev/null +++ b/drivers/accel/qaic/mhi_qaic_ctrl.c @@ -0,0 +1,569 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#include +#include +#include +#include +#include +#include +#include + +#include "mhi_qaic_ctrl.h" +#include "qaic.h" + +#define MHI_QAIC_CTRL_DRIVER_NAME "mhi_qaic_ctrl" +#define MHI_QAIC_CTRL_MAX_MINORS 128 +#define MHI_MAX_MTU 0xffff +static DEFINE_XARRAY_ALLOC(mqc_xa); +static struct class *mqc_dev_class; +static int mqc_dev_major; + +/** + * struct mqc_buf - Buffer structure used to receive data from device + * @data: Address of data to read from + * @odata: Original address returned from *alloc() API. Used to free this buf. + * @len: Length of data in byte + * @node: This buffer will be part of list managed in struct mqc_dev + */ +struct mqc_buf { + void *data; + void *odata; + size_t len; + struct list_head node; +}; + +/** + * struct mqc_dev - MHI QAIC Control Device + * @minor: MQC device node minor number + * @mhi_dev: Associated mhi device object + * @mtu: Max TRE buffer length + * @enabled: Flag to track the state of the MQC device + * @lock: Mutex lock to serialize access to open_count + * @read_lock: Mutex lock to serialize readers + * @write_lock: Mutex lock to serialize writers + * @ul_wq: Wait queue for writers + * @dl_wq: Wait queue for readers + * @dl_queue_lock: Spin lock to serialize access to download queue + * @dl_queue: Queue of downloaded buffers + * @open_count: Track open counts + * @ref_count: Reference count for this structure + */ +struct mqc_dev { + u32 minor; + struct mhi_device *mhi_dev; + size_t mtu; + bool enabled; + struct mutex lock; + struct mutex read_lock; + struct mutex write_lock; + wait_queue_head_t ul_wq; + wait_queue_head_t dl_wq; + spinlock_t dl_queue_lock; + struct list_head dl_queue; + unsigned int open_count; + struct kref ref_count; +}; + +static void mqc_dev_release(struct kref *ref) +{ + struct mqc_dev *mqcdev = container_of(ref, struct mqc_dev, ref_count); + + mutex_destroy(&mqcdev->read_lock); + mutex_destroy(&mqcdev->write_lock); + mutex_destroy(&mqcdev->lock); + kfree(mqcdev); +} + +static int mhi_qaic_ctrl_fill_dl_queue(struct mqc_dev *mqcdev) +{ + struct mhi_device *mhi_dev = mqcdev->mhi_dev; + struct mqc_buf *ctrlbuf; + int rx_budget; + int ret = 0; + void *data; + + rx_budget = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); + if (rx_budget < 0) + return -EIO; + + while (rx_budget--) { + data = kzalloc(mqcdev->mtu + sizeof(*ctrlbuf), GFP_KERNEL); + if (!data) + return -ENOMEM; + + ctrlbuf = data + mqcdev->mtu; + ctrlbuf->odata = data; + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, data, mqcdev->mtu, MHI_EOT); + if (ret) { + kfree(data); + dev_err(&mhi_dev->dev, "Failed to queue buffer\n"); + return ret; + } + } + + return ret; +} + +static int mhi_qaic_ctrl_dev_start_chan(struct mqc_dev *mqcdev) +{ + struct device *dev = &mqcdev->mhi_dev->dev; + int ret = 0; + + ret = mutex_lock_interruptible(&mqcdev->lock); + if (ret) + return ret; + if (!mqcdev->enabled) { + ret = -ENODEV; + goto release_dev_lock; + } + if (!mqcdev->open_count) { + ret = mhi_prepare_for_transfer(mqcdev->mhi_dev); + if (ret) { + dev_err(dev, "Error starting transfer channels\n"); + goto release_dev_lock; + } + + ret = mhi_qaic_ctrl_fill_dl_queue(mqcdev); + if (ret) { + dev_err(dev, "Error filling download queue.\n"); + goto mhi_unprepare; + } + } + mqcdev->open_count++; + mutex_unlock(&mqcdev->lock); + + return 0; + +mhi_unprepare: + mhi_unprepare_from_transfer(mqcdev->mhi_dev); +release_dev_lock: + mutex_unlock(&mqcdev->lock); + return ret; +} + +static struct mqc_dev *mqc_dev_get_by_minor(unsigned int minor) +{ + struct mqc_dev *mqcdev; + + xa_lock(&mqc_xa); + mqcdev = xa_load(&mqc_xa, minor); + if (mqcdev) + kref_get(&mqcdev->ref_count); + xa_unlock(&mqc_xa); + + return mqcdev; +} + +static int mhi_qaic_ctrl_open(struct inode *inode, struct file *filp) +{ + struct mqc_dev *mqcdev; + int ret; + + mqcdev = mqc_dev_get_by_minor(iminor(inode)); + if (!mqcdev) { + pr_debug("mqc: minor %d not found\n", iminor(inode)); + return -EINVAL; + } + + ret = mhi_qaic_ctrl_dev_start_chan(mqcdev); + if (ret) { + kref_put(&mqcdev->ref_count, mqc_dev_release); + return ret; + } + + filp->private_data = mqcdev; + + return 0; +} + +static void mhi_qaic_ctrl_buf_free(struct mqc_buf *ctrlbuf) +{ + list_del(&ctrlbuf->node); + kfree(ctrlbuf->odata); +} + +static void __mhi_qaic_ctrl_release(struct mqc_dev *mqcdev) +{ + struct mqc_buf *ctrlbuf, *tmp; + + mhi_unprepare_from_transfer(mqcdev->mhi_dev); + wake_up_interruptible(&mqcdev->ul_wq); + wake_up_interruptible(&mqcdev->dl_wq); + /* + * Free the dl_queue. As we have already unprepared mhi transfers, we + * do not expect any callback functions that update dl_queue hence no need + * to grab dl_queue lock. + */ + mutex_lock(&mqcdev->read_lock); + list_for_each_entry_safe(ctrlbuf, tmp, &mqcdev->dl_queue, node) + mhi_qaic_ctrl_buf_free(ctrlbuf); + mutex_unlock(&mqcdev->read_lock); +} + +static int mhi_qaic_ctrl_release(struct inode *inode, struct file *file) +{ + struct mqc_dev *mqcdev = file->private_data; + + mutex_lock(&mqcdev->lock); + mqcdev->open_count--; + if (!mqcdev->open_count && mqcdev->enabled) + __mhi_qaic_ctrl_release(mqcdev); + mutex_unlock(&mqcdev->lock); + + kref_put(&mqcdev->ref_count, mqc_dev_release); + + return 0; +} + +static __poll_t mhi_qaic_ctrl_poll(struct file *file, poll_table *wait) +{ + struct mqc_dev *mqcdev = file->private_data; + struct mhi_device *mhi_dev; + __poll_t mask = 0; + + mhi_dev = mqcdev->mhi_dev; + + poll_wait(file, &mqcdev->ul_wq, wait); + poll_wait(file, &mqcdev->dl_wq, wait); + + mutex_lock(&mqcdev->lock); + if (!mqcdev->enabled) { + mutex_unlock(&mqcdev->lock); + return EPOLLERR; + } + + spin_lock_bh(&mqcdev->dl_queue_lock); + if (!list_empty(&mqcdev->dl_queue)) + mask |= EPOLLIN | EPOLLRDNORM; + spin_unlock_bh(&mqcdev->dl_queue_lock); + + if (mutex_lock_interruptible(&mqcdev->write_lock)) { + mutex_unlock(&mqcdev->lock); + return EPOLLERR; + } + if (mhi_get_free_desc_count(mhi_dev, DMA_TO_DEVICE) > 0) + mask |= EPOLLOUT | EPOLLWRNORM; + mutex_unlock(&mqcdev->write_lock); + mutex_unlock(&mqcdev->lock); + + dev_dbg(&mhi_dev->dev, "Client attempted to poll, returning mask 0x%x\n", mask); + + return mask; +} + +static int mhi_qaic_ctrl_tx(struct mqc_dev *mqcdev) +{ + int ret; + + ret = wait_event_interruptible(mqcdev->ul_wq, !mqcdev->enabled || + mhi_get_free_desc_count(mqcdev->mhi_dev, DMA_TO_DEVICE) > 0); + + if (!mqcdev->enabled) + return -ENODEV; + + return ret; +} + +static ssize_t mhi_qaic_ctrl_write(struct file *file, const char __user *buf, size_t count, + loff_t *offp) +{ + struct mqc_dev *mqcdev = file->private_data; + struct mhi_device *mhi_dev; + size_t bytes_xfered = 0; + struct device *dev; + int ret, nr_desc; + + mhi_dev = mqcdev->mhi_dev; + dev = &mhi_dev->dev; + + if (!mhi_dev->ul_chan) + return -EOPNOTSUPP; + + if (!buf || !count) + return -EINVAL; + + dev_dbg(dev, "Request to transfer %zu bytes\n", count); + + ret = mhi_qaic_ctrl_tx(mqcdev); + if (ret) + return ret; + + if (mutex_lock_interruptible(&mqcdev->write_lock)) + return -EINTR; + + nr_desc = mhi_get_free_desc_count(mhi_dev, DMA_TO_DEVICE); + if (nr_desc * mqcdev->mtu < count) { + ret = -EMSGSIZE; + dev_dbg(dev, "Buffer too big to transfer\n"); + goto unlock_mutex; + } + + while (count != bytes_xfered) { + enum mhi_flags flags; + size_t to_copy; + void *kbuf; + + to_copy = min_t(size_t, count - bytes_xfered, mqcdev->mtu); + kbuf = kmalloc(to_copy, GFP_KERNEL); + if (!kbuf) { + ret = -ENOMEM; + goto unlock_mutex; + } + + ret = copy_from_user(kbuf, buf + bytes_xfered, to_copy); + if (ret) { + kfree(kbuf); + ret = -EFAULT; + goto unlock_mutex; + } + + if (bytes_xfered + to_copy == count) + flags = MHI_EOT; + else + flags = MHI_CHAIN; + + ret = mhi_queue_buf(mhi_dev, DMA_TO_DEVICE, kbuf, to_copy, flags); + if (ret) { + kfree(kbuf); + dev_err(dev, "Failed to queue buf of size %zu\n", to_copy); + goto unlock_mutex; + } + + bytes_xfered += to_copy; + } + + mutex_unlock(&mqcdev->write_lock); + dev_dbg(dev, "bytes xferred: %zu\n", bytes_xfered); + + return bytes_xfered; + +unlock_mutex: + mutex_unlock(&mqcdev->write_lock); + return ret; +} + +static int mhi_qaic_ctrl_rx(struct mqc_dev *mqcdev) +{ + int ret; + + ret = wait_event_interruptible(mqcdev->dl_wq, + !mqcdev->enabled || !list_empty(&mqcdev->dl_queue)); + + if (!mqcdev->enabled) + return -ENODEV; + + return ret; +} + +static ssize_t mhi_qaic_ctrl_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + struct mqc_dev *mqcdev = file->private_data; + struct mqc_buf *ctrlbuf; + size_t to_copy; + int ret; + + if (!mqcdev->mhi_dev->dl_chan) + return -EOPNOTSUPP; + + ret = mhi_qaic_ctrl_rx(mqcdev); + if (ret) + return ret; + + if (mutex_lock_interruptible(&mqcdev->read_lock)) + return -EINTR; + + ctrlbuf = list_first_entry_or_null(&mqcdev->dl_queue, struct mqc_buf, node); + if (!ctrlbuf) { + mutex_unlock(&mqcdev->read_lock); + ret = -ENODEV; + goto error_out; + } + + to_copy = min_t(size_t, count, ctrlbuf->len); + if (copy_to_user(buf, ctrlbuf->data, to_copy)) { + mutex_unlock(&mqcdev->read_lock); + dev_dbg(&mqcdev->mhi_dev->dev, "Failed to copy data to user buffer\n"); + ret = -EFAULT; + goto error_out; + } + + ctrlbuf->len -= to_copy; + ctrlbuf->data += to_copy; + + if (!ctrlbuf->len) { + spin_lock_bh(&mqcdev->dl_queue_lock); + mhi_qaic_ctrl_buf_free(ctrlbuf); + spin_unlock_bh(&mqcdev->dl_queue_lock); + mhi_qaic_ctrl_fill_dl_queue(mqcdev); + dev_dbg(&mqcdev->mhi_dev->dev, "Read buf freed\n"); + } + + mutex_unlock(&mqcdev->read_lock); + return to_copy; + +error_out: + mutex_unlock(&mqcdev->read_lock); + return ret; +} + +static const struct file_operations mhidev_fops = { + .owner = THIS_MODULE, + .open = mhi_qaic_ctrl_open, + .release = mhi_qaic_ctrl_release, + .read = mhi_qaic_ctrl_read, + .write = mhi_qaic_ctrl_write, + .poll = mhi_qaic_ctrl_poll, +}; + +static void mhi_qaic_ctrl_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct mqc_dev *mqcdev = dev_get_drvdata(&mhi_dev->dev); + + dev_dbg(&mhi_dev->dev, "%s: status: %d xfer_len: %zu\n", __func__, + mhi_result->transaction_status, mhi_result->bytes_xferd); + + kfree(mhi_result->buf_addr); + + if (!mhi_result->transaction_status) + wake_up_interruptible(&mqcdev->ul_wq); +} + +static void mhi_qaic_ctrl_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct mqc_dev *mqcdev = dev_get_drvdata(&mhi_dev->dev); + struct mqc_buf *ctrlbuf; + + dev_dbg(&mhi_dev->dev, "%s: status: %d receive_len: %zu\n", __func__, + mhi_result->transaction_status, mhi_result->bytes_xferd); + + if (mhi_result->transaction_status && + mhi_result->transaction_status != -EOVERFLOW) { + kfree(mhi_result->buf_addr); + return; + } + + ctrlbuf = mhi_result->buf_addr + mqcdev->mtu; + ctrlbuf->data = mhi_result->buf_addr; + ctrlbuf->len = mhi_result->bytes_xferd; + spin_lock_bh(&mqcdev->dl_queue_lock); + list_add_tail(&ctrlbuf->node, &mqcdev->dl_queue); + spin_unlock_bh(&mqcdev->dl_queue_lock); + + wake_up_interruptible(&mqcdev->dl_wq); +} + +static int mhi_qaic_ctrl_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct mqc_dev *mqcdev; + struct device *dev; + int ret; + + mqcdev = kzalloc(sizeof(*mqcdev), GFP_KERNEL); + if (!mqcdev) + return -ENOMEM; + + kref_init(&mqcdev->ref_count); + mutex_init(&mqcdev->lock); + mqcdev->mhi_dev = mhi_dev; + + ret = xa_alloc(&mqc_xa, &mqcdev->minor, mqcdev, XA_LIMIT(0, MHI_QAIC_CTRL_MAX_MINORS), + GFP_KERNEL); + if (ret) { + kfree(mqcdev); + return ret; + } + + init_waitqueue_head(&mqcdev->ul_wq); + init_waitqueue_head(&mqcdev->dl_wq); + mutex_init(&mqcdev->read_lock); + mutex_init(&mqcdev->write_lock); + spin_lock_init(&mqcdev->dl_queue_lock); + INIT_LIST_HEAD(&mqcdev->dl_queue); + mqcdev->mtu = min_t(size_t, id->driver_data, MHI_MAX_MTU); + mqcdev->enabled = true; + mqcdev->open_count = 0; + dev_set_drvdata(&mhi_dev->dev, mqcdev); + + dev = device_create(mqc_dev_class, &mhi_dev->dev, MKDEV(mqc_dev_major, mqcdev->minor), + mqcdev, "%s", dev_name(&mhi_dev->dev)); + if (IS_ERR(dev)) { + xa_erase(&mqc_xa, mqcdev->minor); + dev_set_drvdata(&mhi_dev->dev, NULL); + kfree(mqcdev); + return PTR_ERR(dev); + } + + return 0; +}; + +static void mhi_qaic_ctrl_remove(struct mhi_device *mhi_dev) +{ + struct mqc_dev *mqcdev = dev_get_drvdata(&mhi_dev->dev); + + device_destroy(mqc_dev_class, MKDEV(mqc_dev_major, mqcdev->minor)); + + mutex_lock(&mqcdev->lock); + mqcdev->enabled = false; + if (mqcdev->open_count) + __mhi_qaic_ctrl_release(mqcdev); + mutex_unlock(&mqcdev->lock); + + xa_erase(&mqc_xa, mqcdev->minor); + kref_put(&mqcdev->ref_count, mqc_dev_release); +} + +/* .driver_data stores max mtu */ +static const struct mhi_device_id mhi_qaic_ctrl_match_table[] = { + { .chan = "QAIC_SAHARA", .driver_data = SZ_32K}, + {}, +}; +MODULE_DEVICE_TABLE(mhi, mhi_qaic_ctrl_match_table); + +static struct mhi_driver mhi_qaic_ctrl_driver = { + .id_table = mhi_qaic_ctrl_match_table, + .remove = mhi_qaic_ctrl_remove, + .probe = mhi_qaic_ctrl_probe, + .ul_xfer_cb = mhi_qaic_ctrl_ul_xfer_cb, + .dl_xfer_cb = mhi_qaic_ctrl_dl_xfer_cb, + .driver = { + .name = MHI_QAIC_CTRL_DRIVER_NAME, + }, +}; + +int mhi_qaic_ctrl_init(void) +{ + int ret; + + ret = register_chrdev(0, MHI_QAIC_CTRL_DRIVER_NAME, &mhidev_fops); + if (ret < 0) + return ret; + + mqc_dev_major = ret; + mqc_dev_class = class_create(THIS_MODULE, MHI_QAIC_CTRL_DRIVER_NAME); + if (IS_ERR(mqc_dev_class)) { + ret = PTR_ERR(mqc_dev_class); + goto unregister_chrdev; + } + + ret = mhi_driver_register(&mhi_qaic_ctrl_driver); + if (ret) + goto destroy_class; + + return 0; + +destroy_class: + class_destroy(mqc_dev_class); +unregister_chrdev: + unregister_chrdev(mqc_dev_major, MHI_QAIC_CTRL_DRIVER_NAME); + return ret; +} + +void mhi_qaic_ctrl_deinit(void) +{ + mhi_driver_unregister(&mhi_qaic_ctrl_driver); + class_destroy(mqc_dev_class); + unregister_chrdev(mqc_dev_major, MHI_QAIC_CTRL_DRIVER_NAME); + xa_destroy(&mqc_xa); +} diff --git a/drivers/accel/qaic/mhi_qaic_ctrl.h b/drivers/accel/qaic/mhi_qaic_ctrl.h new file mode 100644 index 000000000000..930b3ace1a59 --- /dev/null +++ b/drivers/accel/qaic/mhi_qaic_ctrl.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef __MHI_QAIC_CTRL_H__ +#define __MHI_QAIC_CTRL_H__ + +int mhi_qaic_ctrl_init(void); +void mhi_qaic_ctrl_deinit(void); + +#endif /* __MHI_QAIC_CTRL_H__ */ -- cgit v1.2.3 From 759662e4f1f8cd6ea0ec466e10cf29fe7c1fec36 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 27 Mar 2023 09:54:56 -0600 Subject: accel/qaic: Add qaic driver to the build system Now that we have all the components of a minimum QAIC which can boot and run an AIC100 device, add the infrastructure that allows the QAIC driver to be built. Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Stanislaw Gruszka Reviewed-by: Jacek Lawrynowicz Acked-by: Oded Gabbay Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/1679932497-30277-8-git-send-email-quic_jhugo@quicinc.com --- drivers/accel/Kconfig | 1 + drivers/accel/Makefile | 1 + drivers/accel/qaic/Kconfig | 23 +++++++++++++++++++++++ drivers/accel/qaic/Makefile | 13 +++++++++++++ 4 files changed, 38 insertions(+) create mode 100644 drivers/accel/qaic/Kconfig create mode 100644 drivers/accel/qaic/Makefile diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig index c437206aa3f1..64065fb8922b 100644 --- a/drivers/accel/Kconfig +++ b/drivers/accel/Kconfig @@ -26,5 +26,6 @@ menuconfig DRM_ACCEL source "drivers/accel/habanalabs/Kconfig" source "drivers/accel/ivpu/Kconfig" +source "drivers/accel/qaic/Kconfig" endif diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile index f22fd44d586b..ab3df932937f 100644 --- a/drivers/accel/Makefile +++ b/drivers/accel/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/ obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/ +obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/ diff --git a/drivers/accel/qaic/Kconfig b/drivers/accel/qaic/Kconfig new file mode 100644 index 000000000000..a9f866230058 --- /dev/null +++ b/drivers/accel/qaic/Kconfig @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Qualcomm Cloud AI accelerators driver +# + +config DRM_ACCEL_QAIC + tristate "Qualcomm Cloud AI accelerators" + depends on DRM_ACCEL + depends on PCI && HAS_IOMEM + depends on MHI_BUS + depends on MMU + select CRC32 + help + Enables driver for Qualcomm's Cloud AI accelerator PCIe cards that are + designed to accelerate Deep Learning inference workloads. + + The driver manages the PCIe devices and provides an IOCTL interface + for users to submit workloads to the devices. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called qaic. diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile new file mode 100644 index 000000000000..d5f4952ae79a --- /dev/null +++ b/drivers/accel/qaic/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for Qualcomm Cloud AI accelerators driver +# + +obj-$(CONFIG_DRM_ACCEL_QAIC) := qaic.o + +qaic-y := \ + mhi_controller.o \ + mhi_qaic_ctrl.o \ + qaic_control.o \ + qaic_data.o \ + qaic_drv.o -- cgit v1.2.3 From 864b438085df0a9b93f264e3c8a0f68db1e93bdc Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 27 Mar 2023 09:54:57 -0600 Subject: MAINTAINERS: Add entry for QAIC driver Add MAINTAINERS entry for the Qualcomm Cloud AI 100 driver. Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Stanislaw Gruszka Reviewed-by: Jacek Lawrynowicz Acked-by: Oded Gabbay Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/1679932497-30277-9-git-send-email-quic_jhugo@quicinc.com --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9736e04d3bd3..d037504a5748 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17265,6 +17265,16 @@ F: Documentation/devicetree/bindings/clock/qcom,* F: drivers/clk/qcom/ F: include/dt-bindings/clock/qcom,* +QUALCOMM CLOUD AI (QAIC) DRIVER +M: Jeffrey Hugo +L: linux-arm-msm@vger.kernel.org +L: dri-devel@lists.freedesktop.org +S: Supported +T: git git://anongit.freedesktop.org/drm/drm-misc +F: Documentation/accel/qaic/ +F: drivers/accel/qaic/ +F: include/uapi/drm/qaic_accel.h + QUALCOMM CORE POWER REDUCTION (CPR) AVS DRIVER M: Bjorn Andersson M: Konrad Dybcio -- cgit v1.2.3 From 379989e7cbdc7aa7496a00ee286ec146c7599cf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 4 Apr 2023 22:06:48 +0200 Subject: drm/ttm/pool: Fix ttm_pool_alloc error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When hitting an error, the error path forgot to unmap dma mappings and could call set_pages_wb() on already uncached pages. Fix this by introducing a common ttm_pool_free_range() function that does the right thing. v2: - Simplify that common function (Christian König) v3: - Rename that common function to ttm_pool_free_range() (Christian König) Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3") Cc: Christian König Cc: Dave Airlie Cc: Christian Koenig Cc: Huang Rui Cc: dri-devel@lists.freedesktop.org Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230404200650.11043-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_pool.c | 81 ++++++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index aa116a7bbae3..dfce896c4bae 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -367,6 +367,43 @@ static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order, return 0; } +/** + * ttm_pool_free_range() - Free a range of TTM pages + * @pool: The pool used for allocating. + * @tt: The struct ttm_tt holding the page pointers. + * @caching: The page caching mode used by the range. + * @start_page: index for first page to free. + * @end_page: index for last page to free + 1. + * + * During allocation the ttm_tt page-vector may be populated with ranges of + * pages with different attributes if allocation hit an error without being + * able to completely fulfill the allocation. This function can be used + * to free these individual ranges. + */ +static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, + enum ttm_caching caching, + pgoff_t start_page, pgoff_t end_page) +{ + struct page **pages = tt->pages; + unsigned int order; + pgoff_t i, nr; + + for (i = start_page; i < end_page; i += nr, pages += nr) { + struct ttm_pool_type *pt = NULL; + + order = ttm_pool_page_order(pool, *pages); + nr = (1UL << order); + if (tt->dma_address) + ttm_pool_unmap(pool, tt->dma_address[i], nr); + + pt = ttm_pool_select_type(pool, caching, order); + if (pt) + ttm_pool_type_give(pt, *pages); + else + ttm_pool_free_page(pool, caching, order, *pages); + } +} + /** * ttm_pool_alloc - Fill a ttm_tt object * @@ -382,12 +419,14 @@ static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order, int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, struct ttm_operation_ctx *ctx) { - unsigned long num_pages = tt->num_pages; + pgoff_t num_pages = tt->num_pages; dma_addr_t *dma_addr = tt->dma_address; struct page **caching = tt->pages; struct page **pages = tt->pages; + enum ttm_caching page_caching; gfp_t gfp_flags = GFP_USER; - unsigned int i, order; + pgoff_t caching_divide; + unsigned int order; struct page *p; int r; @@ -410,6 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, order = min_t(unsigned int, order, __fls(num_pages))) { struct ttm_pool_type *pt; + page_caching = tt->caching; pt = ttm_pool_select_type(pool, tt->caching, order); p = pt ? ttm_pool_type_take(pt) : NULL; if (p) { @@ -418,6 +458,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, if (r) goto error_free_page; + caching = pages; do { r = ttm_pool_page_allocated(pool, order, p, &dma_addr, @@ -426,14 +467,15 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, if (r) goto error_free_page; + caching = pages; if (num_pages < (1 << order)) break; p = ttm_pool_type_take(pt); } while (p); - caching = pages; } + page_caching = ttm_cached; while (num_pages >= (1 << order) && (p = ttm_pool_alloc_page(pool, gfp_flags, order))) { @@ -442,6 +484,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, tt->caching); if (r) goto error_free_page; + caching = pages; } r = ttm_pool_page_allocated(pool, order, p, &dma_addr, &num_pages, &pages); @@ -468,15 +511,13 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, return 0; error_free_page: - ttm_pool_free_page(pool, tt->caching, order, p); + ttm_pool_free_page(pool, page_caching, order, p); error_free_all: num_pages = tt->num_pages - num_pages; - for (i = 0; i < num_pages; ) { - order = ttm_pool_page_order(pool, tt->pages[i]); - ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]); - i += 1 << order; - } + caching_divide = caching - tt->pages; + ttm_pool_free_range(pool, tt, tt->caching, 0, caching_divide); + ttm_pool_free_range(pool, tt, ttm_cached, caching_divide, num_pages); return r; } @@ -492,27 +533,7 @@ EXPORT_SYMBOL(ttm_pool_alloc); */ void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt) { - unsigned int i; - - for (i = 0; i < tt->num_pages; ) { - struct page *p = tt->pages[i]; - unsigned int order, num_pages; - struct ttm_pool_type *pt; - - order = ttm_pool_page_order(pool, p); - num_pages = 1ULL << order; - if (tt->dma_address) - ttm_pool_unmap(pool, tt->dma_address[i], num_pages); - - pt = ttm_pool_select_type(pool, tt->caching, order); - if (pt) - ttm_pool_type_give(pt, tt->pages[i]); - else - ttm_pool_free_page(pool, tt->caching, order, - tt->pages[i]); - - i += num_pages; - } + ttm_pool_free_range(pool, tt, tt->caching, 0, tt->num_pages); while (atomic_long_read(&allocated_pages) > page_pool_size) ttm_pool_shrink(); -- cgit v1.2.3 From 322458c2bb1a0398c5775333e1e71e1ece8a461f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 4 Apr 2023 22:06:49 +0200 Subject: drm/ttm: Reduce the number of used allocation orders for TTM pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When swapping out, we will split multi-order pages both in order to move them to the swap-cache and to be able to return memory to the swap cache as soon as possible on a page-by-page basis. Reduce the page max order to the system PMD size, as we can then be nicer to the system and avoid splitting gigantic pages. Looking forward to when we might be able to swap out PMD size folios without splitting, this will also be a benefit. v2: - Include all orders up to the PMD size (Christian König) v3: - Avoid compilation errors for architectures with special PFN_SHIFTs Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230404200650.11043-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index dfce896c4bae..18c342a919a2 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -47,6 +47,11 @@ #include "ttm_module.h" +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) +/* Some architectures have a weird PMD_SHIFT */ +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) + /** * struct ttm_pool_dma - Helper object for coherent DMA mappings * @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); static atomic_long_t allocated_pages; -static struct ttm_pool_type global_write_combined[MAX_ORDER]; -static struct ttm_pool_type global_uncached[MAX_ORDER]; +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; static spinlock_t shrinker_lock; static struct list_head shrinker_list; @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, else gfp_flags |= GFP_HIGHUSER; - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); num_pages; order = min_t(unsigned int, order, __fls(num_pages))) { struct ttm_pool_type *pt; @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, if (use_dma_alloc) { for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < MAX_ORDER; ++j) + for (j = 0; j < TTM_DIM_ORDER; ++j) ttm_pool_type_init(&pool->caching[i].orders[j], pool, i, j); } @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) if (pool->use_dma_alloc) { for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < MAX_ORDER; ++j) + for (j = 0; j < TTM_DIM_ORDER; ++j) ttm_pool_type_fini(&pool->caching[i].orders[j]); } @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) unsigned int i; seq_puts(m, "\t "); - for (i = 0; i < MAX_ORDER; ++i) + for (i = 0; i < TTM_DIM_ORDER; ++i) seq_printf(m, " ---%2u---", i); seq_puts(m, "\n"); } @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, { unsigned int i; - for (i = 0; i < MAX_ORDER; ++i) + for (i = 0; i < TTM_DIM_ORDER; ++i) seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); seq_puts(m, "\n"); } @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) { unsigned int i; + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); + BUILD_BUG_ON(TTM_DIM_ORDER < 1); + if (!page_pool_size) page_pool_size = num_pages; spin_lock_init(&shrinker_lock); INIT_LIST_HEAD(&shrinker_list); - for (i = 0; i < MAX_ORDER; ++i) { + for (i = 0; i < TTM_DIM_ORDER; ++i) { ttm_pool_type_init(&global_write_combined[i], NULL, ttm_write_combined, i); ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) { unsigned int i; - for (i = 0; i < MAX_ORDER; ++i) { + for (i = 0; i < TTM_DIM_ORDER; ++i) { ttm_pool_type_fini(&global_write_combined[i]); ttm_pool_type_fini(&global_uncached[i]); -- cgit v1.2.3 From e44f18c6ff8beef7b2b10592287f0a9766376d9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 4 Apr 2023 22:06:50 +0200 Subject: drm/ttm: Make the call to ttm_tt_populate() interruptible when faulting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When swapping in, or under memory pressure ttm_tt_populate() may sleep for a substantiable amount of time. Allow interrupts during the sleep. This will also allow us to inject -EINTR errors during swapin in upcoming patches. Also avoid returning VM_FAULT_OOM, since that will confuse the core mm, making it print out a confused message and retrying the fault. Return VM_FAULT_SIGBUS also under OOM conditions. Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230404200650.11043-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index ca7744b852f5..4bca6b54520a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -218,14 +218,21 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, prot = ttm_io_prot(bo, bo->resource, prot); if (!bo->resource->bus.is_iomem) { struct ttm_operation_ctx ctx = { - .interruptible = false, + .interruptible = true, .no_wait_gpu = false, .force_alloc = true }; ttm = bo->ttm; - if (ttm_tt_populate(bdev, bo->ttm, &ctx)) - return VM_FAULT_OOM; + err = ttm_tt_populate(bdev, bo->ttm, &ctx); + if (err) { + if (err == -EINTR || err == -ERESTARTSYS || + err == -EAGAIN) + return VM_FAULT_NOPAGE; + + pr_debug("TTM fault hit %pe.\n", ERR_PTR(err)); + return VM_FAULT_SIGBUS; + } } else { /* Iomem should not be marked encrypted */ prot = pgprot_decrypted(prot); -- cgit v1.2.3