254 files changed, 10358 insertions, 3241 deletions
diff --git a/Documentation/ABI/stable/sysfs-driver-qaic b/Documentation/ABI/stable/sysfs-driver-qaic new file mode 100644 index 000000000000..c767a93342b3 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-driver-qaic @@ -0,0 +1,19 @@ +What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/accel/accel<minor_nr>/dbc<N>_state +Date: October 2025 +KernelVersion: 6.19 +Contact: Jeff Hugo <jeff.hugo@oss.qualcomm.com> +Description: Represents the current state of DMA Bridge channel (DBC). Below are the possible + states: + + =================== ========================================================== + IDLE (0) DBC is free and can be activated + ASSIGNED (1) DBC is activated and a workload is running on device + BEFORE_SHUTDOWN (2) Sub-system associated with this workload has crashed and + it will shutdown soon + AFTER_SHUTDOWN (3) Sub-system associated with this workload has crashed and + it has shutdown + BEFORE_POWER_UP (4) Sub-system associated with this workload is shutdown and + it will be powered up soon + AFTER_POWER_UP (5) Sub-system associated with this workload is now powered up + =================== ========================================================== +Users: Any userspace application or clients interested in DBC state. diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov new file mode 100644 index 000000000000..2fd7e9b7bacc --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov @@ -0,0 +1,159 @@ +What: /sys/bus/pci/drivers/xe/.../sriov_admin/ +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + This directory appears for the particular Intel Xe device when: + + - device supports SR-IOV, and + - device is a Physical Function (PF), and + - driver support for the SR-IOV PF is enabled on given device. + + This directory is used as a root for all attributes required to + manage both Physical Function (PF) and Virtual Functions (VFs). + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/ +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + This directory holds attributes related to the SR-IOV Physical + Function (PF). + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf1/ +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf2/ +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<N>/ +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + These directories hold attributes related to the SR-IOV Virtual + Functions (VFs). + + Note that the VF number <N> is 1-based as described in PCI SR-IOV + specification as the Xe driver follows that naming schema. + + There could be "vf1", "vf2" and so on, up to "vf<N>", where <N> + matches the value of the "sriov_totalvfs" attribute. + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/exec_quantum_ms +What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/preempt_timeout_us +What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/sched_priority +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/profile/exec_quantum_ms +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/profile/preempt_timeout_us +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/profile/sched_priority +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + These files expose scheduling parameters for the PF and its VFs, and + are visible only on Intel Xe platforms that use time-sliced GPU sharing. 
+ They can be changed even if VFs are enabled and running and reflect the + settings of all tiles/GTs assigned to the given function. + + exec_quantum_ms: (RW) unsigned integer + The GT execution quantum (EQ) in [ms] for the given function. + Actual quantum value might be aligned per HW/FW requirements. + + Default is 0 (unlimited). + + preempt_timeout_us: (RW) unsigned integer + The GT preemption timeout in [us] of the given function. + Actual timeout value might be aligned per HW/FW requirements. + + Default is 0 (unlimited). + + sched_priority: (RW/RO) string + The GT scheduling priority of the given function. + + "low" - function will be scheduled on the GPU for its EQ/PT + only if function has any work already submitted. + + "normal" - functions will be scheduled on the GPU for its EQ/PT + irrespective of whether it has submitted a work or not. + + "high" - function will be scheduled on the GPU for its EQ/PT + in the next time-slice after the current one completes + and function has a work submitted. + + Default is "low". + + When read, this file will display the current and available + scheduling priorities. The currently active priority level will + be enclosed in square brackets, like: + + [low] normal high + + This file can be read-only if changing the priority is not + supported. + + Writes to these attributes may fail with errors like: + -EINVAL if provided input is malformed or not recognized, + -EPERM if change is not applicable on given HW/FW, + -EIO if FW refuses to change the provisioning. + + Reads from these attributes may fail with: + -EUCLEAN if value is not consistent across all tiles/GTs. + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/exec_quantum_ms +What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/preempt_timeout_us +What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/sched_priority +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + These files allows bulk reconfiguration of the scheduling parameters + of the PF or VFs and are available only for Intel Xe platforms with + GPU sharing based on the time-slice basis. These scheduling parameters + can be changed even if VFs are enabled and running. + + exec_quantum_ms: (WO) unsigned integer + The GT execution quantum (EQ) in [ms] to be applied to all functions. + See sriov_admin/{pf,vf<N>}/profile/exec_quantum_ms for more details. + + preempt_timeout_us: (WO) unsigned integer + The GT preemption timeout (PT) in [us] to be applied to all functions. + See sriov_admin/{pf,vf<N>}/profile/preempt_timeout_us for more details. + + sched_priority: (RW/RO) string + The GT scheduling priority to be applied for all functions. + See sriov_admin/{pf,vf<N>}/profile/sched_priority for more details. + + Writes to these attributes may fail with errors like: + -EINVAL if provided input is malformed or not recognized, + -EPERM if change is not applicable on given HW/FW, + -EIO if FW refuses to change the provisioning. + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/stop +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + This file allows to control scheduling of the VF on the Intel Xe GPU + platforms. It allows to implement custom policy mechanism in case VFs + are misbehaving or triggering adverse events above defined thresholds. + + stop: (WO) bool + All GT executions of given function shall be immediately stopped. + To allow scheduling this VF again, the VF FLR must be triggered. 
+ + Writes to this attribute may fail with errors like: + -EINVAL if provided input is malformed or not recognized, + -EPERM if change is not applicable on given HW/FW, + -EIO if FW refuses to change the scheduling. + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/device +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/device +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + These are symlinks to the underlying PCI device entry representing + given Xe SR-IOV function. For the PF, this link is always present. + For VFs, this link is present only for currently enabled VFs. diff --git a/Documentation/accel/qaic/aic100.rst b/Documentation/accel/qaic/aic100.rst index 273da6192fb3..41331cf580b1 100644 --- a/Documentation/accel/qaic/aic100.rst +++ b/Documentation/accel/qaic/aic100.rst @@ -487,8 +487,8 @@ one user crashes, the fallout of that should be limited to that workload and not impact other workloads. SSR accomplishes this. If a particular workload crashes, QSM notifies the host via the QAIC_SSR MHI -channel. This notification identifies the workload by it's assigned DBC. A -multi-stage recovery process is then used to cleanup both sides, and get the +channel. This notification identifies the workload by its assigned DBC. A +multi-stage recovery process is then used to cleanup both sides, and gets the DBC/NSPs into a working state. When SSR occurs, any state in the workload is lost. Any inputs that were in @@ -496,6 +496,27 @@ process, or queued by not yet serviced, are lost. The loaded artifacts will remain in on-card DDR, but the host will need to re-activate the workload if it desires to recover the workload. +When SSR occurs for a specific NSP, the assigned DBC goes through the +following state transactions in order: + +DBC_STATE_BEFORE_SHUTDOWN + Indicates that the affected NSP was found in an unrecoverable error + condition. +DBC_STATE_AFTER_SHUTDOWN + Indicates that the NSP is under reset. +DBC_STATE_BEFORE_POWER_UP + Indicates that the NSP's debug information has been collected, and is + ready to be collected by the host (if desired). At that stage the NSP + is restarted by QSM. +DBC_STATE_AFTER_POWER_UP + Indicates that the NSP has been restarted, fully operational and is + in idle state. + +SSR also has an optional crashdump collection feature. If enabled, the host can +collect the memory dump for the crashed NSP and dump it to the user space via +the dev_coredump subsystem. The host can also decline the crashdump collection +request from the device. + Reliability, Accessibility, Serviceability (RAS) ================================================ diff --git a/Documentation/devicetree/bindings/display/renesas,rzg2l-du.yaml b/Documentation/devicetree/bindings/display/renesas,rzg2l-du.yaml index 1e32d14b6edb..2cc66dcef870 100644 --- a/Documentation/devicetree/bindings/display/renesas,rzg2l-du.yaml +++ b/Documentation/devicetree/bindings/display/renesas,rzg2l-du.yaml @@ -25,6 +25,9 @@ properties: - enum: - renesas,r9a07g054-du # RZ/V2L - const: renesas,r9a07g044-du # RZ/G2L fallback + - items: + - const: renesas,r9a09g056-du # RZ/V2N + - const: renesas,r9a09g057-du # RZ/V2H(P) fallback reg: maxItems: 1 diff --git a/Documentation/gpu/xe/xe_gt_freq.rst b/Documentation/gpu/xe/xe_gt_freq.rst index c0811200e327..182d6aabeee1 100644 --- a/Documentation/gpu/xe/xe_gt_freq.rst +++ b/Documentation/gpu/xe/xe_gt_freq.rst @@ -7,6 +7,9 @@ Xe GT Frequency Management .. 
kernel-doc:: drivers/gpu/drm/xe/xe_gt_freq.c :doc: Xe GT Frequency Management +.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_throttle.c + :doc: Xe GT Throttle + Internal API ============ diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index b78c47ed0d34..42d876a427c5 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -189,7 +189,6 @@ aie2_sched_notify(struct amdxdna_sched_job *job) up(&job->hwctx->priv->job_sem); job->job_done = true; - dma_fence_put(fence); mmput_async(job->mm); aie2_job_put(job); } @@ -691,17 +690,19 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) xdna = hwctx->client->xdna; XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); - drm_sched_entity_destroy(&hwctx->priv->entity); - aie2_hwctx_wait_for_idle(hwctx); /* Request fw to destroy hwctx and cancel the rest pending requests */ aie2_release_resource(hwctx); + mutex_unlock(&xdna->dev_lock); + drm_sched_entity_destroy(&hwctx->priv->entity); + /* Wait for all submitted jobs to be completed or canceled */ wait_event(hwctx->priv->job_free_wq, atomic64_read(&hwctx->job_submit_cnt) == atomic64_read(&hwctx->job_free_cnt)); + mutex_lock(&xdna->dev_lock); drm_sched_fini(&hwctx->priv->sched); aie2_ctx_syncobj_destroy(hwctx); diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 69cdce9ff208..d493bb1c3360 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -210,6 +210,14 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct hwctx->fw_ctx_id = resp.context_id; WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id"); + if (ndev->force_preempt_enabled) { + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id); + if (ret) { + XDNA_ERR(xdna, "failed to enable force preempt %d", ret); + return ret; + } + } + cq_pair = &resp.cq_pair[0]; x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr); x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr); @@ -601,6 +609,11 @@ aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) return 0; } +static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + return -EOPNOTSUPP; +} + static u32 aie2_get_chain_msg_op(u32 cmd_op) { switch (cmd_op) { @@ -621,6 +634,8 @@ static struct aie2_exec_msg_ops legacy_exec_message_ops = { .init_chain_req = aie2_init_exec_chain_req, .fill_cf_slot = aie2_cmdlist_fill_cf, .fill_dpu_slot = aie2_cmdlist_fill_dpu, + .fill_preempt_slot = aie2_cmdlist_unsupp, + .fill_elf_slot = aie2_cmdlist_unsupp, .get_chain_msg_op = aie2_get_chain_msg_op, }; @@ -680,6 +695,74 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si return 0; } +static int +aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + struct amdxdna_cmd_preempt_data *pd; + u32 cmd_len; + u32 arg_sz; + + pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*pd); + if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) + return -EINVAL; + + if (*size < sizeof(*npu_slot) + arg_sz) + return -EINVAL; + + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (npu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_PREEMPT; + npu_slot->inst_buf_addr = pd->inst_buf; + npu_slot->save_buf_addr = 
pd->save_buf; + npu_slot->restore_buf_addr = pd->restore_buf; + npu_slot->inst_size = pd->inst_size; + npu_slot->save_size = pd->save_size; + npu_slot->restore_size = pd->restore_size; + npu_slot->inst_prop_cnt = pd->inst_prop_cnt; + npu_slot->arg_cnt = arg_sz / sizeof(u32); + memcpy(npu_slot->args, pd->prop_args, arg_sz); + + *size = sizeof(*npu_slot) + arg_sz; + return 0; +} + +static int +aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + struct amdxdna_cmd_preempt_data *pd; + u32 cmd_len; + u32 arg_sz; + + pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*pd); + if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) + return -EINVAL; + + if (*size < sizeof(*npu_slot) + arg_sz) + return -EINVAL; + + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_ELF; + npu_slot->inst_buf_addr = pd->inst_buf; + npu_slot->save_buf_addr = pd->save_buf; + npu_slot->restore_buf_addr = pd->restore_buf; + npu_slot->inst_size = pd->inst_size; + npu_slot->save_size = pd->save_size; + npu_slot->restore_size = pd->restore_size; + npu_slot->inst_prop_cnt = pd->inst_prop_cnt; + npu_slot->arg_cnt = 1; + npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN; + + *size = struct_size(npu_slot, args, npu_slot->arg_cnt); + return 0; +} + static u32 aie2_get_npu_chain_msg_op(u32 cmd_op) { return MSG_OP_CHAIN_EXEC_NPU; @@ -691,6 +774,8 @@ static struct aie2_exec_msg_ops npu_exec_message_ops = { .init_chain_req = aie2_init_npu_chain_req, .fill_cf_slot = aie2_cmdlist_fill_npu_cf, .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu, + .fill_preempt_slot = aie2_cmdlist_fill_npu_preempt, + .fill_elf_slot = aie2_cmdlist_fill_npu_elf, .get_chain_msg_op = aie2_get_npu_chain_msg_op, }; @@ -749,6 +834,16 @@ aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo, case ERT_START_NPU: ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size); break; + case ERT_START_NPU_PREEMPT: + if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT)) + return -EOPNOTSUPP; + ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size); + break; + case ERT_START_NPU_PREEMPT_ELF: + if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT)) + return -EOPNOTSUPP; + ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size); + break; default: XDNA_INFO(xdna, "Unsupported op %d", op); ret = -EOPNOTSUPP; diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h index 947daa63f064..1c957a6298d3 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -176,6 +176,8 @@ struct exec_dpu_req { enum exec_npu_type { EXEC_NPU_TYPE_NON_ELF = 0x1, EXEC_NPU_TYPE_PARTIAL_ELF = 0x2, + EXEC_NPU_TYPE_PREEMPT = 0x3, + EXEC_NPU_TYPE_ELF = 0x4, }; union exec_req { @@ -372,6 +374,7 @@ struct cmd_chain_slot_dpu { }; #define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32)) +#define AIE2_EXEC_BUFFER_KERNEL_OP_TXN 3 struct cmd_chain_slot_npu { enum exec_npu_type type; u64 inst_buf_addr; diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index d7ccbdaf47f5..ceef1c502e9e 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -183,6 +183,10 @@ int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, if (cfg->category != category) continue; + if (cfg->feature_mask && + bitmap_subset(&cfg->feature_mask, &ndev->feature_mask, AIE2_FEATURE_MAX)) + continue; + value = val ? 
*val : cfg->value; ret = aie2_set_runtime_cfg(ndev, cfg->type, value); if (ret) { @@ -932,6 +936,25 @@ static int aie2_get_telemetry(struct amdxdna_client *client, return 0; } +static int aie2_get_preempt_state(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_attribute_state state = {}; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + + ndev = xdna->dev_handle; + if (args->param == DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE) + state.state = ndev->force_preempt_enabled; + else if (args->param == DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE) + state.state = ndev->frame_boundary_preempt; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &state, sizeof(state))) + return -EFAULT; + + return 0; +} + static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args) { struct amdxdna_dev *xdna = client->xdna; @@ -972,6 +995,10 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i case DRM_AMDXDNA_QUERY_RESOURCE_INFO: ret = aie2_query_resource_info(client, args); break; + case DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE: + case DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE: + ret = aie2_get_preempt_state(client, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; @@ -1078,6 +1105,38 @@ static int aie2_set_power_mode(struct amdxdna_client *client, return aie2_pm_set_mode(xdna->dev_handle, power_mode); } +static int aie2_set_preempt_state(struct amdxdna_client *client, + struct amdxdna_drm_set_state *args) +{ + struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle; + struct amdxdna_drm_attribute_state state; + u32 val; + int ret; + + if (copy_from_user(&state, u64_to_user_ptr(args->buffer), sizeof(state))) + return -EFAULT; + + if (state.state > 1) + return -EINVAL; + + if (XDNA_MBZ_DBG(client->xdna, state.pad, sizeof(state.pad))) + return -EINVAL; + + if (args->param == DRM_AMDXDNA_SET_FORCE_PREEMPT) { + ndev->force_preempt_enabled = state.state; + } else if (args->param == DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT) { + val = state.state; + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT, + &val); + if (ret) + return ret; + + ndev->frame_boundary_preempt = state.state; + } + + return 0; +} + static int aie2_set_state(struct amdxdna_client *client, struct amdxdna_drm_set_state *args) { @@ -1095,6 +1154,10 @@ static int aie2_set_state(struct amdxdna_client *client, case DRM_AMDXDNA_SET_POWER_MODE: ret = aie2_set_power_mode(client, args); break; + case DRM_AMDXDNA_SET_FORCE_PREEMPT: + case DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT: + ret = aie2_set_preempt_state(client, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index 9793cd1e0c55..a5f9c42155d1 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -110,12 +110,15 @@ struct aie_metadata { enum rt_config_category { AIE2_RT_CFG_INIT, AIE2_RT_CFG_CLK_GATING, + AIE2_RT_CFG_FORCE_PREEMPT, + AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT, }; struct rt_config { u32 type; u32 value; u32 category; + unsigned long feature_mask; }; struct dpm_clk_freq { @@ -164,6 +167,8 @@ struct aie2_exec_msg_ops { void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt); int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, 
size_t *size); + int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); u32 (*get_chain_msg_op)(u32 cmd_op); }; @@ -197,6 +202,8 @@ struct amdxdna_dev_hdl { u32 hclk_freq; u32 max_tops; u32 curr_tops; + u32 force_preempt_enabled; + u32 frame_boundary_preempt; /* Mailbox and the management channel */ struct mailbox *mbox; @@ -223,6 +230,7 @@ struct aie2_hw_ops { enum aie2_fw_feature { AIE2_NPU_COMMAND, + AIE2_PREEMPT, AIE2_FEATURE_MAX }; diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c index 11c0e9e7b03a..bd94ee96c2bc 100644 --- a/drivers/accel/amdxdna/aie2_smu.c +++ b/drivers/accel/amdxdna/aie2_smu.c @@ -147,6 +147,16 @@ int aie2_smu_init(struct amdxdna_dev_hdl *ndev) { int ret; + /* + * Failing to set power off indicates an unrecoverable hardware or + * firmware error. + */ + ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL); + if (ret) { + XDNA_ERR(ndev->xdna, "Access power failed, ret %d", ret); + return ret; + } + ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL); if (ret) { XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret); diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 878cc955f56d..d17aef89a0ad 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -422,6 +422,7 @@ void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release"); amdxdna_arg_bos_put(job); amdxdna_gem_put_obj(job->cmd_bo); + dma_fence_put(job->fence); } int amdxdna_cmd_submit(struct amdxdna_client *client, diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index d02fb32499fa..b6151244d64f 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -16,6 +16,8 @@ enum ert_cmd_opcode { ERT_START_CU = 0, ERT_CMD_CHAIN = 19, ERT_START_NPU = 20, + ERT_START_NPU_PREEMPT = 21, + ERT_START_NPU_PREEMPT_ELF = 22, ERT_INVALID_CMD = ~0U, }; @@ -55,6 +57,21 @@ struct amdxdna_cmd_chain { u64 data[] __counted_by(command_count); }; +/* + * Interpretation of the beginning of data payload for ERT_START_NPU_PREEMPT in + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args. 
+ */ +struct amdxdna_cmd_preempt_data { + u64 inst_buf; /* instruction buffer address */ + u64 save_buf; /* save buffer address */ + u64 restore_buf; /* restore buffer address */ + u32 inst_size; /* size of instruction buffer in bytes */ + u32 save_size; /* size of save buffer in bytes */ + u32 restore_size; /* size of restore buffer in bytes */ + u32 inst_prop_cnt; /* properties count */ + u32 prop_args[]; /* properties and regular kernel arguments */ +}; + /* Exec buffer command header format */ #define AMDXDNA_CMD_STATE GENMASK(3, 0) #define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10) diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c index 24258dcc18eb..858df97cd3fb 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.c +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -516,6 +516,7 @@ xdna_mailbox_create_channel(struct mailbox *mb, } mb_chann->bad_state = false; + mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0); MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq); return mb_chann; diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 7590265d4485..1973ab67721b 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -31,9 +31,10 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); * 0.3: Support firmware debug buffer * 0.4: Support getting resource information * 0.5: Support getting telemetry data + * 0.6: Support preemption */ #define AMDXDNA_DRIVER_MAJOR 0 -#define AMDXDNA_DRIVER_MINOR 5 +#define AMDXDNA_DRIVER_MINOR 6 /* * Bind the driver base on (vendor_id, device_id) pair and later use the diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c index d90777275a9f..986a5f28ba24 100644 --- a/drivers/accel/amdxdna/npu4_regs.c +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -64,10 +64,13 @@ const struct rt_config npu4_default_rt_cfg[] = { { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */ + { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT }, + { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT }, { 0 }, }; @@ -85,6 +88,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = { const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { { .feature = AIE2_NPU_COMMAND, .min_minor = 15 }, + { .feature = AIE2_PREEMPT, .min_minor = 12 }, { 0 } }; diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index b305effcf003..3d6fccdefdd6 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -455,6 +455,9 @@ int ivpu_shutdown(struct ivpu_device *vdev) static const struct file_operations ivpu_fops = { .owner = THIS_MODULE, DRM_ACCEL_FOPS, +#ifdef CONFIG_PROC_FS + .show_fdinfo = drm_show_fdinfo, +#endif }; static const struct drm_driver driver = { @@ -469,6 +472,9 @@ static const struct drm_driver driver = { .ioctls = ivpu_drm_ioctls, .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), .fops = &ivpu_fops, +#ifdef CONFIG_PROC_FS + .show_fdinfo = drm_show_memory_stats, +#endif .name = DRIVER_NAME, .desc = DRIVER_DESC, diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 74b12c7e6caf..ece68f570b7e 100644 --- 
a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -333,6 +333,17 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj) drm_gem_shmem_free(&bo->base); } +static enum drm_gem_object_status ivpu_gem_status(struct drm_gem_object *obj) +{ + struct ivpu_bo *bo = to_ivpu_bo(obj); + enum drm_gem_object_status status = 0; + + if (ivpu_bo_is_resident(bo)) + status |= DRM_GEM_OBJECT_RESIDENT; + + return status; +} + static const struct drm_gem_object_funcs ivpu_gem_funcs = { .free = ivpu_gem_bo_free, .open = ivpu_gem_bo_open, @@ -343,6 +354,7 @@ static const struct drm_gem_object_funcs ivpu_gem_funcs = { .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = drm_gem_shmem_object_mmap, + .status = ivpu_gem_status, .vm_ops = &drm_gem_shmem_vm_ops, }; diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index 2dcd7eba9cb7..0c3350f22b55 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -82,6 +82,11 @@ static inline bool ivpu_bo_is_read_only(struct ivpu_bo *bo) return bo->flags & DRM_IVPU_BO_READ_ONLY; } +static inline bool ivpu_bo_is_resident(struct ivpu_bo *bo) +{ + return !!bo->base.pages; +} + static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr) { if (vpu_addr < bo->vpu_addr) diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c index 268ab7744a8b..d250a10caca9 100644 --- a/drivers/accel/ivpu/ivpu_sysfs.c +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -63,7 +63,8 @@ npu_memory_utilization_show(struct device *dev, struct device_attribute *attr, c mutex_lock(&vdev->bo_list_lock); list_for_each_entry(bo, &vdev->bo_list, bo_list_node) - total_npu_memory += bo->base.base.size; + if (ivpu_bo_is_resident(bo)) + total_npu_memory += ivpu_bo_size(bo); mutex_unlock(&vdev->bo_list_lock); return sysfs_emit(buf, "%lld\n", total_npu_memory); diff --git a/drivers/accel/qaic/Kconfig b/drivers/accel/qaic/Kconfig index 5e405a19c157..116e42d152ca 100644 --- a/drivers/accel/qaic/Kconfig +++ b/drivers/accel/qaic/Kconfig @@ -9,6 +9,7 @@ config DRM_ACCEL_QAIC depends on PCI && HAS_IOMEM depends on MHI_BUS select CRC32 + select WANT_DEV_COREDUMP help Enables driver for Qualcomm's Cloud AI accelerator PCIe cards that are designed to accelerate Deep Learning inference workloads. 
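The dbc<N>_state attribute introduced by the sysfs-driver-qaic ABI entry above is an ordinary sysfs file, so a userspace client interested in SSR progress can track it with plain file I/O. Below is a minimal userspace sketch (not part of this series): it assumes the attribute reports the numeric state value listed in the ABI table, with the exact output format decided by qaic_sysfs.c, which is only partially shown in this excerpt; the path handling and error reporting are illustrative only.

/*
 * Hypothetical userspace sketch: read a qaic dbc<N>_state attribute and
 * print the matching state name from the sysfs-driver-qaic ABI table.
 * Assumes the file reports the numeric value (0-5); the real formatting
 * comes from qaic_sysfs.c, which is not fully shown in this excerpt.
 */
#include <stdio.h>

static const char * const dbc_state_names[] = {
	"IDLE", "ASSIGNED", "BEFORE_SHUTDOWN",
	"AFTER_SHUTDOWN", "BEFORE_POWER_UP", "AFTER_POWER_UP",
};

int main(int argc, char **argv)
{
	/* e.g. /sys/bus/pci/drivers/qaic/0000:01:00.0/accel/accel0/dbc0_state */
	FILE *f;
	int state;

	if (argc != 2)
		return 1;
	f = fopen(argv[1], "r");
	if (!f) {
		perror(argv[1]);
		return 1;
	}
	if (fscanf(f, "%d", &state) != 1 || state < 0 || state > 5) {
		fprintf(stderr, "unexpected dbc state format\n");
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("%s (%d)\n", dbc_state_names[state], state);
	return 0;
}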
diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile index 1106b876f737..71f727b74da3 100644 --- a/drivers/accel/qaic/Makefile +++ b/drivers/accel/qaic/Makefile @@ -11,6 +11,8 @@ qaic-y := \ qaic_data.o \ qaic_drv.o \ qaic_ras.o \ + qaic_ssr.o \ + qaic_sysfs.o \ qaic_timesync.o \ sahara.o diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h index 820d133236dd..fa7a8155658c 100644 --- a/drivers/accel/qaic/qaic.h +++ b/drivers/accel/qaic/qaic.h @@ -21,6 +21,7 @@ #define QAIC_DBC_BASE SZ_128K #define QAIC_DBC_SIZE SZ_4K +#define QAIC_SSR_DBC_SENTINEL U32_MAX /* No ongoing SSR sentinel */ #define QAIC_NO_PARTITION -1 @@ -47,6 +48,22 @@ enum __packed dev_states { QAIC_ONLINE, }; +enum dbc_states { + /* DBC is free and can be activated */ + DBC_STATE_IDLE, + /* DBC is activated and a workload is running on device */ + DBC_STATE_ASSIGNED, + /* Sub-system associated with this workload has crashed and it will shutdown soon */ + DBC_STATE_BEFORE_SHUTDOWN, + /* Sub-system associated with this workload has crashed and it has shutdown */ + DBC_STATE_AFTER_SHUTDOWN, + /* Sub-system associated with this workload is shutdown and it will be powered up soon */ + DBC_STATE_BEFORE_POWER_UP, + /* Sub-system associated with this workload is now powered up */ + DBC_STATE_AFTER_POWER_UP, + DBC_STATE_MAX, +}; + extern bool datapath_polling; struct qaic_user { @@ -114,6 +131,8 @@ struct dma_bridge_chan { unsigned int irq; /* Polling work item to simulate interrupts */ struct work_struct poll_work; + /* Represents various states of this DBC from enum dbc_states */ + unsigned int state; }; struct qaic_device { @@ -161,6 +180,8 @@ struct qaic_device { struct mhi_device *qts_ch; /* Work queue for tasks related to MHI "QAIC_TIMESYNC" channel */ struct workqueue_struct *qts_wq; + /* MHI "QAIC_TIMESYNC_PERIODIC" channel device */ + struct mhi_device *mqts_ch; /* Head of list of page allocated by MHI bootlog device */ struct list_head bootlog; /* MHI bootlog channel device */ @@ -177,6 +198,14 @@ struct qaic_device { unsigned int ue_count; /* Un-correctable non-fatal error count */ unsigned int ue_nf_count; + /* MHI SSR channel device */ + struct mhi_device *ssr_ch; + /* Work queue for tasks related to MHI SSR device */ + struct workqueue_struct *ssr_wq; + /* Buffer to collect SSR crashdump via SSR MHI channel */ + void *ssr_mhi_buf; + /* DBC which is under SSR. 
Sentinel U32_MAX would mean that no SSR in progress */ + u32 ssr_dbc; }; struct qaic_drm_device { @@ -195,6 +224,8 @@ struct qaic_drm_device { struct list_head users; /* Synchronizes access to users list */ struct mutex users_mutex; + /* Pointer to array of DBC sysfs attributes */ + void *sysfs_attrs; }; struct qaic_bo { @@ -317,6 +348,13 @@ int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void irq_polling_work(struct work_struct *work); +void qaic_irq_polling_work(struct work_struct *work); +void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id); +void qaic_dbc_exit_ssr(struct qaic_device *qdev); + +/* qaic_sysfs.c */ +int qaic_sysfs_init(struct qaic_drm_device *qddev); +void qaic_sysfs_remove(struct qaic_drm_device *qddev); +void set_dbc_state(struct qaic_device *qdev, u32 dbc_id, unsigned int state); #endif /* _QAIC_H_ */ diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c index 49b6e75ef82a..428d8f65bff3 100644 --- a/drivers/accel/qaic/qaic_control.c +++ b/drivers/accel/qaic/qaic_control.c @@ -310,6 +310,7 @@ static void save_dbc_buf(struct qaic_device *qdev, struct ioctl_resources *resou enable_dbc(qdev, dbc_id, usr); qdev->dbc[dbc_id].in_use = true; resources->buf = NULL; + set_dbc_state(qdev, dbc_id, DBC_STATE_ASSIGNED); } } @@ -923,6 +924,7 @@ static int decode_deactivate(struct qaic_device *qdev, void *trans, u32 *msg_len } release_dbc(qdev, dbc_id); + set_dbc_state(qdev, dbc_id, DBC_STATE_IDLE); *msg_len += sizeof(*in_trans); return 0; diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index fa723a2bdfa9..60cb4d65d48e 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -1047,6 +1047,11 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi goto unlock_ch_srcu; } + if (dbc->id == qdev->ssr_dbc) { + ret = -EPIPE; + goto unlock_ch_srcu; + } + ret = qaic_prepare_bo(qdev, bo, &args->hdr); if (ret) goto unlock_ch_srcu; @@ -1370,6 +1375,11 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr goto release_ch_rcu; } + if (dbc->id == qdev->ssr_dbc) { + ret = -EPIPE; + goto release_ch_rcu; + } + ret = mutex_lock_interruptible(&dbc->req_lock); if (ret) goto release_ch_rcu; @@ -1504,7 +1514,7 @@ irqreturn_t dbc_irq_handler(int irq, void *data) return IRQ_WAKE_THREAD; } -void irq_polling_work(struct work_struct *work) +void qaic_irq_polling_work(struct work_struct *work) { struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work); unsigned long flags; @@ -1722,6 +1732,11 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file goto unlock_ch_srcu; } + if (dbc->id == qdev->ssr_dbc) { + ret = -EPIPE; + goto unlock_ch_srcu; + } + obj = drm_gem_object_lookup(file_priv, args->handle); if (!obj) { ret = -ENOENT; @@ -1742,6 +1757,9 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file if (!dbc->usr) ret = -EPERM; + if (dbc->id == qdev->ssr_dbc) + ret = -EPIPE; + put_obj: drm_gem_object_put(obj); unlock_ch_srcu: @@ -1945,6 +1963,17 @@ static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *db spin_unlock_irqrestore(&dbc->xfer_lock, flags); 
} +static void sync_empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc) +{ + empty_xfer_list(qdev, dbc); + synchronize_srcu(&dbc->ch_lock); + /* + * Threads holding channel lock, may add more elements in the xfer_list. + * Flush out these elements from xfer_list. + */ + empty_xfer_list(qdev, dbc); +} + int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr) { if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle) @@ -1973,13 +2002,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id) struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id]; dbc->usr = NULL; - empty_xfer_list(qdev, dbc); - synchronize_srcu(&dbc->ch_lock); - /* - * Threads holding channel lock, may add more elements in the xfer_list. - * Flush out these elements from xfer_list. - */ - empty_xfer_list(qdev, dbc); + sync_empty_xfer_list(qdev, dbc); } void release_dbc(struct qaic_device *qdev, u32 dbc_id) @@ -2020,3 +2043,30 @@ void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail) *head = readl(dbc->dbc_base + REQHP_OFF); *tail = readl(dbc->dbc_base + REQTP_OFF); } + +/* + * qaic_dbc_enter_ssr - Prepare to enter in sub system reset(SSR) for given DBC ID. + * @qdev: qaic device handle + * @dbc_id: ID of the DBC which will enter SSR + * + * The device will automatically deactivate the workload as not + * all errors can be silently recovered. The user will be + * notified and will need to decide the required recovery + * action to take. + */ +void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id) +{ + qdev->ssr_dbc = dbc_id; + release_dbc(qdev, dbc_id); +} + +/* + * qaic_dbc_exit_ssr - Prepare to exit from sub system reset(SSR) for given DBC ID. + * @qdev: qaic device handle + * + * The DBC returns to an operational state and begins accepting work after exiting SSR. 
+ */ +void qaic_dbc_exit_ssr(struct qaic_device *qdev) +{ + qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL; +} diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index e162f4b8a262..4c70bd949d53 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -30,6 +30,7 @@ #include "qaic.h" #include "qaic_debugfs.h" #include "qaic_ras.h" +#include "qaic_ssr.h" #include "qaic_timesync.h" #include "sahara.h" @@ -270,6 +271,13 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id) return ret; } + ret = qaic_sysfs_init(qddev); + if (ret) { + drm_dev_unregister(drm); + pci_dbg(qdev->pdev, "qaic_sysfs_init failed %d\n", ret); + return ret; + } + qaic_debugfs_init(qddev); return ret; @@ -281,6 +289,7 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) struct drm_device *drm = to_drm(qddev); struct qaic_user *usr; + qaic_sysfs_remove(qddev); drm_dev_unregister(drm); qddev->partition_id = 0; /* @@ -382,6 +391,7 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev) qaic_notify_reset(qdev); /* start tearing things down */ + qaic_clean_up_ssr(qdev); for (i = 0; i < qdev->num_dbc; ++i) release_dbc(qdev, i); } @@ -431,11 +441,18 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, qdev->qts_wq = qaicm_wq_init(drm, "qaic_ts"); if (IS_ERR(qdev->qts_wq)) return NULL; + qdev->ssr_wq = qaicm_wq_init(drm, "qaic_ssr"); + if (IS_ERR(qdev->ssr_wq)) + return NULL; ret = qaicm_srcu_init(drm, &qdev->dev_lock); if (ret) return NULL; + ret = qaic_ssr_init(qdev, drm); + if (ret) + pci_info(pdev, "QAIC SSR crashdump collection not supported.\n"); + qdev->qddev = qddev; qdev->pdev = pdev; qddev->qdev = qdev; @@ -545,7 +562,7 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev) qdev->dbc[i].irq = pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1); if (!qdev->single_msi) disable_irq_nosync(qdev->dbc[i].irq); - INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work); + INIT_WORK(&qdev->dbc[i].poll_work, qaic_irq_polling_work); } } @@ -660,6 +677,92 @@ static const struct pci_error_handlers qaic_pci_err_handler = { .reset_done = qaic_pci_reset_done, }; +static bool qaic_is_under_reset(struct qaic_device *qdev) +{ + int rcu_id; + bool ret; + + rcu_id = srcu_read_lock(&qdev->dev_lock); + ret = qdev->dev_state != QAIC_ONLINE; + srcu_read_unlock(&qdev->dev_lock, rcu_id); + return ret; +} + +static bool qaic_data_path_busy(struct qaic_device *qdev) +{ + bool ret = false; + int dev_rcu_id; + int i; + + dev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->dev_state != QAIC_ONLINE) { + srcu_read_unlock(&qdev->dev_lock, dev_rcu_id); + return false; + } + for (i = 0; i < qdev->num_dbc; i++) { + struct dma_bridge_chan *dbc = &qdev->dbc[i]; + unsigned long flags; + int ch_rcu_id; + + ch_rcu_id = srcu_read_lock(&dbc->ch_lock); + if (!dbc->usr || !dbc->in_use) { + srcu_read_unlock(&dbc->ch_lock, ch_rcu_id); + continue; + } + spin_lock_irqsave(&dbc->xfer_lock, flags); + ret = !list_empty(&dbc->xfer_list); + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + srcu_read_unlock(&dbc->ch_lock, ch_rcu_id); + if (ret) + break; + } + srcu_read_unlock(&qdev->dev_lock, dev_rcu_id); + return ret; +} + +static int qaic_pm_suspend(struct device *dev) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); + + dev_dbg(dev, "Suspending..\n"); + if (qaic_data_path_busy(qdev)) { + dev_dbg(dev, "Device's datapath is busy. 
Aborting suspend..\n"); + return -EBUSY; + } + if (qaic_is_under_reset(qdev)) { + dev_dbg(dev, "Device is under reset. Aborting suspend..\n"); + return -EBUSY; + } + qaic_mqts_ch_stop_timer(qdev->mqts_ch); + qaic_pci_reset_prepare(qdev->pdev); + pci_save_state(qdev->pdev); + pci_disable_device(qdev->pdev); + pci_set_power_state(qdev->pdev, PCI_D3hot); + return 0; +} + +static int qaic_pm_resume(struct device *dev) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); + int ret; + + dev_dbg(dev, "Resuming..\n"); + pci_set_power_state(qdev->pdev, PCI_D0); + pci_restore_state(qdev->pdev); + ret = pci_enable_device(qdev->pdev); + if (ret) { + dev_err(dev, "pci_enable_device failed on resume %d\n", ret); + return ret; + } + pci_set_master(qdev->pdev); + qaic_pci_reset_done(qdev->pdev); + return 0; +} + +static const struct dev_pm_ops qaic_pm_ops = { + SYSTEM_SLEEP_PM_OPS(qaic_pm_suspend, qaic_pm_resume) +}; + static struct pci_driver qaic_pci_driver = { .name = QAIC_NAME, .id_table = qaic_ids, @@ -667,6 +770,9 @@ static struct pci_driver qaic_pci_driver = { .remove = qaic_pci_remove, .shutdown = qaic_pci_shutdown, .err_handler = &qaic_pci_err_handler, + .driver = { + .pm = pm_sleep_ptr(&qaic_pm_ops), + }, }; static int __init qaic_init(void) @@ -702,9 +808,16 @@ static int __init qaic_init(void) ret = qaic_ras_register(); if (ret) pr_debug("qaic: qaic_ras_register failed %d\n", ret); + ret = qaic_ssr_register(); + if (ret) { + pr_debug("qaic: qaic_ssr_register failed %d\n", ret); + goto free_bootlog; + } return 0; +free_bootlog: + qaic_bootlog_unregister(); free_mhi: mhi_driver_unregister(&qaic_mhi_driver); free_pci: @@ -730,6 +843,7 @@ static void __exit qaic_exit(void) * reinitializing the link_up state after the cleanup is done. */ link_up = true; + qaic_ssr_unregister(); qaic_ras_unregister(); qaic_bootlog_unregister(); qaic_timesync_deinit(); diff --git a/drivers/accel/qaic/qaic_ssr.c b/drivers/accel/qaic/qaic_ssr.c new file mode 100644 index 000000000000..9b662d690371 --- /dev/null +++ b/drivers/accel/qaic/qaic_ssr.c @@ -0,0 +1,815 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */ +/* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ + +#include <asm/byteorder.h> +#include <drm/drm_file.h> +#include <drm/drm_managed.h> +#include <linux/devcoredump.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/mhi.h> +#include <linux/workqueue.h> + +#include "qaic.h" +#include "qaic_ssr.h" + +#define SSR_RESP_MSG_SZ 32 +#define SSR_MHI_BUF_SIZE SZ_64K +#define SSR_MEM_READ_DATA_SIZE ((u64)SSR_MHI_BUF_SIZE - sizeof(struct ssr_crashdump)) +#define SSR_MEM_READ_CHUNK_SIZE ((u64)SSR_MEM_READ_DATA_SIZE - sizeof(struct ssr_memory_read_rsp)) + +#define DEBUG_TRANSFER_INFO BIT(0) +#define DEBUG_TRANSFER_INFO_RSP BIT(1) +#define MEMORY_READ BIT(2) +#define MEMORY_READ_RSP BIT(3) +#define DEBUG_TRANSFER_DONE BIT(4) +#define DEBUG_TRANSFER_DONE_RSP BIT(5) +#define SSR_EVENT BIT(8) +#define SSR_EVENT_RSP BIT(9) + +#define SSR_EVENT_NACK BIT(0) +#define BEFORE_SHUTDOWN BIT(1) +#define AFTER_SHUTDOWN BIT(2) +#define BEFORE_POWER_UP BIT(3) +#define AFTER_POWER_UP BIT(4) + +struct debug_info_table { + /* Save preferences. 
Default is mandatory */ + u64 save_perf; + /* Base address of the debug region */ + u64 mem_base; + /* Size of debug region in bytes */ + u64 len; + /* Description */ + char desc[20]; + /* Filename of debug region */ + char filename[20]; +}; + +struct _ssr_hdr { + __le32 cmd; + __le32 len; + __le32 dbc_id; +}; + +struct ssr_hdr { + u32 cmd; + u32 len; + u32 dbc_id; +}; + +struct ssr_debug_transfer_info { + struct ssr_hdr hdr; + u32 resv; + u64 tbl_addr; + u64 tbl_len; +} __packed; + +struct ssr_debug_transfer_info_rsp { + struct _ssr_hdr hdr; + __le32 ret; +} __packed; + +struct ssr_memory_read { + struct _ssr_hdr hdr; + __le32 resv; + __le64 addr; + __le64 len; +} __packed; + +struct ssr_memory_read_rsp { + struct _ssr_hdr hdr; + __le32 resv; + u8 data[]; +} __packed; + +struct ssr_debug_transfer_done { + struct _ssr_hdr hdr; + __le32 resv; +} __packed; + +struct ssr_debug_transfer_done_rsp { + struct _ssr_hdr hdr; + __le32 ret; +} __packed; + +struct ssr_event { + struct ssr_hdr hdr; + u32 event; +} __packed; + +struct ssr_event_rsp { + struct _ssr_hdr hdr; + __le32 event; +} __packed; + +struct ssr_resp { + /* Work struct to schedule work coming on QAIC_SSR channel */ + struct work_struct work; + /* Root struct of device, used to access device resources */ + struct qaic_device *qdev; + /* Buffer used by MHI for transfer requests */ + u8 data[] __aligned(8); +}; + +/* SSR crashdump book keeping structure */ +struct ssr_dump_info { + /* DBC associated with this SSR crashdump */ + struct dma_bridge_chan *dbc; + /* + * It will be used when we complete the crashdump download and switch + * to waiting on SSR events + */ + struct ssr_resp *resp; + /* MEMORY READ request MHI buffer.*/ + struct ssr_memory_read *read_buf_req; + /* TRUE: ->read_buf_req is queued for MHI transaction. FALSE: Otherwise */ + bool read_buf_req_queued; + /* Address of table in host */ + void *tbl_addr; + /* Total size of table */ + u64 tbl_len; + /* Offset of table(->tbl_addr) where the new chunk will be dumped */ + u64 tbl_off; + /* Address of table in device/target */ + u64 tbl_addr_dev; + /* Ptr to the entire dump */ + void *dump_addr; + /* Entire crashdump size */ + u64 dump_sz; + /* Offset of crashdump(->dump_addr) where the new chunk will be dumped */ + u64 dump_off; + /* Points to the table entry we are currently downloading */ + struct debug_info_table *tbl_ent; + /* Offset in the current table entry(->tbl_ent) for next chuck */ + u64 tbl_ent_off; +}; + +struct ssr_crashdump { + /* + * Points to a book keeping struct maintained by MHI SSR device while + * downloading a SSR crashdump. It is NULL when crashdump downloading + * not in progress. 
+ */ + struct ssr_dump_info *dump_info; + /* Work struct to schedule work coming on QAIC_SSR channel */ + struct work_struct work; + /* Root struct of device, used to access device resources */ + struct qaic_device *qdev; + /* Buffer used by MHI for transfer requests */ + u8 data[]; +}; + +#define QAIC_SSR_DUMP_V1_MAGIC 0x1234567890abcdef +#define QAIC_SSR_DUMP_V1_VER 1 +struct dump_file_meta { + u64 magic; + u64 version; + u64 size; /* Total size of the entire dump */ + u64 tbl_len; /* Length of the table in byte */ +}; + +/* + * Layout of crashdump + * +------------------------------------------+ + * | Crashdump Meta structure | + * | type: struct dump_file_meta | + * +------------------------------------------+ + * | Crashdump Table | + * | type: array of struct debug_info_table | + * | | + * | | + * | | + * +------------------------------------------+ + * | Crashdump | + * | | + * | | + * | | + * | | + * | | + * +------------------------------------------+ + */ + +static void free_ssr_dump_info(struct ssr_crashdump *ssr_crash) +{ + struct ssr_dump_info *dump_info = ssr_crash->dump_info; + + ssr_crash->dump_info = NULL; + if (!dump_info) + return; + if (!dump_info->read_buf_req_queued) + kfree(dump_info->read_buf_req); + vfree(dump_info->tbl_addr); + vfree(dump_info->dump_addr); + kfree(dump_info); +} + +void qaic_clean_up_ssr(struct qaic_device *qdev) +{ + struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf; + + if (!ssr_crash) + return; + + qaic_dbc_exit_ssr(qdev); + free_ssr_dump_info(ssr_crash); +} + +static int alloc_dump(struct ssr_dump_info *dump_info) +{ + struct debug_info_table *tbl_ent = dump_info->tbl_addr; + struct dump_file_meta *dump_meta; + u64 tbl_sz_lp = 0; + u64 dump_size = 0; + + while (tbl_sz_lp < dump_info->tbl_len) { + le64_to_cpus(&tbl_ent->save_perf); + le64_to_cpus(&tbl_ent->mem_base); + le64_to_cpus(&tbl_ent->len); + + if (tbl_ent->len == 0) + return -EINVAL; + + dump_size += tbl_ent->len; + tbl_ent++; + tbl_sz_lp += sizeof(*tbl_ent); + } + + dump_info->dump_sz = dump_size + dump_info->tbl_len + sizeof(*dump_meta); + dump_info->dump_addr = vzalloc(dump_info->dump_sz); + if (!dump_info->dump_addr) + return -ENOMEM; + + /* Copy crashdump meta and table */ + dump_meta = dump_info->dump_addr; + dump_meta->magic = QAIC_SSR_DUMP_V1_MAGIC; + dump_meta->version = QAIC_SSR_DUMP_V1_VER; + dump_meta->size = dump_info->dump_sz; + dump_meta->tbl_len = dump_info->tbl_len; + memcpy(dump_info->dump_addr + sizeof(*dump_meta), dump_info->tbl_addr, dump_info->tbl_len); + /* Offset by crashdump meta and table (copied above) */ + dump_info->dump_off = dump_info->tbl_len + sizeof(*dump_meta); + + return 0; +} + +static int send_xfer_done(struct qaic_device *qdev, void *resp, u32 dbc_id) +{ + struct ssr_debug_transfer_done *xfer_done; + int ret; + + xfer_done = kmalloc(sizeof(*xfer_done), GFP_KERNEL); + if (!xfer_done) { + ret = -ENOMEM; + goto out; + } + + ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp, SSR_RESP_MSG_SZ, MHI_EOT); + if (ret) + goto free_xfer_done; + + xfer_done->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_DONE); + xfer_done->hdr.len = cpu_to_le32(sizeof(*xfer_done)); + xfer_done->hdr.dbc_id = cpu_to_le32(dbc_id); + + ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, xfer_done, sizeof(*xfer_done), MHI_EOT); + if (ret) + goto free_xfer_done; + + return 0; + +free_xfer_done: + kfree(xfer_done); +out: + return ret; +} + +static int mem_read_req(struct qaic_device *qdev, u64 dest_addr, u64 dest_len) +{ + struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf; + struct 
ssr_memory_read *read_buf_req; + struct ssr_dump_info *dump_info; + int ret; + + dump_info = ssr_crash->dump_info; + ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, ssr_crash->data, SSR_MEM_READ_DATA_SIZE, + MHI_EOT); + if (ret) + goto out; + + read_buf_req = dump_info->read_buf_req; + read_buf_req->hdr.cmd = cpu_to_le32(MEMORY_READ); + read_buf_req->hdr.len = cpu_to_le32(sizeof(*read_buf_req)); + read_buf_req->hdr.dbc_id = cpu_to_le32(qdev->ssr_dbc); + read_buf_req->addr = cpu_to_le64(dest_addr); + read_buf_req->len = cpu_to_le64(dest_len); + + ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, read_buf_req, sizeof(*read_buf_req), + MHI_EOT); + if (!ret) + dump_info->read_buf_req_queued = true; + +out: + return ret; +} + +static int ssr_copy_table(struct ssr_dump_info *dump_info, void *data, u64 len) +{ + if (len > dump_info->tbl_len - dump_info->tbl_off) + return -EINVAL; + + memcpy(dump_info->tbl_addr + dump_info->tbl_off, data, len); + dump_info->tbl_off += len; + + /* Entire table has been downloaded, alloc dump memory */ + if (dump_info->tbl_off == dump_info->tbl_len) { + dump_info->tbl_ent = dump_info->tbl_addr; + return alloc_dump(dump_info); + } + + return 0; +} + +static int ssr_copy_dump(struct ssr_dump_info *dump_info, void *data, u64 len) +{ + struct debug_info_table *tbl_ent; + + tbl_ent = dump_info->tbl_ent; + + if (len > tbl_ent->len - dump_info->tbl_ent_off) + return -EINVAL; + + memcpy(dump_info->dump_addr + dump_info->dump_off, data, len); + dump_info->dump_off += len; + dump_info->tbl_ent_off += len; + + /* + * Current segment (a entry in table) of the crashdump is complete, + * move to next one + */ + if (tbl_ent->len == dump_info->tbl_ent_off) { + dump_info->tbl_ent++; + dump_info->tbl_ent_off = 0; + } + + return 0; +} + +static void ssr_dump_worker(struct work_struct *work) +{ + struct ssr_crashdump *ssr_crash = container_of(work, struct ssr_crashdump, work); + struct qaic_device *qdev = ssr_crash->qdev; + struct ssr_memory_read_rsp *mem_rd_resp; + struct debug_info_table *tbl_ent; + struct ssr_dump_info *dump_info; + u64 dest_addr, dest_len; + struct _ssr_hdr *_hdr; + struct ssr_hdr hdr; + u64 data_len; + int ret; + + mem_rd_resp = (struct ssr_memory_read_rsp *)ssr_crash->data; + _hdr = &mem_rd_resp->hdr; + hdr.cmd = le32_to_cpu(_hdr->cmd); + hdr.len = le32_to_cpu(_hdr->len); + hdr.dbc_id = le32_to_cpu(_hdr->dbc_id); + + if (hdr.dbc_id != qdev->ssr_dbc) + goto reset_device; + + dump_info = ssr_crash->dump_info; + if (!dump_info) + goto reset_device; + + if (hdr.cmd != MEMORY_READ_RSP) + goto free_dump_info; + + if (hdr.len > SSR_MEM_READ_DATA_SIZE) + goto free_dump_info; + + data_len = hdr.len - sizeof(*mem_rd_resp); + + if (dump_info->tbl_off < dump_info->tbl_len) /* Chunk belongs to table */ + ret = ssr_copy_table(dump_info, mem_rd_resp->data, data_len); + else /* Chunk belongs to crashdump */ + ret = ssr_copy_dump(dump_info, mem_rd_resp->data, data_len); + + if (ret) + goto free_dump_info; + + if (dump_info->tbl_off < dump_info->tbl_len) { + /* Continue downloading table */ + dest_addr = dump_info->tbl_addr_dev + dump_info->tbl_off; + dest_len = min(SSR_MEM_READ_CHUNK_SIZE, dump_info->tbl_len - dump_info->tbl_off); + ret = mem_read_req(qdev, dest_addr, dest_len); + } else if (dump_info->dump_off < dump_info->dump_sz) { + /* Continue downloading crashdump */ + tbl_ent = dump_info->tbl_ent; + dest_addr = tbl_ent->mem_base + dump_info->tbl_ent_off; + dest_len = min(SSR_MEM_READ_CHUNK_SIZE, tbl_ent->len - dump_info->tbl_ent_off); + ret = mem_read_req(qdev, 
dest_addr, dest_len); + } else { + /* Crashdump download complete */ + ret = send_xfer_done(qdev, dump_info->resp->data, hdr.dbc_id); + } + + /* Most likely a MHI xfer has failed */ + if (ret) + goto free_dump_info; + + return; + +free_dump_info: + /* Free the allocated memory */ + free_ssr_dump_info(ssr_crash); +reset_device: + /* + * After subsystem crashes in device crashdump collection begins but + * something went wrong while collecting crashdump, now instead of + * handling this error we just reset the device as the best effort has + * been made + */ + mhi_soc_reset(qdev->mhi_cntrl); +} + +static struct ssr_dump_info *alloc_dump_info(struct qaic_device *qdev, + struct ssr_debug_transfer_info *debug_info) +{ + struct ssr_dump_info *dump_info; + int ret; + + le64_to_cpus(&debug_info->tbl_len); + le64_to_cpus(&debug_info->tbl_addr); + + if (debug_info->tbl_len == 0 || + debug_info->tbl_len % sizeof(struct debug_info_table) != 0) { + ret = -EINVAL; + goto out; + } + + /* Allocate SSR crashdump book keeping structure */ + dump_info = kzalloc(sizeof(*dump_info), GFP_KERNEL); + if (!dump_info) { + ret = -ENOMEM; + goto out; + } + + /* Buffer used to send MEMORY READ request to device via MHI */ + dump_info->read_buf_req = kzalloc(sizeof(*dump_info->read_buf_req), GFP_KERNEL); + if (!dump_info->read_buf_req) { + ret = -ENOMEM; + goto free_dump_info; + } + + /* Crashdump meta table buffer */ + dump_info->tbl_addr = vzalloc(debug_info->tbl_len); + if (!dump_info->tbl_addr) { + ret = -ENOMEM; + goto free_read_buf_req; + } + + dump_info->tbl_addr_dev = debug_info->tbl_addr; + dump_info->tbl_len = debug_info->tbl_len; + + return dump_info; + +free_read_buf_req: + kfree(dump_info->read_buf_req); +free_dump_info: + kfree(dump_info); +out: + return ERR_PTR(ret); +} + +static int dbg_xfer_info_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc, + struct ssr_debug_transfer_info *debug_info) +{ + struct ssr_debug_transfer_info_rsp *debug_rsp; + struct ssr_crashdump *ssr_crash = NULL; + int ret = 0, ret2; + + debug_rsp = kmalloc(sizeof(*debug_rsp), GFP_KERNEL); + if (!debug_rsp) + return -ENOMEM; + + if (!qdev->ssr_mhi_buf) { + ret = -ENOMEM; + goto send_rsp; + } + + if (dbc->state != DBC_STATE_BEFORE_POWER_UP) { + ret = -EINVAL; + goto send_rsp; + } + + ssr_crash = qdev->ssr_mhi_buf; + ssr_crash->dump_info = alloc_dump_info(qdev, debug_info); + if (IS_ERR(ssr_crash->dump_info)) { + ret = PTR_ERR(ssr_crash->dump_info); + ssr_crash->dump_info = NULL; + } + +send_rsp: + debug_rsp->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_INFO_RSP); + debug_rsp->hdr.len = cpu_to_le32(sizeof(*debug_rsp)); + debug_rsp->hdr.dbc_id = cpu_to_le32(dbc->id); + /* + * 0 = Return an ACK confirming the host is ready to download crashdump + * 1 = Return an NACK confirming the host is not ready to download crashdump + */ + debug_rsp->ret = cpu_to_le32(ret ? 
1 : 0); + + ret2 = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, debug_rsp, sizeof(*debug_rsp), MHI_EOT); + if (ret2) { + free_ssr_dump_info(ssr_crash); + kfree(debug_rsp); + return ret2; + } + + return ret; +} + +static void dbg_xfer_done_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc, + struct ssr_debug_transfer_done_rsp *xfer_rsp) +{ + struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf; + u32 status = le32_to_cpu(xfer_rsp->ret); + struct device *dev = &qdev->pdev->dev; + struct ssr_dump_info *dump_info; + + dump_info = ssr_crash->dump_info; + if (!dump_info) + return; + + if (status) { + free_ssr_dump_info(ssr_crash); + return; + } + + dev_coredumpv(dev, dump_info->dump_addr, dump_info->dump_sz, GFP_KERNEL); + /* dev_coredumpv will free dump_info->dump_addr */ + dump_info->dump_addr = NULL; + free_ssr_dump_info(ssr_crash); +} + +static void ssr_worker(struct work_struct *work) +{ + struct ssr_resp *resp = container_of(work, struct ssr_resp, work); + struct ssr_hdr *hdr = (struct ssr_hdr *)resp->data; + struct ssr_dump_info *dump_info = NULL; + struct qaic_device *qdev = resp->qdev; + struct ssr_crashdump *ssr_crash; + struct ssr_event_rsp *event_rsp; + struct dma_bridge_chan *dbc; + struct ssr_event *event; + u32 ssr_event_ack; + int ret; + + le32_to_cpus(&hdr->cmd); + le32_to_cpus(&hdr->len); + le32_to_cpus(&hdr->dbc_id); + + if (hdr->len > SSR_RESP_MSG_SZ) + goto out; + + if (hdr->dbc_id >= qdev->num_dbc) + goto out; + + dbc = &qdev->dbc[hdr->dbc_id]; + + switch (hdr->cmd) { + case DEBUG_TRANSFER_INFO: + ret = dbg_xfer_info_rsp(qdev, dbc, (struct ssr_debug_transfer_info *)resp->data); + if (ret) + break; + + ssr_crash = qdev->ssr_mhi_buf; + dump_info = ssr_crash->dump_info; + dump_info->dbc = dbc; + dump_info->resp = resp; + + /* Start by downloading debug table */ + ret = mem_read_req(qdev, dump_info->tbl_addr_dev, + min(dump_info->tbl_len, SSR_MEM_READ_CHUNK_SIZE)); + if (ret) { + free_ssr_dump_info(ssr_crash); + break; + } + + /* + * Till now everything went fine, which means that we will be + * collecting crashdump chunk by chunk. Do not queue a response + * buffer for SSR cmds till the crashdump is complete. + */ + return; + case SSR_EVENT: + event = (struct ssr_event *)hdr; + le32_to_cpus(&event->event); + ssr_event_ack = event->event; + ssr_crash = qdev->ssr_mhi_buf; + + switch (event->event) { + case BEFORE_SHUTDOWN: + set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_SHUTDOWN); + qaic_dbc_enter_ssr(qdev, hdr->dbc_id); + break; + case AFTER_SHUTDOWN: + set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_SHUTDOWN); + break; + case BEFORE_POWER_UP: + set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_POWER_UP); + break; + case AFTER_POWER_UP: + /* + * If dump info is a non NULL value it means that we + * have received this SSR event while downloading a + * crashdump for this DBC is still in progress. 
NACK + * the SSR event + */ + if (ssr_crash && ssr_crash->dump_info) { + free_ssr_dump_info(ssr_crash); + ssr_event_ack = SSR_EVENT_NACK; + break; + } + + set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_POWER_UP); + break; + default: + break; + } + + event_rsp = kmalloc(sizeof(*event_rsp), GFP_KERNEL); + if (!event_rsp) + break; + + event_rsp->hdr.cmd = cpu_to_le32(SSR_EVENT_RSP); + event_rsp->hdr.len = cpu_to_le32(sizeof(*event_rsp)); + event_rsp->hdr.dbc_id = cpu_to_le32(hdr->dbc_id); + event_rsp->event = cpu_to_le32(ssr_event_ack); + + ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, event_rsp, sizeof(*event_rsp), + MHI_EOT); + if (ret) + kfree(event_rsp); + + if (event->event == AFTER_POWER_UP && ssr_event_ack != SSR_EVENT_NACK) { + qaic_dbc_exit_ssr(qdev); + set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_IDLE); + } + + break; + case DEBUG_TRANSFER_DONE_RSP: + dbg_xfer_done_rsp(qdev, dbc, (struct ssr_debug_transfer_done_rsp *)hdr); + break; + default: + break; + } + +out: + ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT); + if (ret) + kfree(resp); +} + +static int qaic_ssr_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); + struct ssr_resp *resp; + int ret; + + ret = mhi_prepare_for_transfer(mhi_dev); + if (ret) + return ret; + + resp = kzalloc(sizeof(*resp) + SSR_RESP_MSG_SZ, GFP_KERNEL); + if (!resp) { + mhi_unprepare_from_transfer(mhi_dev); + return -ENOMEM; + } + + resp->qdev = qdev; + INIT_WORK(&resp->work, ssr_worker); + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT); + if (ret) { + kfree(resp); + mhi_unprepare_from_transfer(mhi_dev); + return ret; + } + + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->ssr_ch = mhi_dev; + + return 0; +} + +static void qaic_ssr_mhi_remove(struct mhi_device *mhi_dev) +{ + struct qaic_device *qdev; + + qdev = dev_get_drvdata(&mhi_dev->dev); + mhi_unprepare_from_transfer(qdev->ssr_ch); + qdev->ssr_ch = NULL; +} + +static void qaic_ssr_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); + struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf; + struct _ssr_hdr *hdr = mhi_result->buf_addr; + struct ssr_dump_info *dump_info; + + if (mhi_result->transaction_status) { + kfree(mhi_result->buf_addr); + return; + } + + /* + * MEMORY READ is used to download crashdump. And crashdump is + * downloaded chunk by chunk in a series of MEMORY READ SSR commands. + * Hence to avoid too many kmalloc() and kfree() of the same MEMORY READ + * request buffer, we allocate only one such buffer and free it only + * once. 
+ */ + if (le32_to_cpu(hdr->cmd) == MEMORY_READ) { + dump_info = ssr_crash->dump_info; + if (dump_info) { + dump_info->read_buf_req_queued = false; + return; + } + } + + kfree(mhi_result->buf_addr); +} + +static void qaic_ssr_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) +{ + struct ssr_resp *resp = container_of(mhi_result->buf_addr, struct ssr_resp, data); + struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); + struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf; + bool memory_read_rsp = false; + + if (ssr_crash && ssr_crash->data == mhi_result->buf_addr) + memory_read_rsp = true; + + if (mhi_result->transaction_status) { + /* Do not free the SSR crashdump buffer as it is allocated via managed APIs */ + if (!memory_read_rsp) + kfree(resp); + return; + } + + if (memory_read_rsp) + queue_work(qdev->ssr_wq, &ssr_crash->work); + else + queue_work(qdev->ssr_wq, &resp->work); +} + +static const struct mhi_device_id qaic_ssr_mhi_match_table[] = { + { .chan = "QAIC_SSR", }, + {}, +}; + +static struct mhi_driver qaic_ssr_mhi_driver = { + .id_table = qaic_ssr_mhi_match_table, + .remove = qaic_ssr_mhi_remove, + .probe = qaic_ssr_mhi_probe, + .ul_xfer_cb = qaic_ssr_mhi_ul_xfer_cb, + .dl_xfer_cb = qaic_ssr_mhi_dl_xfer_cb, + .driver = { + .name = "qaic_ssr", + }, +}; + +int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm) +{ + struct ssr_crashdump *ssr_crash; + + qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL; + + /* + * The device requests only one SSR at a time, so allocating only one + * buffer to download the crashdump is good enough. + */ + ssr_crash = drmm_kzalloc(drm, SSR_MHI_BUF_SIZE, GFP_KERNEL); + if (!ssr_crash) + return -ENOMEM; + + ssr_crash->qdev = qdev; + INIT_WORK(&ssr_crash->work, ssr_dump_worker); + qdev->ssr_mhi_buf = ssr_crash; + + return 0; +} + +int qaic_ssr_register(void) +{ + return mhi_driver_register(&qaic_ssr_mhi_driver); +} + +void qaic_ssr_unregister(void) +{ + mhi_driver_unregister(&qaic_ssr_mhi_driver); +} diff --git a/drivers/accel/qaic/qaic_ssr.h b/drivers/accel/qaic/qaic_ssr.h new file mode 100644 index 000000000000..97ccff305750 --- /dev/null +++ b/drivers/accel/qaic/qaic_ssr.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) 2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2021, 2024 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef __QAIC_SSR_H__ +#define __QAIC_SSR_H__ + +struct drm_device; +struct qaic_device; + +int qaic_ssr_register(void); +void qaic_ssr_unregister(void); +void qaic_clean_up_ssr(struct qaic_device *qdev); +int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm); +#endif /* __QAIC_SSR_H__ */ diff --git a/drivers/accel/qaic/qaic_sysfs.c b/drivers/accel/qaic/qaic_sysfs.c new file mode 100644 index 000000000000..e0afb0ffb589 --- /dev/null +++ b/drivers/accel/qaic/qaic_sysfs.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Copyright (c) 2020-2025, The Linux Foundation. All rights reserved. 
*/ + +#include <drm/drm_file.h> +#include <drm/drm_managed.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/kobject.h> +#include <linux/mutex.h> +#include <linux/sysfs.h> + +#include "qaic.h" + +#define NAME_LEN 14 + +struct dbc_attribute { + struct device_attribute dev_attr; + u32 dbc_id; + char name[NAME_LEN]; +}; + +static ssize_t dbc_state_show(struct device *dev, struct device_attribute *a, char *buf) +{ + struct dbc_attribute *dbc_attr = container_of(a, struct dbc_attribute, dev_attr); + struct drm_minor *minor = dev_get_drvdata(dev); + struct qaic_device *qdev; + + qdev = to_qaic_device(minor->dev); + return sysfs_emit(buf, "%d\n", qdev->dbc[dbc_attr->dbc_id].state); +} + +void set_dbc_state(struct qaic_device *qdev, u32 dbc_id, unsigned int state) +{ + struct device *kdev = to_accel_kdev(qdev->qddev); + char *envp[3] = {}; + char state_str[16]; + char id_str[12]; + + envp[0] = id_str; + envp[1] = state_str; + + if (state >= DBC_STATE_MAX) + return; + if (dbc_id >= qdev->num_dbc) + return; + if (state == qdev->dbc[dbc_id].state) + return; + + scnprintf(id_str, ARRAY_SIZE(id_str), "DBC_ID=%d", dbc_id); + scnprintf(state_str, ARRAY_SIZE(state_str), "DBC_STATE=%d", state); + + qdev->dbc[dbc_id].state = state; + kobject_uevent_env(&kdev->kobj, KOBJ_CHANGE, envp); +} + +int qaic_sysfs_init(struct qaic_drm_device *qddev) +{ + struct device *kdev = to_accel_kdev(qddev); + struct drm_device *drm = to_drm(qddev); + u32 num_dbc = qddev->qdev->num_dbc; + struct dbc_attribute *dbc_attrs; + int i, ret; + + dbc_attrs = drmm_kcalloc(drm, num_dbc, sizeof(*dbc_attrs), GFP_KERNEL); + if (!dbc_attrs) + return -ENOMEM; + + for (i = 0; i < num_dbc; ++i) { + struct dbc_attribute *dbc_attr = &dbc_attrs[i]; + + sysfs_attr_init(&dbc_attr->dev_attr.attr); + dbc_attr->dbc_id = i; + scnprintf(dbc_attr->name, NAME_LEN, "dbc%d_state", i); + dbc_attr->dev_attr.attr.name = dbc_attr->name; + dbc_attr->dev_attr.attr.mode = 0444; + dbc_attr->dev_attr.show = dbc_state_show; + ret = sysfs_create_file(&kdev->kobj, &dbc_attr->dev_attr.attr); + if (ret) { + int j; + + for (j = 0; j < i; ++j) { + dbc_attr = &dbc_attrs[j]; + sysfs_remove_file(&kdev->kobj, &dbc_attr->dev_attr.attr); + } + drmm_kfree(drm, dbc_attrs); + return ret; + } + } + + qddev->sysfs_attrs = dbc_attrs; + return 0; +} + +void qaic_sysfs_remove(struct qaic_drm_device *qddev) +{ + struct dbc_attribute *dbc_attrs = qddev->sysfs_attrs; + struct device *kdev = to_accel_kdev(qddev); + u32 num_dbc = qddev->qdev->num_dbc; + int i; + + if (!dbc_attrs) + return; + + qddev->sysfs_attrs = NULL; + for (i = 0; i < num_dbc; ++i) + sysfs_remove_file(&kdev->kobj, &dbc_attrs[i].dev_attr.attr); + drmm_kfree(to_drm(qddev), dbc_attrs); +} diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c index 3fac540f8e03..8af2475f4f36 100644 --- a/drivers/accel/qaic/qaic_timesync.c +++ b/drivers/accel/qaic/qaic_timesync.c @@ -171,6 +171,13 @@ mod_timer: dev_err(mqtsdev->dev, "%s mod_timer error:%d\n", __func__, ret); } +void qaic_mqts_ch_stop_timer(struct mhi_device *mhi_dev) +{ + struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev); + + timer_delete_sync(&mqtsdev->timer); +} + static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) { struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); @@ -206,6 +213,7 @@ static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_devi timer->expires = jiffies + msecs_to_jiffies(timesync_delay_ms); 
add_timer(timer); dev_set_drvdata(&mhi_dev->dev, mqtsdev); + qdev->mqts_ch = mhi_dev; return 0; @@ -221,6 +229,7 @@ static void qaic_timesync_remove(struct mhi_device *mhi_dev) { struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev); + mqtsdev->qdev->mqts_ch = NULL; timer_delete_sync(&mqtsdev->timer); mhi_unprepare_from_transfer(mqtsdev->mhi_dev); kfree(mqtsdev->sync_msg); diff --git a/drivers/accel/qaic/qaic_timesync.h b/drivers/accel/qaic/qaic_timesync.h index 851b7acd43bb..77b9c2b55057 100644 --- a/drivers/accel/qaic/qaic_timesync.h +++ b/drivers/accel/qaic/qaic_timesync.h @@ -6,6 +6,9 @@ #ifndef __QAIC_TIMESYNC_H__ #define __QAIC_TIMESYNC_H__ +#include <linux/mhi.h> + int qaic_timesync_init(void); void qaic_timesync_deinit(void); +void qaic_mqts_ch_stop_timer(struct mhi_device *mhi_dev); #endif /* __QAIC_TIMESYNC_H__ */ diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 3f78c56b58dc..35e241041c10 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -121,29 +121,27 @@ static const struct dma_fence_ops dma_fence_stub_ops = { .get_timeline_name = dma_fence_stub_get_name, }; +static int __init dma_fence_init_stub(void) +{ + dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops, + &dma_fence_stub_lock, 0, 0); + + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &dma_fence_stub.flags); + + dma_fence_signal(&dma_fence_stub); + return 0; +} +subsys_initcall(dma_fence_init_stub); + /** * dma_fence_get_stub - return a signaled fence * - * Return a stub fence which is already signaled. The fence's - * timestamp corresponds to the first time after boot this - * function is called. + * Return a stub fence which is already signaled. The fence's timestamp + * corresponds to the initialisation time of the linux kernel. 
*/ struct dma_fence *dma_fence_get_stub(void) { - spin_lock(&dma_fence_stub_lock); - if (!dma_fence_stub.ops) { - dma_fence_init(&dma_fence_stub, - &dma_fence_stub_ops, - &dma_fence_stub_lock, - 0, 0); - - set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &dma_fence_stub.flags); - - dma_fence_signal_locked(&dma_fence_stub); - } - spin_unlock(&dma_fence_stub_lock); - return dma_fence_get(&dma_fence_stub); } EXPORT_SYMBOL(dma_fence_get_stub); diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index de7b6294ce40..cd08990a10f9 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -557,9 +557,14 @@ static void ast_primary_plane_helper_atomic_update(struct drm_plane *plane, ast_set_vbios_color_reg(ast, fb->format, ast_crtc_state->vmode); } - drm_atomic_helper_damage_iter_init(&iter, old_plane_state, plane_state); - drm_atomic_for_each_plane_damage(&iter, &damage) { - ast_handle_damage(ast_plane, shadow_plane_state->data, fb, &damage); + /* if the buffer comes from another device */ + if (drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE) == 0) { + drm_atomic_helper_damage_iter_init(&iter, old_plane_state, plane_state); + drm_atomic_for_each_plane_damage(&iter, &damage) { + ast_handle_damage(ast_plane, shadow_plane_state->data, fb, &damage); + } + + drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); } /* diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 5a473a274ff0..e641fcf8c568 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1831,10 +1831,12 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev, } for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) { + wait_queue_head_t *queue = drm_crtc_vblank_waitqueue(crtc); + if (!(crtc_mask & drm_crtc_mask(crtc))) continue; - ret = wait_event_timeout(dev->vblank[i].queue, + ret = wait_event_timeout(*queue, state->crtcs[i].last_vblank_count != drm_crtc_vblank_count(crtc), msecs_to_jiffies(100)); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index e2e85345aa9a..26bb7710a462 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -250,6 +250,9 @@ static const struct edid_quirk { EDID_QUIRK('S', 'V', 'R', 0x1019, BIT(EDID_QUIRK_NON_DESKTOP)), EDID_QUIRK('A', 'U', 'O', 0x1111, BIT(EDID_QUIRK_NON_DESKTOP)), + /* LQ116M1JW10 displays noise when 8 bpc, but display fine as 6 bpc */ + EDID_QUIRK('S', 'H', 'P', 0x154c, BIT(EDID_QUIRK_FORCE_6BPC)), + /* * @drm_edid_internal_quirk entries end here, following with the * @drm_edid_quirk entries. 
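The dma-fence hunk above moves stub-fence initialisation into a subsys_initcall, so by the time any caller runs, dma_fence_get_stub() only has to take a reference on a fence that is already signaled. A minimal caller sketch, assuming a hypothetical helper (not part of this series) that wants to hand back an already-completed fence:

#include <linux/dma-fence.h>

/* Hypothetical example: return a fence for work that finished immediately. */
static struct dma_fence *example_get_completed_fence(void)
{
	/* The stub is pre-signaled at boot; this only grabs a reference. */
	return dma_fence_get_stub();
}

static void example_use(void)
{
	struct dma_fence *fence = example_get_completed_fence();

	WARN_ON(!dma_fence_is_signaled(fence));	/* always true for the stub */
	dma_fence_put(fence);
}

Callers that previously relied on the lazy, lock-protected initialisation should see no functional difference; only the stub's timestamp now reflects kernel init time rather than the first use after boot.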
diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 61e211fd3c9c..32d013c5c8fc 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -1315,7 +1315,7 @@ void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe) ret = wait_event_timeout(vblank->queue, last != drm_vblank_count(dev, pipe), - msecs_to_jiffies(100)); + msecs_to_jiffies(1000)); drm_WARN(dev, ret == 0, "vblank wait timed out on crtc %i\n", pipe); @@ -2258,7 +2258,7 @@ int drm_crtc_vblank_start_timer(struct drm_crtc *crtc) EXPORT_SYMBOL(drm_crtc_vblank_start_timer); /** - * drm_crtc_vblank_start_timer - Cancels the given CRTC's vblank timer + * drm_crtc_vblank_cancel_timer - Cancels the given CRTC's vblank timer * @crtc: the CRTC * * Drivers should call this function from their CRTC's disable_vblank diff --git a/drivers/gpu/drm/drm_vblank_work.c b/drivers/gpu/drm/drm_vblank_work.c index e4e1873f0e1e..70f0199251ea 100644 --- a/drivers/gpu/drm/drm_vblank_work.c +++ b/drivers/gpu/drm/drm_vblank_work.c @@ -244,7 +244,7 @@ EXPORT_SYMBOL(drm_vblank_work_flush); void drm_vblank_work_flush_all(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_vblank_crtc *vblank = &dev->vblank[drm_crtc_index(crtc)]; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); spin_lock_irq(&dev->event_lock); wait_event_lock_irq(vblank->work_wait_queue, diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c index c224c7ff353c..3a946b472064 100644 --- a/drivers/gpu/drm/gma500/psb_irq.c +++ b/drivers/gpu/drm/gma500/psb_irq.c @@ -250,6 +250,7 @@ static irqreturn_t gma_irq_handler(int irq, void *arg) void gma_irq_preinstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = to_drm_psb_private(dev); + struct drm_crtc *crtc; unsigned long irqflags; spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); @@ -260,10 +261,15 @@ void gma_irq_preinstall(struct drm_device *dev) PSB_WSGX32(0x00000000, PSB_CR_EVENT_HOST_ENABLE); PSB_RSGX32(PSB_CR_EVENT_HOST_ENABLE); - if (dev->vblank[0].enabled) - dev_priv->vdc_irq_mask |= _PSB_VSYNC_PIPEA_FLAG; - if (dev->vblank[1].enabled) - dev_priv->vdc_irq_mask |= _PSB_VSYNC_PIPEB_FLAG; + drm_for_each_crtc(crtc, dev) { + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + + if (vblank->enabled) { + u32 mask = drm_crtc_index(crtc) ? _PSB_VSYNC_PIPEB_FLAG : + _PSB_VSYNC_PIPEA_FLAG; + dev_priv->vdc_irq_mask |= mask; + } + } /* Revisit this area - want per device masks ? 
*/ if (dev_priv->ops->hotplug) @@ -278,8 +284,8 @@ void gma_irq_preinstall(struct drm_device *dev) void gma_irq_postinstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = to_drm_psb_private(dev); + struct drm_crtc *crtc; unsigned long irqflags; - unsigned int i; spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); @@ -292,11 +298,13 @@ void gma_irq_postinstall(struct drm_device *dev) PSB_WVDC32(dev_priv->vdc_irq_mask, PSB_INT_ENABLE_R); PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM); - for (i = 0; i < dev->num_crtcs; ++i) { - if (dev->vblank[i].enabled) - gma_enable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + drm_for_each_crtc(crtc, dev) { + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + + if (vblank->enabled) + gma_enable_pipestat(dev_priv, drm_crtc_index(crtc), PIPE_VBLANK_INTERRUPT_ENABLE); else - gma_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + gma_disable_pipestat(dev_priv, drm_crtc_index(crtc), PIPE_VBLANK_INTERRUPT_ENABLE); } if (dev_priv->ops->hotplug_enable) @@ -337,8 +345,8 @@ void gma_irq_uninstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = to_drm_psb_private(dev); struct pci_dev *pdev = to_pci_dev(dev->dev); + struct drm_crtc *crtc; unsigned long irqflags; - unsigned int i; if (!dev_priv->irq_enabled) return; @@ -350,9 +358,11 @@ void gma_irq_uninstall(struct drm_device *dev) PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM); - for (i = 0; i < dev->num_crtcs; ++i) { - if (dev->vblank[i].enabled) - gma_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + drm_for_each_crtc(crtc, dev) { + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + + if (vblank->enabled) + gma_disable_pipestat(dev_priv, drm_crtc_index(crtc), PIPE_VBLANK_INTERRUPT_ENABLE); } dev_priv->vdc_irq_mask &= _PSB_IRQ_SGX_FLAG | diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 84ec79b64960..49c86a0d44f9 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -156,6 +156,7 @@ gem-y += \ gem/i915_gem_lmem.o \ gem/i915_gem_mman.o \ gem/i915_gem_object.o \ + gem/i915_gem_object_frontbuffer.o \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ gem/i915_gem_pm.o \ diff --git a/drivers/gpu/drm/i915/display/hsw_ips.c b/drivers/gpu/drm/i915/display/hsw_ips.c index f444c5b7a27b..008d339d5c21 100644 --- a/drivers/gpu/drm/i915/display/hsw_ips.c +++ b/drivers/gpu/drm/i915/display/hsw_ips.c @@ -56,7 +56,7 @@ static void hsw_ips_enable(const struct intel_crtc_state *crtc_state) * the HW state readout code will complain that the expected * IPS_CTL value is not the one we read. */ - if (intel_de_wait_for_set(display, IPS_CTL, IPS_ENABLE, 50)) + if (intel_de_wait_for_set_ms(display, IPS_CTL, IPS_ENABLE, 50)) drm_err(display->drm, "Timed out waiting for IPS enable\n"); } @@ -78,7 +78,7 @@ bool hsw_ips_disable(const struct intel_crtc_state *crtc_state) * 42ms timeout value leads to occasional timeouts so use 100ms * instead. 
*/ - if (intel_de_wait_for_clear(display, IPS_CTL, IPS_ENABLE, 100)) + if (intel_de_wait_for_clear_ms(display, IPS_CTL, IPS_ENABLE, 100)) drm_err(display->drm, "Timed out waiting for IPS disable\n"); } else { diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 6e39d7f2e0c2..51ccc6bd5f21 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -754,10 +754,9 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane, static unsigned int hsw_primary_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { - const struct drm_format_info *info = drm_format_info(pixel_format); int cpp = info->cpp[0]; /* Limit to 8k pixels to guarantee OFFSET.x doesn't get too big. */ @@ -766,10 +765,9 @@ hsw_primary_max_stride(struct intel_plane *plane, static unsigned int ilk_primary_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { - const struct drm_format_info *info = drm_format_info(pixel_format); int cpp = info->cpp[0]; /* Limit to 4k pixels to guarantee TILEOFF.x doesn't get too big. */ @@ -781,10 +779,9 @@ ilk_primary_max_stride(struct intel_plane *plane, unsigned int i965_plane_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { - const struct drm_format_info *info = drm_format_info(pixel_format); int cpp = info->cpp[0]; /* Limit to 4k pixels to guarantee TILEOFF.x doesn't get too big. */ @@ -796,8 +793,8 @@ i965_plane_max_stride(struct intel_plane *plane, static unsigned int i915_plane_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { if (modifier == I915_FORMAT_MOD_X_TILED) return 8 * 1024; @@ -807,8 +804,8 @@ i915_plane_max_stride(struct intel_plane *plane, static unsigned int i8xx_plane_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { if (plane->i9xx_plane == PLANE_C) return 4 * 1024; @@ -1191,10 +1188,8 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, val = intel_de_read(display, DSPCNTR(display, i9xx_plane)); if (DISPLAY_VER(display) >= 4) { - if (val & DISP_TILED) { - plane_config->tiling = I915_TILING_X; + if (val & DISP_TILED) fb->modifier = I915_FORMAT_MOD_X_TILED; - } if (val & DISP_ROTATE_180) plane_config->rotation = DRM_MODE_ROTATE_180; @@ -1206,14 +1201,15 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, pixel_format = val & DISP_FORMAT_MASK; fourcc = i9xx_format_to_fourcc(pixel_format); - fb->format = drm_format_info(fourcc); + + fb->format = drm_get_format_info(display->drm, fourcc, fb->modifier); if (display->platform.haswell || display->platform.broadwell) { offset = intel_de_read(display, DSPOFFSET(display, i9xx_plane)); base = intel_de_read(display, DSPSURF(display, i9xx_plane)) & DISP_ADDR_MASK; } else if (DISPLAY_VER(display) >= 4) { - if (plane_config->tiling) + if (fb->modifier == I915_FORMAT_MOD_X_TILED) offset = intel_de_read(display, DSPTILEOFF(display, i9xx_plane)); else diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.h 
b/drivers/gpu/drm/i915/display/i9xx_plane.h index 565dab751301..ec78bf4dd35e 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.h +++ b/drivers/gpu/drm/i915/display/i9xx_plane.h @@ -9,6 +9,7 @@ #include <linux/types.h> enum pipe; +struct drm_format_info; struct drm_framebuffer; struct intel_crtc; struct intel_display; @@ -18,8 +19,8 @@ struct intel_plane_state; #ifdef I915 unsigned int i965_plane_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation); + const struct drm_format_info *info, + u64 modifier, unsigned int rotation); unsigned int vlv_plane_min_alignment(struct intel_plane *plane, const struct drm_framebuffer *fb, int colot_plane); diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 70d4c1bc70fc..9230792960f2 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -148,9 +148,9 @@ static void wait_for_cmds_dispatched_to_panel(struct intel_encoder *encoder) for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - ret = intel_de_wait_custom(display, DSI_LP_MSG(dsi_trans), - LPTX_IN_PROGRESS, 0, - 20, 0, NULL); + ret = intel_de_wait_for_clear_us(display, + DSI_LP_MSG(dsi_trans), + LPTX_IN_PROGRESS, 20); if (ret) drm_err(display->drm, "LPTX bit not cleared\n"); } @@ -534,9 +534,8 @@ static void gen11_dsi_enable_ddi_buffer(struct intel_encoder *encoder) for_each_dsi_port(port, intel_dsi->ports) { intel_de_rmw(display, DDI_BUF_CTL(port), 0, DDI_BUF_CTL_ENABLE); - ret = intel_de_wait_custom(display, DDI_BUF_CTL(port), - DDI_BUF_IS_IDLE, 0, - 500, 0, NULL); + ret = intel_de_wait_for_clear_us(display, DDI_BUF_CTL(port), + DDI_BUF_IS_IDLE, 500); if (ret) drm_err(display->drm, "DDI port:%c buffer idle\n", port_name(port)); @@ -857,9 +856,9 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, dsi_trans = dsi_port_to_transcoder(port); - ret = intel_de_wait_custom(display, DSI_TRANS_FUNC_CONF(dsi_trans), - LINK_READY, LINK_READY, - 2500, 0, NULL); + ret = intel_de_wait_for_set_us(display, + DSI_TRANS_FUNC_CONF(dsi_trans), + LINK_READY, 2500); if (ret) drm_err(display->drm, "DSI link not ready\n"); } @@ -1048,8 +1047,8 @@ static void gen11_dsi_enable_transcoder(struct intel_encoder *encoder) TRANSCONF_ENABLE); /* wait for transcoder to be enabled */ - if (intel_de_wait_for_set(display, TRANSCONF(display, dsi_trans), - TRANSCONF_STATE_ENABLE, 10)) + if (intel_de_wait_for_set_ms(display, TRANSCONF(display, dsi_trans), + TRANSCONF_STATE_ENABLE, 10)) drm_err(display->drm, "DSI transcoder not enabled\n"); } @@ -1317,8 +1316,8 @@ static void gen11_dsi_disable_transcoder(struct intel_encoder *encoder) TRANSCONF_ENABLE, 0); /* wait for transcoder to be disabled */ - if (intel_de_wait_for_clear(display, TRANSCONF(display, dsi_trans), - TRANSCONF_STATE_ENABLE, 50)) + if (intel_de_wait_for_clear_ms(display, TRANSCONF(display, dsi_trans), + TRANSCONF_STATE_ENABLE, 50)) drm_err(display->drm, "DSI trancoder not disabled\n"); } @@ -1358,9 +1357,8 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) tmp &= ~LINK_ULPS_TYPE_LP11; intel_de_write(display, DSI_LP_MSG(dsi_trans), tmp); - ret = intel_de_wait_custom(display, DSI_LP_MSG(dsi_trans), - LINK_IN_ULPS, LINK_IN_ULPS, - 10, 0, NULL); + ret = intel_de_wait_for_set_us(display, DSI_LP_MSG(dsi_trans), + LINK_IN_ULPS, 10); if (ret) drm_err(display->drm, "DSI link not in ULPS\n"); } @@ -1395,9 +1393,8 @@ static void gen11_dsi_disable_port(struct intel_encoder 
*encoder) for_each_dsi_port(port, intel_dsi->ports) { intel_de_rmw(display, DDI_BUF_CTL(port), DDI_BUF_CTL_ENABLE, 0); - ret = intel_de_wait_custom(display, DDI_BUF_CTL(port), - DDI_BUF_IS_IDLE, DDI_BUF_IS_IDLE, - 8, 0, NULL); + ret = intel_de_wait_for_set_us(display, DDI_BUF_CTL(port), + DDI_BUF_IS_IDLE, 8); if (ret) drm_err(display->drm, diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 852e4d6db8a3..4b41068e9e35 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -36,7 +36,6 @@ #include "soc/intel_rom.h" -#include "i915_drv.h" #include "intel_display.h" #include "intel_display_core.h" #include "intel_display_rpm.h" @@ -3145,7 +3144,6 @@ err_free_rom: static const struct vbt_header *intel_bios_get_vbt(struct intel_display *display, size_t *sizep) { - struct drm_i915_private *i915 = to_i915(display->drm); const struct vbt_header *vbt = NULL; vbt = firmware_get_vbt(display, sizep); @@ -3159,11 +3157,11 @@ static const struct vbt_header *intel_bios_get_vbt(struct intel_display *display */ if (!vbt && display->platform.dgfx) with_intel_display_rpm(display) - vbt = oprom_get_vbt(display, intel_rom_spi(i915), sizep, "SPI flash"); + vbt = oprom_get_vbt(display, intel_rom_spi(display->drm), sizep, "SPI flash"); if (!vbt) with_intel_display_rpm(display) - vbt = oprom_get_vbt(display, intel_rom_pci(i915), sizep, "PCI ROM"); + vbt = oprom_get_vbt(display, intel_rom_pci(display->drm), sizep, "PCI ROM"); return vbt; } diff --git a/drivers/gpu/drm/i915/display/intel_bo.c b/drivers/gpu/drm/i915/display/intel_bo.c index 6ae1374d5c2b..f3687eb63467 100644 --- a/drivers/gpu/drm/i915/display/intel_bo.c +++ b/drivers/gpu/drm/i915/display/intel_bo.c @@ -29,11 +29,6 @@ bool intel_bo_is_protected(struct drm_gem_object *obj) return i915_gem_object_is_protected(to_intel_bo(obj)); } -void intel_bo_flush_if_display(struct drm_gem_object *obj) -{ - i915_gem_object_flush_if_display(to_intel_bo(obj)); -} - int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { return i915_gem_fb_mmap(to_intel_bo(obj), vma); @@ -44,15 +39,40 @@ int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, i return i915_gem_object_read_from_page(to_intel_bo(obj), offset, dst, size); } -struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj) +struct intel_frontbuffer *intel_bo_frontbuffer_get(struct drm_gem_object *_obj) +{ + struct drm_i915_gem_object *obj = to_intel_bo(_obj); + struct i915_frontbuffer *front; + + front = i915_gem_object_frontbuffer_get(obj); + if (!front) + return NULL; + + return &front->base; +} + +void intel_bo_frontbuffer_ref(struct intel_frontbuffer *_front) { - return i915_gem_object_get_frontbuffer(to_intel_bo(obj)); + struct i915_frontbuffer *front = + container_of(_front, typeof(*front), base); + + i915_gem_object_frontbuffer_ref(front); } -struct intel_frontbuffer *intel_bo_set_frontbuffer(struct drm_gem_object *obj, - struct intel_frontbuffer *front) +void intel_bo_frontbuffer_put(struct intel_frontbuffer *_front) { - return i915_gem_object_set_frontbuffer(to_intel_bo(obj), front); + struct i915_frontbuffer *front = + container_of(_front, typeof(*front), base); + + return i915_gem_object_frontbuffer_put(front); +} + +void intel_bo_frontbuffer_flush_for_display(struct intel_frontbuffer *_front) +{ + struct i915_frontbuffer *front = + container_of(_front, typeof(*front), base); + + i915_gem_object_flush_if_display(front->obj); } void 
intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/i915/display/intel_bo.h b/drivers/gpu/drm/i915/display/intel_bo.h index 48d87019e48a..fc05f680dc76 100644 --- a/drivers/gpu/drm/i915/display/intel_bo.h +++ b/drivers/gpu/drm/i915/display/intel_bo.h @@ -16,13 +16,13 @@ bool intel_bo_is_tiled(struct drm_gem_object *obj); bool intel_bo_is_userptr(struct drm_gem_object *obj); bool intel_bo_is_shmem(struct drm_gem_object *obj); bool intel_bo_is_protected(struct drm_gem_object *obj); -void intel_bo_flush_if_display(struct drm_gem_object *obj); int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, int size); -struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj); -struct intel_frontbuffer *intel_bo_set_frontbuffer(struct drm_gem_object *obj, - struct intel_frontbuffer *front); +struct intel_frontbuffer *intel_bo_frontbuffer_get(struct drm_gem_object *obj); +void intel_bo_frontbuffer_ref(struct intel_frontbuffer *front); +void intel_bo_frontbuffer_put(struct intel_frontbuffer *front); +void intel_bo_frontbuffer_flush_for_display(struct intel_frontbuffer *front); void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index f97ccc1a96a7..1f6461be50ef 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -805,29 +805,40 @@ void intel_bw_init_hw(struct intel_display *display) if (!HAS_DISPLAY(display)) return; - if (DISPLAY_VERx100(display) >= 3002) - tgl_get_bw_info(display, dram_info, &xe3lpd_3002_sa_info); - else if (DISPLAY_VER(display) >= 30) - tgl_get_bw_info(display, dram_info, &xe3lpd_sa_info); - else if (DISPLAY_VERx100(display) >= 1401 && display->platform.dgfx && - dram_info->type == INTEL_DRAM_GDDR_ECC) - xe2_hpd_get_bw_info(display, dram_info, &xe2_hpd_ecc_sa_info); - else if (DISPLAY_VERx100(display) >= 1401 && display->platform.dgfx) - xe2_hpd_get_bw_info(display, dram_info, &xe2_hpd_sa_info); - else if (DISPLAY_VER(display) >= 14) + /* + * Starting with Xe3p_LPD, the hardware tells us whether memory has ECC + * enabled that would impact display bandwidth. However, so far there + * are no instructions in Bspec on how to handle that case. Let's + * complain if we ever find such a scenario. 
+ */ + if (DISPLAY_VER(display) >= 35) + drm_WARN_ON(display->drm, dram_info->ecc_impacting_de_bw); + + if (DISPLAY_VER(display) >= 30) { + if (DISPLAY_VERx100(display) == 3002) + tgl_get_bw_info(display, dram_info, &xe3lpd_3002_sa_info); + else + tgl_get_bw_info(display, dram_info, &xe3lpd_sa_info); + } else if (DISPLAY_VERx100(display) >= 1401 && display->platform.dgfx) { + if (dram_info->type == INTEL_DRAM_GDDR_ECC) + xe2_hpd_get_bw_info(display, dram_info, &xe2_hpd_ecc_sa_info); + else + xe2_hpd_get_bw_info(display, dram_info, &xe2_hpd_sa_info); + } else if (DISPLAY_VER(display) >= 14) { tgl_get_bw_info(display, dram_info, &mtl_sa_info); - else if (display->platform.dg2) + } else if (display->platform.dg2) { dg2_get_bw_info(display); - else if (display->platform.alderlake_p) + } else if (display->platform.alderlake_p) { tgl_get_bw_info(display, dram_info, &adlp_sa_info); - else if (display->platform.alderlake_s) + } else if (display->platform.alderlake_s) { tgl_get_bw_info(display, dram_info, &adls_sa_info); - else if (display->platform.rocketlake) + } else if (display->platform.rocketlake) { tgl_get_bw_info(display, dram_info, &rkl_sa_info); - else if (DISPLAY_VER(display) == 12) + } else if (DISPLAY_VER(display) == 12) { tgl_get_bw_info(display, dram_info, &tgl_sa_info); - else if (DISPLAY_VER(display) == 11) + } else if (DISPLAY_VER(display) == 11) { icl_get_bw_info(display, dram_info, &icl_sa_info); + } } static unsigned int intel_bw_num_active_planes(struct intel_display *display, diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index bdb42fcc4cb2..37801c744b05 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -902,9 +902,8 @@ static void bdw_set_cdclk(struct intel_display *display, * According to the spec, it should be enough to poll for this 1 us. * However, extensive testing shows that this can take longer. 
*/ - ret = intel_de_wait_custom(display, LCPLL_CTL, - LCPLL_CD_SOURCE_FCLK_DONE, LCPLL_CD_SOURCE_FCLK_DONE, - 100, 0, NULL); + ret = intel_de_wait_for_set_us(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, 100); if (ret) drm_err(display->drm, "Switching to FCLK failed\n"); @@ -914,9 +913,8 @@ static void bdw_set_cdclk(struct intel_display *display, intel_de_rmw(display, LCPLL_CTL, LCPLL_CD_SOURCE_FCLK, 0); - ret = intel_de_wait_custom(display, LCPLL_CTL, - LCPLL_CD_SOURCE_FCLK_DONE, 0, - 1, 0, NULL); + ret = intel_de_wait_for_clear_us(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, 1); if (ret) drm_err(display->drm, "Switching back to LCPLL failed\n"); @@ -1114,7 +1112,7 @@ static void skl_dpll0_enable(struct intel_display *display, int vco) intel_de_rmw(display, LCPLL1_CTL, 0, LCPLL_PLL_ENABLE); - if (intel_de_wait_for_set(display, LCPLL1_CTL, LCPLL_PLL_LOCK, 5)) + if (intel_de_wait_for_set_ms(display, LCPLL1_CTL, LCPLL_PLL_LOCK, 5)) drm_err(display->drm, "DPLL0 not locked\n"); display->cdclk.hw.vco = vco; @@ -1128,7 +1126,7 @@ static void skl_dpll0_disable(struct intel_display *display) intel_de_rmw(display, LCPLL1_CTL, LCPLL_PLL_ENABLE, 0); - if (intel_de_wait_for_clear(display, LCPLL1_CTL, LCPLL_PLL_LOCK, 1)) + if (intel_de_wait_for_clear_ms(display, LCPLL1_CTL, LCPLL_PLL_LOCK, 1)) drm_err(display->drm, "Couldn't disable DPLL0\n"); display->cdclk.hw.vco = 0; @@ -1535,6 +1533,41 @@ static const struct intel_cdclk_vals xe3lpd_cdclk_table[] = { {} }; +static const struct intel_cdclk_vals xe3p_lpd_cdclk_table[] = { + { .refclk = 38400, .cdclk = 151200, .ratio = 21, .waveform = 0xa4a4 }, + { .refclk = 38400, .cdclk = 176400, .ratio = 21, .waveform = 0xaa54 }, + { .refclk = 38400, .cdclk = 201600, .ratio = 21, .waveform = 0xaaaa }, + { .refclk = 38400, .cdclk = 226800, .ratio = 21, .waveform = 0xad5a }, + { .refclk = 38400, .cdclk = 252000, .ratio = 21, .waveform = 0xb6b6 }, + { .refclk = 38400, .cdclk = 277200, .ratio = 21, .waveform = 0xdbb6 }, + { .refclk = 38400, .cdclk = 302400, .ratio = 21, .waveform = 0xeeee }, + { .refclk = 38400, .cdclk = 327600, .ratio = 21, .waveform = 0xf7de }, + { .refclk = 38400, .cdclk = 352800, .ratio = 21, .waveform = 0xfefe }, + { .refclk = 38400, .cdclk = 378000, .ratio = 21, .waveform = 0xfffe }, + { .refclk = 38400, .cdclk = 403200, .ratio = 21, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 422400, .ratio = 22, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 441600, .ratio = 23, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 460800, .ratio = 24, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 480000, .ratio = 25, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 499200, .ratio = 26, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 518400, .ratio = 27, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 537600, .ratio = 28, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 556800, .ratio = 29, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 576000, .ratio = 30, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 595200, .ratio = 31, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 614400, .ratio = 32, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 633600, .ratio = 33, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 652800, .ratio = 34, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 672000, .ratio = 35, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 691200, .ratio = 36, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 710400, .ratio = 37, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 729600, 
.ratio = 38, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 748800, .ratio = 39, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 768000, .ratio = 40, .waveform = 0xffff }, + { .refclk = 38400, .cdclk = 787200, .ratio = 41, .waveform = 0xffff }, + {} +}; + static const int cdclk_squash_len = 16; static int cdclk_squash_divider(u16 waveform) @@ -1800,8 +1833,8 @@ static void bxt_de_pll_disable(struct intel_display *display) intel_de_write(display, BXT_DE_PLL_ENABLE, 0); /* Timeout 200us */ - if (intel_de_wait_for_clear(display, - BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) + if (intel_de_wait_for_clear_ms(display, + BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) drm_err(display->drm, "timeout waiting for DE PLL unlock\n"); display->cdclk.hw.vco = 0; @@ -1817,8 +1850,8 @@ static void bxt_de_pll_enable(struct intel_display *display, int vco) intel_de_write(display, BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); /* Timeout 200us */ - if (intel_de_wait_for_set(display, - BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) + if (intel_de_wait_for_set_ms(display, + BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) drm_err(display->drm, "timeout waiting for DE PLL lock\n"); display->cdclk.hw.vco = vco; @@ -1830,7 +1863,7 @@ static void icl_cdclk_pll_disable(struct intel_display *display) BXT_DE_PLL_PLL_ENABLE, 0); /* Timeout 200us */ - if (intel_de_wait_for_clear(display, BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) + if (intel_de_wait_for_clear_ms(display, BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) drm_err(display->drm, "timeout waiting for CDCLK PLL unlock\n"); display->cdclk.hw.vco = 0; @@ -1848,7 +1881,7 @@ static void icl_cdclk_pll_enable(struct intel_display *display, int vco) intel_de_write(display, BXT_DE_PLL_ENABLE, val); /* Timeout 200us */ - if (intel_de_wait_for_set(display, BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) + if (intel_de_wait_for_set_ms(display, BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) drm_err(display->drm, "timeout waiting for CDCLK PLL lock\n"); display->cdclk.hw.vco = vco; @@ -1868,8 +1901,8 @@ static void adlp_cdclk_pll_crawl(struct intel_display *display, int vco) intel_de_write(display, BXT_DE_PLL_ENABLE, val); /* Timeout 200us */ - if (intel_de_wait_for_set(display, BXT_DE_PLL_ENABLE, - BXT_DE_PLL_LOCK | BXT_DE_PLL_FREQ_REQ_ACK, 1)) + if (intel_de_wait_for_set_ms(display, BXT_DE_PLL_ENABLE, + BXT_DE_PLL_LOCK | BXT_DE_PLL_FREQ_REQ_ACK, 1)) drm_err(display->drm, "timeout waiting for FREQ change request ack\n"); val &= ~BXT_DE_PLL_FREQ_REQ; @@ -3561,7 +3594,9 @@ static int intel_compute_max_dotclk(struct intel_display *display) */ void intel_update_max_cdclk(struct intel_display *display) { - if (DISPLAY_VERx100(display) >= 3002) { + if (DISPLAY_VER(display) >= 35) { + display->cdclk.max_cdclk_freq = 787200; + } else if (DISPLAY_VERx100(display) >= 3002) { display->cdclk.max_cdclk_freq = 480000; } else if (DISPLAY_VER(display) >= 30) { display->cdclk.max_cdclk_freq = 691200; @@ -3912,7 +3947,10 @@ static const struct intel_cdclk_funcs i830_cdclk_funcs = { */ void intel_init_cdclk_hooks(struct intel_display *display) { - if (DISPLAY_VER(display) >= 30) { + if (DISPLAY_VER(display) >= 35) { + display->funcs.cdclk = &xe3lpd_cdclk_funcs; + display->cdclk.table = xe3p_lpd_cdclk_table; + } else if (DISPLAY_VER(display) >= 30) { display->funcs.cdclk = &xe3lpd_cdclk_funcs; display->cdclk.table = xe3lpd_cdclk_table; } else if (DISPLAY_VER(display) >= 20) { diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 1e97020e7304..a217a67ceb43 100644 --- 
a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1090,18 +1090,19 @@ static void skl_get_config(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - u32 tmp; crtc_state->gamma_mode = hsw_read_gamma_mode(crtc); crtc_state->csc_mode = ilk_read_csc_mode(crtc); - tmp = intel_de_read(display, SKL_BOTTOM_COLOR(crtc->pipe)); + if (DISPLAY_VER(display) < 35) { + u32 tmp = intel_de_read(display, SKL_BOTTOM_COLOR(crtc->pipe)); - if (tmp & SKL_BOTTOM_COLOR_GAMMA_ENABLE) - crtc_state->gamma_enable = true; + if (tmp & SKL_BOTTOM_COLOR_GAMMA_ENABLE) + crtc_state->gamma_enable = true; - if (tmp & SKL_BOTTOM_COLOR_CSC_ENABLE) - crtc_state->csc_enable = true; + if (tmp & SKL_BOTTOM_COLOR_CSC_ENABLE) + crtc_state->csc_enable = true; + } } static void skl_color_commit_arm(struct intel_dsb *dsb, diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 31e68047f217..82e89cdbe5a5 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -498,10 +498,10 @@ static bool ilk_crt_detect_hotplug(struct drm_connector *connector) intel_de_write(display, crt->adpa_reg, adpa); - if (intel_de_wait_for_clear(display, - crt->adpa_reg, - ADPA_CRT_HOTPLUG_FORCE_TRIGGER, - 1000)) + if (intel_de_wait_for_clear_ms(display, + crt->adpa_reg, + ADPA_CRT_HOTPLUG_FORCE_TRIGGER, + 1000)) drm_dbg_kms(display->drm, "timed out waiting for FORCE_TRIGGER"); @@ -553,8 +553,8 @@ static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) intel_de_write(display, crt->adpa_reg, adpa); - if (intel_de_wait_for_clear(display, crt->adpa_reg, - ADPA_CRT_HOTPLUG_FORCE_TRIGGER, 1000)) { + if (intel_de_wait_for_clear_ms(display, crt->adpa_reg, + ADPA_CRT_HOTPLUG_FORCE_TRIGGER, 1000)) { drm_dbg_kms(display->drm, "timed out waiting for FORCE_TRIGGER"); intel_de_write(display, crt->adpa_reg, save_adpa); @@ -604,8 +604,8 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector) CRT_HOTPLUG_FORCE_DETECT, CRT_HOTPLUG_FORCE_DETECT); /* wait for FORCE_DETECT to go off */ - if (intel_de_wait_for_clear(display, PORT_HOTPLUG_EN(display), - CRT_HOTPLUG_FORCE_DETECT, 1000)) + if (intel_de_wait_for_clear_ms(display, PORT_HOTPLUG_EN(display), + CRT_HOTPLUG_FORCE_DETECT, 1000)) drm_dbg_kms(display->drm, "timed out waiting for FORCE_DETECT to go off"); } diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 7aa14348aa6d..a10b2425b94d 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -182,8 +182,8 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, static unsigned int i845_cursor_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { return 2048; } @@ -343,8 +343,8 @@ static bool i845_cursor_get_hw_state(struct intel_plane *plane, static unsigned int i9xx_cursor_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { return plane->base.dev->mode_config.cursor_width * 4; } @@ -1092,3 +1092,23 @@ fail: return ERR_PTR(ret); } + +void intel_cursor_mode_config_init(struct intel_display *display) +{ + struct drm_mode_config 
*mode_config = &display->drm->mode_config; + + if (display->platform.i845g) { + mode_config->cursor_width = 64; + mode_config->cursor_height = 1023; + } else if (display->platform.i865g) { + mode_config->cursor_width = 512; + mode_config->cursor_height = 1023; + } else if (display->platform.i830 || display->platform.i85x || + display->platform.i915g || display->platform.i915gm) { + mode_config->cursor_width = 64; + mode_config->cursor_height = 64; + } else { + mode_config->cursor_width = 256; + mode_config->cursor_height = 256; + } +} diff --git a/drivers/gpu/drm/i915/display/intel_cursor.h b/drivers/gpu/drm/i915/display/intel_cursor.h index 65a9e7eb88c2..7c269d7381ad 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.h +++ b/drivers/gpu/drm/i915/display/intel_cursor.h @@ -17,4 +17,6 @@ intel_cursor_plane_create(struct intel_display *display, void intel_cursor_unpin_work(struct kthread_work *base); +void intel_cursor_mode_config_init(struct intel_display *display); + #endif diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index b3b506d0e040..d98b4cf6b60e 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -145,9 +145,9 @@ void intel_cx0_bus_reset(struct intel_encoder *encoder, int lane) intel_de_write(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), XELPDP_PORT_M2P_TRANSACTION_RESET); - if (intel_de_wait_for_clear(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), - XELPDP_PORT_M2P_TRANSACTION_RESET, - XELPDP_MSGBUS_TIMEOUT_SLOW)) { + if (intel_de_wait_for_clear_ms(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), + XELPDP_PORT_M2P_TRANSACTION_RESET, + XELPDP_MSGBUS_TIMEOUT_MS)) { drm_err_once(display->drm, "Failed to bring PHY %c to idle.\n", phy_name(phy)); @@ -164,12 +164,10 @@ int intel_cx0_wait_for_ack(struct intel_encoder *encoder, enum port port = encoder->port; enum phy phy = intel_encoder_to_phy(encoder); - if (intel_de_wait_custom(display, - XELPDP_PORT_P2M_MSGBUS_STATUS(display, port, lane), - XELPDP_PORT_P2M_RESPONSE_READY, - XELPDP_PORT_P2M_RESPONSE_READY, - XELPDP_MSGBUS_TIMEOUT_FAST_US, - XELPDP_MSGBUS_TIMEOUT_SLOW, val)) { + if (intel_de_wait_ms(display, XELPDP_PORT_P2M_MSGBUS_STATUS(display, port, lane), + XELPDP_PORT_P2M_RESPONSE_READY, + XELPDP_PORT_P2M_RESPONSE_READY, + XELPDP_MSGBUS_TIMEOUT_MS, val)) { drm_dbg_kms(display->drm, "PHY %c Timeout waiting for message ACK. Status: 0x%x\n", phy_name(phy), *val); @@ -214,9 +212,9 @@ static int __intel_cx0_read_once(struct intel_encoder *encoder, int ack; u32 val; - if (intel_de_wait_for_clear(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), - XELPDP_PORT_M2P_TRANSACTION_PENDING, - XELPDP_MSGBUS_TIMEOUT_SLOW)) { + if (intel_de_wait_for_clear_ms(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), + XELPDP_PORT_M2P_TRANSACTION_PENDING, + XELPDP_MSGBUS_TIMEOUT_MS)) { drm_dbg_kms(display->drm, "PHY %c Timeout waiting for previous transaction to complete. 
Reset the bus and retry.\n", phy_name(phy)); intel_cx0_bus_reset(encoder, lane); @@ -285,9 +283,9 @@ static int __intel_cx0_write_once(struct intel_encoder *encoder, int ack; u32 val; - if (intel_de_wait_for_clear(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), - XELPDP_PORT_M2P_TRANSACTION_PENDING, - XELPDP_MSGBUS_TIMEOUT_SLOW)) { + if (intel_de_wait_for_clear_ms(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), + XELPDP_PORT_M2P_TRANSACTION_PENDING, + XELPDP_MSGBUS_TIMEOUT_MS)) { drm_dbg_kms(display->drm, "PHY %c Timeout waiting for previous transaction to complete. Resetting the bus.\n", phy_name(phy)); intel_cx0_bus_reset(encoder, lane); @@ -301,9 +299,9 @@ static int __intel_cx0_write_once(struct intel_encoder *encoder, XELPDP_PORT_M2P_DATA(data) | XELPDP_PORT_M2P_ADDRESS(addr)); - if (intel_de_wait_for_clear(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), - XELPDP_PORT_M2P_TRANSACTION_PENDING, - XELPDP_MSGBUS_TIMEOUT_SLOW)) { + if (intel_de_wait_for_clear_ms(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), + XELPDP_PORT_M2P_TRANSACTION_PENDING, + XELPDP_MSGBUS_TIMEOUT_MS)) { drm_dbg_kms(display->drm, "PHY %c Timeout waiting for write to complete. Resetting the bus.\n", phy_name(phy)); intel_cx0_bus_reset(encoder, lane); @@ -2814,9 +2812,9 @@ void intel_cx0_powerdown_change_sequence(struct intel_encoder *encoder, /* Wait for pending transactions.*/ for_each_cx0_lane_in_mask(lane_mask, lane) - if (intel_de_wait_for_clear(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), - XELPDP_PORT_M2P_TRANSACTION_PENDING, - XELPDP_MSGBUS_TIMEOUT_SLOW)) { + if (intel_de_wait_for_clear_ms(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), + XELPDP_PORT_M2P_TRANSACTION_PENDING, + XELPDP_MSGBUS_TIMEOUT_MS)) { drm_dbg_kms(display->drm, "PHY %c Timeout waiting for previous transaction to complete. 
Reset the bus.\n", phy_name(phy)); @@ -2828,12 +2826,12 @@ void intel_cx0_powerdown_change_sequence(struct intel_encoder *encoder, intel_cx0_get_powerdown_update(lane_mask)); /* Update Timeout Value */ - if (intel_de_wait_custom(display, buf_ctl2_reg, - intel_cx0_get_powerdown_update(lane_mask), 0, - XELPDP_PORT_POWERDOWN_UPDATE_TIMEOUT_US, 2, NULL)) + if (intel_de_wait_for_clear_ms(display, buf_ctl2_reg, + intel_cx0_get_powerdown_update(lane_mask), + XELPDP_PORT_POWERDOWN_UPDATE_TIMEOUT_MS)) drm_warn(display->drm, - "PHY %c failed to bring out of Lane reset after %dus.\n", - phy_name(phy), XELPDP_PORT_RESET_START_TIMEOUT_US); + "PHY %c failed to bring out of lane reset\n", + phy_name(phy)); } void intel_cx0_setup_powerdown(struct intel_encoder *encoder) @@ -2889,35 +2887,34 @@ static void intel_cx0_phy_lane_reset(struct intel_encoder *encoder, XELPDP_LANE_PHY_CURRENT_STATUS(1)) : XELPDP_LANE_PHY_CURRENT_STATUS(0); - if (intel_de_wait_custom(display, XELPDP_PORT_BUF_CTL1(display, port), - XELPDP_PORT_BUF_SOC_PHY_READY, - XELPDP_PORT_BUF_SOC_PHY_READY, - XELPDP_PORT_BUF_SOC_READY_TIMEOUT_US, 0, NULL)) + if (intel_de_wait_for_set_us(display, XELPDP_PORT_BUF_CTL1(display, port), + XELPDP_PORT_BUF_SOC_PHY_READY, + XELPDP_PORT_BUF_SOC_READY_TIMEOUT_US)) drm_warn(display->drm, - "PHY %c failed to bring out of SOC reset after %dus.\n", - phy_name(phy), XELPDP_PORT_BUF_SOC_READY_TIMEOUT_US); + "PHY %c failed to bring out of SOC reset\n", + phy_name(phy)); intel_de_rmw(display, XELPDP_PORT_BUF_CTL2(display, port), lane_pipe_reset, lane_pipe_reset); - if (intel_de_wait_custom(display, XELPDP_PORT_BUF_CTL2(display, port), - lane_phy_current_status, lane_phy_current_status, - XELPDP_PORT_RESET_START_TIMEOUT_US, 0, NULL)) + if (intel_de_wait_for_set_us(display, XELPDP_PORT_BUF_CTL2(display, port), + lane_phy_current_status, + XELPDP_PORT_RESET_START_TIMEOUT_US)) drm_warn(display->drm, - "PHY %c failed to bring out of Lane reset after %dus.\n", - phy_name(phy), XELPDP_PORT_RESET_START_TIMEOUT_US); + "PHY %c failed to bring out of lane reset\n", + phy_name(phy)); intel_de_rmw(display, XELPDP_PORT_CLOCK_CTL(display, port), intel_cx0_get_pclk_refclk_request(owned_lane_mask), intel_cx0_get_pclk_refclk_request(lane_mask)); - if (intel_de_wait_custom(display, XELPDP_PORT_CLOCK_CTL(display, port), - intel_cx0_get_pclk_refclk_ack(owned_lane_mask), - intel_cx0_get_pclk_refclk_ack(lane_mask), - XELPDP_REFCLK_ENABLE_TIMEOUT_US, 0, NULL)) + if (intel_de_wait_us(display, XELPDP_PORT_CLOCK_CTL(display, port), + intel_cx0_get_pclk_refclk_ack(owned_lane_mask), + intel_cx0_get_pclk_refclk_ack(lane_mask), + XELPDP_REFCLK_ENABLE_TIMEOUT_US, NULL)) drm_warn(display->drm, - "PHY %c failed to request refclk after %dus.\n", - phy_name(phy), XELPDP_REFCLK_ENABLE_TIMEOUT_US); + "PHY %c failed to request refclk\n", + phy_name(phy)); intel_cx0_powerdown_change_sequence(encoder, INTEL_CX0_BOTH_LANES, XELPDP_P2_STATE_RESET); @@ -2925,12 +2922,12 @@ static void intel_cx0_phy_lane_reset(struct intel_encoder *encoder, intel_de_rmw(display, XELPDP_PORT_BUF_CTL2(display, port), lane_pipe_reset, 0); - if (intel_de_wait_for_clear(display, XELPDP_PORT_BUF_CTL2(display, port), - lane_phy_current_status, - XELPDP_PORT_RESET_END_TIMEOUT)) + if (intel_de_wait_for_clear_ms(display, XELPDP_PORT_BUF_CTL2(display, port), + lane_phy_current_status, + XELPDP_PORT_RESET_END_TIMEOUT_MS)) drm_warn(display->drm, - "PHY %c failed to bring out of Lane reset after %dms.\n", - phy_name(phy), XELPDP_PORT_RESET_END_TIMEOUT); + "PHY %c failed to bring out of 
lane reset\n", + phy_name(phy)); } static void intel_cx0_program_phy_lane(struct intel_encoder *encoder, int lane_count, @@ -3065,12 +3062,12 @@ static void __intel_cx0pll_enable(struct intel_encoder *encoder, intel_cx0_get_pclk_pll_request(maxpclk_lane)); /* 10. Poll on PORT_CLOCK_CTL PCLK PLL Ack LN<Lane for maxPCLK> == "1". */ - if (intel_de_wait_custom(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), - intel_cx0_get_pclk_pll_ack(INTEL_CX0_BOTH_LANES), - intel_cx0_get_pclk_pll_ack(maxpclk_lane), - XELPDP_PCLK_PLL_ENABLE_TIMEOUT_US, 0, NULL)) - drm_warn(display->drm, "Port %c PLL not locked after %dus.\n", - phy_name(phy), XELPDP_PCLK_PLL_ENABLE_TIMEOUT_US); + if (intel_de_wait_us(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), + intel_cx0_get_pclk_pll_ack(INTEL_CX0_BOTH_LANES), + intel_cx0_get_pclk_pll_ack(maxpclk_lane), + XELPDP_PCLK_PLL_ENABLE_TIMEOUT_US, NULL)) + drm_warn(display->drm, "Port %c PLL not locked\n", + phy_name(phy)); /* * 11. Follow the Display Voltage Frequency Switching Sequence After @@ -3189,12 +3186,9 @@ void intel_mtl_tbt_pll_enable(struct intel_encoder *encoder, intel_de_write(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), val); /* 5. Poll on PORT_CLOCK_CTL TBT CLOCK Ack == "1". */ - if (intel_de_wait_custom(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), - XELPDP_TBT_CLOCK_ACK, - XELPDP_TBT_CLOCK_ACK, - 100, 0, NULL)) - drm_warn(display->drm, - "[ENCODER:%d:%s][%c] PHY PLL not locked after 100us.\n", + if (intel_de_wait_for_set_us(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), + XELPDP_TBT_CLOCK_ACK, 100)) + drm_warn(display->drm, "[ENCODER:%d:%s][%c] PHY PLL not locked\n", encoder->base.base.id, encoder->base.name, phy_name(phy)); /* @@ -3304,13 +3298,12 @@ static void intel_cx0pll_disable(struct intel_encoder *encoder) /* * 5. Poll on PORT_CLOCK_CTL PCLK PLL Ack LN<Lane for maxPCLK**> == "0". */ - if (intel_de_wait_custom(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), - intel_cx0_get_pclk_pll_ack(INTEL_CX0_BOTH_LANES) | - intel_cx0_get_pclk_refclk_ack(INTEL_CX0_BOTH_LANES), 0, - XELPDP_PCLK_PLL_DISABLE_TIMEOUT_US, 0, NULL)) - drm_warn(display->drm, - "Port %c PLL not unlocked after %dus.\n", - phy_name(phy), XELPDP_PCLK_PLL_DISABLE_TIMEOUT_US); + if (intel_de_wait_for_clear_us(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), + intel_cx0_get_pclk_pll_ack(INTEL_CX0_BOTH_LANES) | + intel_cx0_get_pclk_refclk_ack(INTEL_CX0_BOTH_LANES), + XELPDP_PCLK_PLL_DISABLE_TIMEOUT_US)) + drm_warn(display->drm, "Port %c PLL not unlocked\n", + phy_name(phy)); /* * 6. Follow the Display Voltage Frequency Switching Sequence After @@ -3353,10 +3346,9 @@ void intel_mtl_tbt_pll_disable(struct intel_encoder *encoder) XELPDP_TBT_CLOCK_REQUEST, 0); /* 3. Poll on PORT_CLOCK_CTL TBT CLOCK Ack == "0". 
*/ - if (intel_de_wait_custom(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), - XELPDP_TBT_CLOCK_ACK, 0, 10, 0, NULL)) - drm_warn(display->drm, - "[ENCODER:%d:%s][%c] PHY PLL not unlocked after 10us.\n", + if (intel_de_wait_for_clear_us(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port), + XELPDP_TBT_CLOCK_ACK, 10)) + drm_warn(display->drm, "[ENCODER:%d:%s][%c] PHY PLL not unlocked\n", encoder->base.base.id, encoder->base.name, phy_name(phy)); /* diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h index 635b35669348..8df5cd5ce418 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h @@ -74,14 +74,13 @@ #define XELPDP_PORT_P2M_DATA(val) REG_FIELD_PREP(XELPDP_PORT_P2M_DATA_MASK, val) #define XELPDP_PORT_P2M_ERROR_SET REG_BIT(15) -#define XELPDP_MSGBUS_TIMEOUT_SLOW 1 -#define XELPDP_MSGBUS_TIMEOUT_FAST_US 2 +#define XELPDP_MSGBUS_TIMEOUT_MS 1 #define XELPDP_PCLK_PLL_ENABLE_TIMEOUT_US 3200 #define XELPDP_PCLK_PLL_DISABLE_TIMEOUT_US 20 #define XELPDP_PORT_BUF_SOC_READY_TIMEOUT_US 100 #define XELPDP_PORT_RESET_START_TIMEOUT_US 5 -#define XELPDP_PORT_POWERDOWN_UPDATE_TIMEOUT_US 100 -#define XELPDP_PORT_RESET_END_TIMEOUT 15 +#define XELPDP_PORT_POWERDOWN_UPDATE_TIMEOUT_MS 2 +#define XELPDP_PORT_RESET_END_TIMEOUT_MS 15 #define XELPDP_REFCLK_ENABLE_TIMEOUT_US 1 #define _XELPDP_PORT_BUF_CTL1_LN0_A 0x64004 diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 733ef4559131..002ccd47856d 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -210,8 +210,8 @@ void intel_wait_ddi_buf_idle(struct intel_display *display, enum port port) } static_assert(DDI_BUF_IS_IDLE == XELPDP_PORT_BUF_PHY_IDLE); - if (intel_de_wait_for_set(display, intel_ddi_buf_status_reg(display, port), - DDI_BUF_IS_IDLE, 10)) + if (intel_de_wait_for_set_ms(display, intel_ddi_buf_status_reg(display, port), + DDI_BUF_IS_IDLE, 10)) drm_err(display->drm, "Timeout waiting for DDI BUF %c to get idle\n", port_name(port)); } @@ -235,8 +235,8 @@ static void intel_wait_ddi_buf_active(struct intel_encoder *encoder) } static_assert(DDI_BUF_IS_IDLE == XELPDP_PORT_BUF_PHY_IDLE); - if (intel_de_wait_for_clear(display, intel_ddi_buf_status_reg(display, port), - DDI_BUF_IS_IDLE, 10)) + if (intel_de_wait_for_clear_ms(display, intel_ddi_buf_status_reg(display, port), + DDI_BUF_IS_IDLE, 10)) drm_err(display->drm, "Timeout waiting for DDI BUF %c to get active\n", port_name(port)); } @@ -2307,8 +2307,8 @@ void intel_ddi_wait_for_act_sent(struct intel_encoder *encoder, { struct intel_display *display = to_intel_display(encoder); - if (intel_de_wait_for_set(display, dp_tp_status_reg(encoder, crtc_state), - DP_TP_STATUS_ACT_SENT, 1)) + if (intel_de_wait_for_set_ms(display, dp_tp_status_reg(encoder, crtc_state), + DP_TP_STATUS_ACT_SENT, 1)) drm_err(display->drm, "Timed out waiting for ACT sent\n"); } @@ -2383,11 +2383,11 @@ int intel_ddi_wait_for_fec_status(struct intel_encoder *encoder, return 0; if (enabled) - ret = intel_de_wait_for_set(display, dp_tp_status_reg(encoder, crtc_state), - DP_TP_STATUS_FEC_ENABLE_LIVE, 1); + ret = intel_de_wait_for_set_ms(display, dp_tp_status_reg(encoder, crtc_state), + DP_TP_STATUS_FEC_ENABLE_LIVE, 1); else - ret = intel_de_wait_for_clear(display, dp_tp_status_reg(encoder, crtc_state), - DP_TP_STATUS_FEC_ENABLE_LIVE, 1); + ret = intel_de_wait_for_clear_ms(display, dp_tp_status_reg(encoder, 
crtc_state), + DP_TP_STATUS_FEC_ENABLE_LIVE, 1); if (ret) { drm_err(display->drm, @@ -2577,9 +2577,7 @@ mtl_ddi_enable_d2d(struct intel_encoder *encoder) intel_de_rmw(display, reg, 0, set_bits); - ret = intel_de_wait_custom(display, reg, - wait_bits, wait_bits, - 100, 0, NULL); + ret = intel_de_wait_for_set_us(display, reg, wait_bits, 100); if (ret) { drm_err(display->drm, "Timeout waiting for D2D Link enable for DDI/PORT_BUF_CTL %c\n", port_name(port)); @@ -3079,9 +3077,7 @@ mtl_ddi_disable_d2d(struct intel_encoder *encoder) intel_de_rmw(display, reg, clr_bits, 0); - ret = intel_de_wait_custom(display, reg, - wait_bits, 0, - 100, 0, NULL); + ret = intel_de_wait_for_clear_us(display, reg, wait_bits, 100); if (ret) drm_err(display->drm, "Timeout waiting for D2D Link disable for DDI/PORT_BUF_CTL %c\n", port_name(port)); @@ -3868,9 +3864,9 @@ static void intel_ddi_set_idle_link_train(struct intel_dp *intel_dp, if (port == PORT_A && DISPLAY_VER(display) < 12) return; - if (intel_de_wait_for_set(display, - dp_tp_status_reg(encoder, crtc_state), - DP_TP_STATUS_IDLE_DONE, 2)) + if (intel_de_wait_for_set_ms(display, + dp_tp_status_reg(encoder, crtc_state), + DP_TP_STATUS_IDLE_DONE, 2)) drm_err(display->drm, "Timed out waiting for DP idle patterns\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_de.h b/drivers/gpu/drm/i915/display/intel_de.h index 9ecdcf6b73e4..a7ce3b875e06 100644 --- a/drivers/gpu/drm/i915/display/intel_de.h +++ b/drivers/gpu/drm/i915/display/intel_de.h @@ -84,20 +84,13 @@ intel_de_write(struct intel_display *display, i915_reg_t reg, u32 val) } static inline u32 -__intel_de_rmw_nowl(struct intel_display *display, i915_reg_t reg, - u32 clear, u32 set) -{ - return intel_uncore_rmw(__to_uncore(display), reg, clear, set); -} - -static inline u32 intel_de_rmw(struct intel_display *display, i915_reg_t reg, u32 clear, u32 set) { u32 val; intel_dmc_wl_get(display, reg); - val = __intel_de_rmw_nowl(display, reg, clear, set); + val = intel_uncore_rmw(__to_uncore(display), reg, clear, set); intel_dmc_wl_put(display, reg); @@ -105,34 +98,16 @@ intel_de_rmw(struct intel_display *display, i915_reg_t reg, u32 clear, u32 set) } static inline int -__intel_de_wait_for_register_nowl(struct intel_display *display, - i915_reg_t reg, - u32 mask, u32 value, unsigned int timeout_ms) -{ - return intel_wait_for_register(__to_uncore(display), reg, mask, - value, timeout_ms); -} - -static inline int -__intel_de_wait_for_register_atomic_nowl(struct intel_display *display, - i915_reg_t reg, - u32 mask, u32 value, - unsigned int fast_timeout_us) -{ - return __intel_wait_for_register(__to_uncore(display), reg, mask, - value, fast_timeout_us, 0, NULL); -} - -static inline int -intel_de_wait(struct intel_display *display, i915_reg_t reg, - u32 mask, u32 value, unsigned int timeout_ms) +intel_de_wait_us(struct intel_display *display, i915_reg_t reg, + u32 mask, u32 value, unsigned int timeout_us, + u32 *out_value) { int ret; intel_dmc_wl_get(display, reg); - ret = __intel_de_wait_for_register_nowl(display, reg, mask, value, - timeout_ms); + ret = __intel_wait_for_register(__to_uncore(display), reg, mask, + value, timeout_us, 0, out_value); intel_dmc_wl_put(display, reg); @@ -140,15 +115,16 @@ intel_de_wait(struct intel_display *display, i915_reg_t reg, } static inline int -intel_de_wait_fw(struct intel_display *display, i915_reg_t reg, - u32 mask, u32 value, unsigned int timeout_ms, u32 *out_value) +intel_de_wait_ms(struct intel_display *display, i915_reg_t reg, + u32 mask, u32 value, unsigned int timeout_ms, 
+ u32 *out_value) { int ret; intel_dmc_wl_get(display, reg); - ret = intel_wait_for_register_fw(__to_uncore(display), reg, mask, - value, timeout_ms, out_value); + ret = __intel_wait_for_register(__to_uncore(display), reg, mask, + value, 2, timeout_ms, out_value); intel_dmc_wl_put(display, reg); @@ -156,36 +132,49 @@ intel_de_wait_fw(struct intel_display *display, i915_reg_t reg, } static inline int -intel_de_wait_custom(struct intel_display *display, i915_reg_t reg, - u32 mask, u32 value, - unsigned int fast_timeout_us, - unsigned int slow_timeout_ms, u32 *out_value) +intel_de_wait_fw_ms(struct intel_display *display, i915_reg_t reg, + u32 mask, u32 value, unsigned int timeout_ms, + u32 *out_value) { - int ret; - - intel_dmc_wl_get(display, reg); + return __intel_wait_for_register_fw(__to_uncore(display), reg, mask, + value, 2, timeout_ms, out_value); +} - ret = __intel_wait_for_register(__to_uncore(display), reg, mask, - value, - fast_timeout_us, slow_timeout_ms, out_value); +static inline int +intel_de_wait_fw_us_atomic(struct intel_display *display, i915_reg_t reg, + u32 mask, u32 value, unsigned int timeout_us, + u32 *out_value) +{ + return __intel_wait_for_register_fw(__to_uncore(display), reg, mask, + value, timeout_us, 0, out_value); +} - intel_dmc_wl_put(display, reg); +static inline int +intel_de_wait_for_set_us(struct intel_display *display, i915_reg_t reg, + u32 mask, unsigned int timeout_us) +{ + return intel_de_wait_us(display, reg, mask, mask, timeout_us, NULL); +} - return ret; +static inline int +intel_de_wait_for_clear_us(struct intel_display *display, i915_reg_t reg, + u32 mask, unsigned int timeout_us) +{ + return intel_de_wait_us(display, reg, mask, 0, timeout_us, NULL); } static inline int -intel_de_wait_for_set(struct intel_display *display, i915_reg_t reg, - u32 mask, unsigned int timeout_ms) +intel_de_wait_for_set_ms(struct intel_display *display, i915_reg_t reg, + u32 mask, unsigned int timeout_ms) { - return intel_de_wait(display, reg, mask, mask, timeout_ms); + return intel_de_wait_ms(display, reg, mask, mask, timeout_ms, NULL); } static inline int -intel_de_wait_for_clear(struct intel_display *display, i915_reg_t reg, - u32 mask, unsigned int timeout_ms) +intel_de_wait_for_clear_ms(struct intel_display *display, i915_reg_t reg, + u32 mask, unsigned int timeout_ms) { - return intel_de_wait(display, reg, mask, 0, timeout_ms); + return intel_de_wait_ms(display, reg, mask, 0, timeout_ms, NULL); } /* @@ -215,6 +204,18 @@ intel_de_write_fw(struct intel_display *display, i915_reg_t reg, u32 val) } static inline u32 +intel_de_rmw_fw(struct intel_display *display, i915_reg_t reg, u32 clear, u32 set) +{ + u32 old, val; + + old = intel_de_read_fw(display, reg); + val = (old & ~clear) | set; + intel_de_write_fw(display, reg, val); + + return old; +} + +static inline u32 intel_de_read_notrace(struct intel_display *display, i915_reg_t reg) { return intel_uncore_read_notrace(__to_uncore(display), reg); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 42ec78798666..069967114bd9 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -359,8 +359,8 @@ intel_wait_for_pipe_off(const struct intel_crtc_state *old_crtc_state) enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; /* Wait for the Pipe State to go off */ - if (intel_de_wait_for_clear(display, TRANSCONF(display, cpu_transcoder), - TRANSCONF_STATE_ENABLE, 100)) + if 
(intel_de_wait_for_clear_ms(display, TRANSCONF(display, cpu_transcoder), + TRANSCONF_STATE_ENABLE, 100)) drm_WARN(display->drm, 1, "pipe_off wait timed out\n"); } else { intel_wait_for_pipe_scanline_stopped(crtc); @@ -547,16 +547,13 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state) intel_wait_for_pipe_off(old_crtc_state); } -u32 intel_plane_fb_max_stride(struct drm_device *drm, - u32 pixel_format, u64 modifier) +u32 intel_plane_fb_max_stride(struct intel_display *display, + const struct drm_format_info *info, + u64 modifier) { - struct intel_display *display = to_intel_display(drm); struct intel_crtc *crtc; struct intel_plane *plane; - if (!HAS_DISPLAY(display)) - return 0; - /* * We assume the primary plane for pipe A has * the highest stride limits of them all, @@ -568,10 +565,23 @@ u32 intel_plane_fb_max_stride(struct drm_device *drm, plane = to_intel_plane(crtc->base.primary); - return plane->max_stride(plane, pixel_format, modifier, + return plane->max_stride(plane, info, modifier, DRM_MODE_ROTATE_0); } +u32 intel_dumb_fb_max_stride(struct drm_device *drm, + u32 pixel_format, u64 modifier) +{ + struct intel_display *display = to_intel_display(drm); + + if (!HAS_DISPLAY(display)) + return 0; + + return intel_plane_fb_max_stride(display, + drm_get_format_info(drm, pixel_format, modifier), + modifier); +} + void intel_set_plane_visible(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state, bool visible) @@ -2658,7 +2668,7 @@ static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_sta crtc_vblank_start = crtc_vdisplay + crtc_state->set_context_latency; } - if (DISPLAY_VER(display) >= 4) + if (DISPLAY_VER(display) >= 4 && DISPLAY_VER(display) < 35) intel_de_write(display, TRANS_VSYNCSHIFT(display, cpu_transcoder), vsyncshift); @@ -2799,7 +2809,7 @@ static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state) struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - if (DISPLAY_VER(display) == 2) + if (DISPLAY_VER(display) == 2 || DISPLAY_VER(display) >= 35) return false; if (DISPLAY_VER(display) >= 9 || @@ -3190,10 +3200,12 @@ static void hsw_set_transconf(const struct intel_crtc_state *crtc_state) if (display->platform.haswell && crtc_state->dither) val |= TRANSCONF_DITHER_EN | TRANSCONF_DITHER_TYPE_SP; - if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) - val |= TRANSCONF_INTERLACE_IF_ID_ILK; - else - val |= TRANSCONF_INTERLACE_PF_PD_ILK; + if (DISPLAY_VER(display) < 35) { + if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) + val |= TRANSCONF_INTERLACE_IF_ID_ILK; + else + val |= TRANSCONF_INTERLACE_PF_PD_ILK; + } if (display->platform.haswell && crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index fc2ef92ccf68..bcc6ccb69d2b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -34,6 +34,7 @@ struct drm_atomic_state; struct drm_device; struct drm_display_mode; struct drm_encoder; +struct drm_format_info; struct drm_modeset_acquire_ctx; struct intel_atomic_state; struct intel_crtc; @@ -402,8 +403,11 @@ void intel_link_compute_m_n(u16 bpp, int nlanes, int pixel_clock, int link_clock, int bw_overhead, struct intel_link_m_n *m_n); -u32 intel_plane_fb_max_stride(struct drm_device *drm, - u32 pixel_format, u64 modifier); +u32 
intel_plane_fb_max_stride(struct intel_display *display, + const struct drm_format_info *info, + u64 modifier); +u32 intel_dumb_fb_max_stride(struct drm_device *drm, + u32 pixel_format, u64 modifier); enum drm_mode_status intel_mode_valid_max_plane_size(struct intel_display *display, const struct drm_display_mode *mode, diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 893279be8409..9b8414b77c15 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -142,14 +142,13 @@ struct intel_dpll_global { }; struct intel_frontbuffer_tracking { + /* protects busy_bits */ spinlock_t lock; /* - * Tracking bits for delayed frontbuffer flushing du to gpu activity or - * scheduled flips. + * Tracking bits for delayed frontbuffer flushing due to gpu activity. */ unsigned busy_bits; - unsigned flip_bits; }; struct intel_hotplug { diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 0b3fd65dac0f..9bbfdae8d024 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -78,9 +78,6 @@ static int i915_frontbuffer_tracking(struct seq_file *m, void *unused) seq_printf(m, "FB tracking busy bits: 0x%08x\n", display->fb_tracking.busy_bits); - seq_printf(m, "FB tracking flip bits: 0x%08x\n", - display->fb_tracking.flip_bits); - spin_unlock(&display->fb_tracking.lock); return 0; diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 328447a5e5e8..1170afaa8680 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1507,6 +1507,7 @@ static const struct { { 20, 0, &xe2_lpd_display }, { 30, 0, &xe2_lpd_display }, { 30, 2, &wcl_display }, + { 35, 0, &xe2_lpd_display }, }; static const struct intel_display_device_info * diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 63942ebf46fb..7e000ba3e08b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -29,6 +29,7 @@ #include "intel_cdclk.h" #include "intel_color.h" #include "intel_crtc.h" +#include "intel_cursor.h" #include "intel_dbuf_bw.h" #include "intel_display_core.h" #include "intel_display_debugfs.h" @@ -148,17 +149,7 @@ static void intel_mode_config_init(struct intel_display *display) mode_config->max_height = 2048; } - if (display->platform.i845g || display->platform.i865g) { - mode_config->cursor_width = display->platform.i845g ? 
64 : 512; - mode_config->cursor_height = 1023; - } else if (display->platform.i830 || display->platform.i85x || - display->platform.i915g || display->platform.i915gm) { - mode_config->cursor_width = 64; - mode_config->cursor_height = 64; - } else { - mode_config->cursor_width = 256; - mode_config->cursor_height = 256; - } + intel_cursor_mode_config_init(display); } static void intel_mode_config_cleanup(struct intel_display *display) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index fbfa823b6dce..2a4cc1dcc293 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -1292,9 +1292,8 @@ static void hsw_disable_lcpll(struct intel_display *display, val |= LCPLL_CD_SOURCE_FCLK; intel_de_write(display, LCPLL_CTL, val); - ret = intel_de_wait_custom(display, LCPLL_CTL, - LCPLL_CD_SOURCE_FCLK_DONE, LCPLL_CD_SOURCE_FCLK_DONE, - 1, 0, NULL); + ret = intel_de_wait_for_set_us(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, 1); if (ret) drm_err(display->drm, "Switching to FCLK failed\n"); @@ -1305,7 +1304,7 @@ static void hsw_disable_lcpll(struct intel_display *display, intel_de_write(display, LCPLL_CTL, val); intel_de_posting_read(display, LCPLL_CTL); - if (intel_de_wait_for_clear(display, LCPLL_CTL, LCPLL_PLL_LOCK, 1)) + if (intel_de_wait_for_clear_ms(display, LCPLL_CTL, LCPLL_PLL_LOCK, 1)) drm_err(display->drm, "LCPLL still locked\n"); val = hsw_read_dcomp(display); @@ -1362,15 +1361,14 @@ static void hsw_restore_lcpll(struct intel_display *display) val &= ~LCPLL_PLL_DISABLE; intel_de_write(display, LCPLL_CTL, val); - if (intel_de_wait_for_set(display, LCPLL_CTL, LCPLL_PLL_LOCK, 5)) + if (intel_de_wait_for_set_ms(display, LCPLL_CTL, LCPLL_PLL_LOCK, 5)) drm_err(display->drm, "LCPLL not locked yet\n"); if (val & LCPLL_CD_SOURCE_FCLK) { intel_de_rmw(display, LCPLL_CTL, LCPLL_CD_SOURCE_FCLK, 0); - ret = intel_de_wait_custom(display, LCPLL_CTL, - LCPLL_CD_SOURCE_FCLK_DONE, 0, - 1, 0, NULL); + ret = intel_de_wait_for_clear_us(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, 1); if (ret) drm_err(display->drm, "Switching back to LCPLL failed\n"); @@ -1438,6 +1436,9 @@ static void intel_pch_reset_handshake(struct intel_display *display, i915_reg_t reg; u32 reset_bits; + if (DISPLAY_VER(display) >= 35) + return; + if (display->platform.ivybridge) { reg = GEN7_MSG_CTL; reset_bits = WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK; diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index eab7019f2252..f4f7e73acc87 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -293,8 +293,8 @@ static void hsw_wait_for_power_well_enable(struct intel_display *display, } /* Timeout for PW1:10 us, AUX:not specified, other PWs:20 us. */ - if (intel_de_wait_for_set(display, regs->driver, - HSW_PWR_WELL_CTL_STATE(pw_idx), timeout)) { + if (intel_de_wait_for_set_ms(display, regs->driver, + HSW_PWR_WELL_CTL_STATE(pw_idx), timeout)) { drm_dbg_kms(display->drm, "%s power well enable timeout\n", intel_power_well_name(power_well)); @@ -338,9 +338,9 @@ static void hsw_wait_for_power_well_disable(struct intel_display *display, */ reqs = hsw_power_well_requesters(display, regs, pw_idx); - ret = intel_de_wait_for_clear(display, regs->driver, - HSW_PWR_WELL_CTL_STATE(pw_idx), - reqs ? 
0 : 1); + ret = intel_de_wait_for_clear_ms(display, regs->driver, + HSW_PWR_WELL_CTL_STATE(pw_idx), + reqs ? 0 : 1); if (!ret) return; @@ -359,8 +359,8 @@ static void gen9_wait_for_power_well_fuses(struct intel_display *display, { /* Timeout 5us for PG#0, for other PGs 1us */ drm_WARN_ON(display->drm, - intel_de_wait_for_set(display, SKL_FUSE_STATUS, - SKL_FUSE_PG_DIST_STATUS(pg), 1)); + intel_de_wait_for_set_ms(display, SKL_FUSE_STATUS, + SKL_FUSE_PG_DIST_STATUS(pg), 1)); } static void hsw_power_well_enable(struct intel_display *display, @@ -1358,6 +1358,7 @@ static void assert_chv_phy_status(struct intel_display *display) u32 phy_control = display->power.chv_phy_control; u32 phy_status = 0; u32 phy_status_mask = 0xffffffff; + u32 val; /* * The BIOS can leave the PHY is some weird state @@ -1445,12 +1446,11 @@ static void assert_chv_phy_status(struct intel_display *display) * The PHY may be busy with some initial calibration and whatnot, * so the power state can take a while to actually change. */ - if (intel_de_wait(display, DISPLAY_PHY_STATUS, - phy_status_mask, phy_status, 10)) + if (intel_de_wait_ms(display, DISPLAY_PHY_STATUS, + phy_status_mask, phy_status, 10, &val)) drm_err(display->drm, "Unexpected PHY_STATUS 0x%08x, expected 0x%08x (PHY_CONTROL=0x%08x)\n", - intel_de_read(display, DISPLAY_PHY_STATUS) & phy_status_mask, - phy_status, display->power.chv_phy_control); + val & phy_status_mask, phy_status, display->power.chv_phy_control); } #undef BITS_SET @@ -1476,8 +1476,8 @@ static void chv_dpio_cmn_power_well_enable(struct intel_display *display, vlv_set_power_well(display, power_well, true); /* Poll for phypwrgood signal */ - if (intel_de_wait_for_set(display, DISPLAY_PHY_STATUS, - PHY_POWERGOOD(phy), 1)) + if (intel_de_wait_for_set_ms(display, DISPLAY_PHY_STATUS, + PHY_POWERGOOD(phy), 1)) drm_err(display->drm, "Display PHY %d is not power up\n", phy); @@ -1867,8 +1867,8 @@ static void xelpdp_aux_power_well_enable(struct intel_display *display, * bit. 
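A detail worth noting from the assert_chv_phy_status() conversion in this file: intel_de_wait_ms() now hands back the last value it sampled through an out parameter, so the error message prints the value that actually failed the check instead of a second read that may already have changed. A rough user-space illustration of that pattern, with fake_read_status() and wait_status() as hypothetical stand-ins (not the i915 API):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical register read: the status never reaches the expected value here. */
static uint32_t fake_read_status(void)
{
	return 0x2;
}

/* Bounded poll that also reports the last sampled value to the caller. */
static int wait_status(uint32_t mask, uint32_t expected, int max_polls, uint32_t *out_value)
{
	uint32_t val = 0;
	int i;

	for (i = 0; i < max_polls; i++) {
		val = fake_read_status();
		if ((val & mask) == expected)
			break;
	}
	if (out_value)
		*out_value = val;
	return (val & mask) == expected ? 0 : -ETIMEDOUT;
}

int main(void)
{
	uint32_t val;

	if (wait_status(0xff, 0x1, 10, &val)) {
		/* log the exact value the poll saw, not a fresh (possibly different) read */
		printf("unexpected status 0x%08x, expected 0x%08x\n", val & 0xff, 0x1u);
	}
	return 0;
}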
*/ if (DISPLAY_VER(display) >= 35) { - if (intel_de_wait_for_set(display, XELPDP_DP_AUX_CH_CTL(display, aux_ch), - XELPDP_DP_AUX_CH_CTL_POWER_STATUS, 2)) + if (intel_de_wait_for_set_ms(display, XELPDP_DP_AUX_CH_CTL(display, aux_ch), + XELPDP_DP_AUX_CH_CTL_POWER_STATUS, 2)) drm_warn(display->drm, "Timeout waiting for PHY %c AUX channel power to be up\n", phy_name(phy)); @@ -1888,8 +1888,8 @@ static void xelpdp_aux_power_well_disable(struct intel_display *display, 0); if (DISPLAY_VER(display) >= 35) { - if (intel_de_wait_for_clear(display, XELPDP_DP_AUX_CH_CTL(display, aux_ch), - XELPDP_DP_AUX_CH_CTL_POWER_STATUS, 1)) + if (intel_de_wait_for_clear_ms(display, XELPDP_DP_AUX_CH_CTL(display, aux_ch), + XELPDP_DP_AUX_CH_CTL_POWER_STATUS, 1)) drm_warn(display->drm, "Timeout waiting for PHY %c AUX channel to powerdown\n", phy_name(phy)); @@ -1913,8 +1913,8 @@ static void xe2lpd_pica_power_well_enable(struct intel_display *display, intel_de_write(display, XE2LPD_PICA_PW_CTL, XE2LPD_PICA_CTL_POWER_REQUEST); - if (intel_de_wait_for_set(display, XE2LPD_PICA_PW_CTL, - XE2LPD_PICA_CTL_POWER_STATUS, 1)) { + if (intel_de_wait_for_set_ms(display, XE2LPD_PICA_PW_CTL, + XE2LPD_PICA_CTL_POWER_STATUS, 1)) { drm_dbg_kms(display->drm, "pica power well enable timeout\n"); drm_WARN(display->drm, 1, "Power well PICA timeout when enabled"); @@ -1926,8 +1926,8 @@ static void xe2lpd_pica_power_well_disable(struct intel_display *display, { intel_de_write(display, XE2LPD_PICA_PW_CTL, 0); - if (intel_de_wait_for_clear(display, XE2LPD_PICA_PW_CTL, - XE2LPD_PICA_CTL_POWER_STATUS, 1)) { + if (intel_de_wait_for_clear_ms(display, XE2LPD_PICA_PW_CTL, + XE2LPD_PICA_CTL_POWER_STATUS, 1)) { drm_dbg_kms(display->drm, "pica power well disable timeout\n"); drm_WARN(display->drm, 1, "Power well PICA timeout when disabled"); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 00600134bda0..38702a9e0f50 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -726,7 +726,6 @@ struct intel_initial_plane_config { struct intel_memory_region *mem; resource_size_t phys_base; struct i915_vma *vma; - unsigned int tiling; int size; u32 base; u8 rotation; @@ -1564,8 +1563,8 @@ struct intel_plane { const struct drm_framebuffer *fb, int color_plane); unsigned int (*max_stride)(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation); + const struct drm_format_info *info, + u64 modifier, unsigned int rotation); bool (*can_async_flip)(u64 modifier); /* Write all non-self arming plane registers */ void (*update_noarm)(struct intel_dsb *dsb, diff --git a/drivers/gpu/drm/i915/display/intel_display_wa.c b/drivers/gpu/drm/i915/display/intel_display_wa.c index c528aaa679ca..e38e5e87877c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_wa.c +++ b/drivers/gpu/drm/i915/display/intel_display_wa.c @@ -49,7 +49,8 @@ void intel_display_wa_apply(struct intel_display *display) */ static bool intel_display_needs_wa_16025573575(struct intel_display *display) { - return DISPLAY_VERx100(display) == 3000 || DISPLAY_VERx100(display) == 3002; + return DISPLAY_VERx100(display) == 3000 || DISPLAY_VERx100(display) == 3002 || + DISPLAY_VERx100(display) == 3500; } /* diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 0bddb20a7c86..6ebbd97e6351 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -127,6 
+127,9 @@ static bool dmc_firmware_param_disabled(struct intel_display *display) #define DISPLAY_VER13_DMC_MAX_FW_SIZE 0x20000 #define DISPLAY_VER12_DMC_MAX_FW_SIZE ICL_DMC_MAX_FW_SIZE +#define XE3P_LPD_DMC_PATH DMC_PATH(xe3p_lpd) +MODULE_FIRMWARE(XE3P_LPD_DMC_PATH); + #define XE3LPD_3002_DMC_PATH DMC_PATH(xe3lpd_3002) MODULE_FIRMWARE(XE3LPD_3002_DMC_PATH); @@ -186,7 +189,11 @@ static const char *dmc_firmware_default(struct intel_display *display, u32 *size { const char *fw_path = NULL; u32 max_fw_size = 0; - if (DISPLAY_VERx100(display) == 3002) { + + if (DISPLAY_VERx100(display) == 3500) { + fw_path = XE3P_LPD_DMC_PATH; + max_fw_size = XE2LPD_DMC_MAX_FW_SIZE; + } else if (DISPLAY_VERx100(display) == 3002) { fw_path = XE3LPD_3002_DMC_PATH; max_fw_size = XE2LPD_DMC_MAX_FW_SIZE; } else if (DISPLAY_VERx100(display) == 3000) { @@ -711,11 +718,11 @@ static bool need_pipedmc_load_program(struct intel_display *display) static bool need_pipedmc_load_mmio(struct intel_display *display, enum pipe pipe) { /* - * PTL: + * Xe3_LPD/Xe3p_LPD: * - pipe A/B DMC doesn't need save/restore * - pipe C/D DMC is in PG0, needs manual save/restore */ - if (DISPLAY_VER(display) == 30) + if (IS_DISPLAY_VER(display, 30, 35)) return pipe >= PIPE_C; /* @@ -1712,14 +1719,14 @@ void intel_pipedmc_irq_handler(struct intel_display *display, enum pipe pipe) drm_err_ratelimited(display->drm, "[CRTC:%d:%s] PIPEDMC GTT fault\n", crtc->base.base.id, crtc->base.name); if (tmp & PIPEDMC_ERROR) - drm_err(display->drm, "[CRTC:%d:%s]] PIPEDMC error\n", + drm_err(display->drm, "[CRTC:%d:%s] PIPEDMC error\n", crtc->base.base.id, crtc->base.name); } int_vector = intel_de_read(display, PIPEDMC_STATUS(pipe)) & PIPEDMC_INT_VECTOR_MASK; if (tmp == 0 && int_vector != 0) - drm_err(display->drm, "[CRTC:%d:%s]] PIPEDMC interrupt vector 0x%x\n", - crtc->base.base.id, crtc->base.name, tmp); + drm_err(display->drm, "[CRTC:%d:%s] PIPEDMC interrupt vector 0x%x\n", + crtc->base.base.id, crtc->base.name, int_vector); } void intel_pipedmc_enable_event(struct intel_crtc *crtc, diff --git a/drivers/gpu/drm/i915/display/intel_dmc_wl.c b/drivers/gpu/drm/i915/display/intel_dmc_wl.c index b3bb89ba34f9..73a3101514f3 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc_wl.c +++ b/drivers/gpu/drm/i915/display/intel_dmc_wl.c @@ -179,11 +179,11 @@ static void intel_dmc_wl_work(struct work_struct *work) if (refcount_read(&wl->refcount)) goto out_unlock; - __intel_de_rmw_nowl(display, DMC_WAKELOCK1_CTL, DMC_WAKELOCK_CTL_REQ, 0); + intel_de_rmw_fw(display, DMC_WAKELOCK1_CTL, DMC_WAKELOCK_CTL_REQ, 0); - if (__intel_de_wait_for_register_atomic_nowl(display, DMC_WAKELOCK1_CTL, - DMC_WAKELOCK_CTL_ACK, 0, - DMC_WAKELOCK_CTL_TIMEOUT_US)) { + if (intel_de_wait_fw_us_atomic(display, DMC_WAKELOCK1_CTL, + DMC_WAKELOCK_CTL_ACK, 0, + DMC_WAKELOCK_CTL_TIMEOUT_US, NULL)) { WARN_RATELIMIT(1, "DMC wakelock release timed out"); goto out_unlock; } @@ -207,17 +207,16 @@ static void __intel_dmc_wl_take(struct intel_display *display) if (wl->taken) return; - __intel_de_rmw_nowl(display, DMC_WAKELOCK1_CTL, 0, - DMC_WAKELOCK_CTL_REQ); + intel_de_rmw_fw(display, DMC_WAKELOCK1_CTL, 0, DMC_WAKELOCK_CTL_REQ); /* * We need to use the atomic variant of the waiting routine * because the DMC wakelock is also taken in atomic context. 
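That comment is the reason intel_dmc_wl.c keeps using the *_fw_us_atomic flavour of the wait: the DMC wakelock can be taken from atomic context, where the helper must busy-wait between register reads rather than sleep. A toy sketch of the difference, with fake_ack_reg(), wait_sleeping() and wait_atomic() as made-up names and nanosleep() standing in for a sleeping wait (the kernel versions are built on __intel_wait_for_register{,_fw}()):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Hypothetical ack register: the bit shows up after roughly a thousand polls. */
static uint32_t fake_ack_reg(void)
{
	static int calls;
	return ++calls >= 1000 ? 0x1 : 0x0;
}

/* Sleeping variant: fine in process context, forbidden where sleeping is not allowed. */
static int wait_sleeping(uint32_t mask, uint32_t value, int max_polls)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 10000 };	/* 10us between polls */
	int i;

	for (i = 0; i < max_polls; i++) {
		if ((fake_ack_reg() & mask) == value)
			return 0;
		nanosleep(&ts, NULL);
	}
	return -ETIMEDOUT;
}

/* Atomic-safe variant: never sleeps, just spins for a bounded number of polls. */
static int wait_atomic(uint32_t mask, uint32_t value, int max_polls)
{
	int i;

	for (i = 0; i < max_polls; i++)
		if ((fake_ack_reg() & mask) == value)
			return 0;
	return -ETIMEDOUT;
}

int main(void)
{
	printf("sleeping wait: %d\n", wait_sleeping(0x1, 0x1, 2000));
	printf("atomic wait:   %d\n", wait_atomic(0x1, 0x1, 2000));
	return 0;
}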
*/ - if (__intel_de_wait_for_register_atomic_nowl(display, DMC_WAKELOCK1_CTL, - DMC_WAKELOCK_CTL_ACK, - DMC_WAKELOCK_CTL_ACK, - DMC_WAKELOCK_CTL_TIMEOUT_US)) { + if (intel_de_wait_fw_us_atomic(display, DMC_WAKELOCK1_CTL, + DMC_WAKELOCK_CTL_ACK, + DMC_WAKELOCK_CTL_ACK, + DMC_WAKELOCK_CTL_TIMEOUT_US, NULL)) { WARN_RATELIMIT(1, "DMC wakelock ack timed out"); return; } @@ -360,7 +359,7 @@ void intel_dmc_wl_enable(struct intel_display *display, u32 dc_state) * wakelock, because we're just enabling it, so call the * non-locking version directly here. */ - __intel_de_rmw_nowl(display, DMC_WAKELOCK_CFG, 0, DMC_WAKELOCK_CFG_ENABLE); + intel_de_rmw_fw(display, DMC_WAKELOCK_CFG, 0, DMC_WAKELOCK_CFG_ENABLE); wl->enabled = true; @@ -402,7 +401,7 @@ void intel_dmc_wl_disable(struct intel_display *display) goto out_unlock; /* Disable wakelock in DMC */ - __intel_de_rmw_nowl(display, DMC_WAKELOCK_CFG, DMC_WAKELOCK_CFG_ENABLE, 0); + intel_de_rmw_fw(display, DMC_WAKELOCK_CFG, DMC_WAKELOCK_CFG_ENABLE, 0); wl->enabled = false; @@ -414,7 +413,7 @@ void intel_dmc_wl_disable(struct intel_display *display) * * TODO: Get the correct expectation from the hardware team. */ - __intel_de_rmw_nowl(display, DMC_WAKELOCK1_CTL, DMC_WAKELOCK_CTL_REQ, 0); + intel_de_rmw_fw(display, DMC_WAKELOCK1_CTL, DMC_WAKELOCK_CTL_REQ, 0); wl->taken = false; diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 2e7dbaf511b9..809799f63e32 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -62,9 +62,9 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp) u32 status; int ret; - ret = intel_de_wait_custom(display, ch_ctl, DP_AUX_CH_CTL_SEND_BUSY, - 0, - 2, timeout_ms, &status); + ret = intel_de_wait_ms(display, ch_ctl, + DP_AUX_CH_CTL_SEND_BUSY, 0, + timeout_ms, &status); if (ret == -ETIMEDOUT) drm_err(display->drm, diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index bd757db85927..14ed0ea22dd3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -782,9 +782,9 @@ intel_dp_mst_hdcp_stream_encryption(struct intel_connector *connector, return -EINVAL; /* Wait for encryption confirmation */ - if (intel_de_wait(display, HDCP_STATUS(display, cpu_transcoder, port), - stream_enc_status, enable ? stream_enc_status : 0, - HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { + if (intel_de_wait_ms(display, HDCP_STATUS(display, cpu_transcoder, port), + stream_enc_status, enable ? stream_enc_status : 0, + HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS, NULL)) { drm_err(display->drm, "Timed out waiting for transcoder: %s stream encryption %s\n", transcoder_name(cpu_transcoder), str_enabled_disabled(enable)); return -ETIMEDOUT; @@ -821,10 +821,10 @@ intel_dp_mst_hdcp2_stream_encryption(struct intel_connector *connector, return ret; /* Wait for encryption confirmation */ - if (intel_de_wait(display, HDCP2_STREAM_STATUS(display, cpu_transcoder, pipe), - STREAM_ENCRYPTION_STATUS, - enable ? STREAM_ENCRYPTION_STATUS : 0, - HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { + if (intel_de_wait_ms(display, HDCP2_STREAM_STATUS(display, cpu_transcoder, pipe), + STREAM_ENCRYPTION_STATUS, + enable ? 
STREAM_ENCRYPTION_STATUS : 0, + HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS, NULL)) { drm_err(display->drm, "Timed out waiting for transcoder: %s stream encryption %s\n", transcoder_name(cpu_transcoder), str_enabled_disabled(enable)); return -ETIMEDOUT; diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c index 5df6347a420d..8027bab2951b 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c @@ -390,7 +390,7 @@ static u32 bxt_get_grc(struct intel_display *display, enum dpio_phy phy) static void bxt_phy_wait_grc_done(struct intel_display *display, enum dpio_phy phy) { - if (intel_de_wait_for_set(display, BXT_PORT_REF_DW3(phy), GRC_DONE, 10)) + if (intel_de_wait_for_set_ms(display, BXT_PORT_REF_DW3(phy), GRC_DONE, 10)) drm_err(display->drm, "timeout waiting for PHY%d GRC\n", phy); } @@ -427,7 +427,7 @@ static void _bxt_dpio_phy_init(struct intel_display *display, enum dpio_phy phy) * The flag should get set in 100us according to the HW team, but * use 1ms due to occasional timeouts observed with that. */ - if (intel_de_wait_fw(display, BXT_PORT_CL1CM_DW0(phy), + if (intel_de_wait_ms(display, BXT_PORT_CL1CM_DW0(phy), PHY_RESERVED | PHY_POWER_GOOD, PHY_POWER_GOOD, 1, NULL)) drm_err(display->drm, "timeout during PHY%d power on\n", phy); @@ -1173,6 +1173,7 @@ void vlv_wait_port_ready(struct intel_encoder *encoder, struct intel_display *display = to_intel_display(encoder); u32 port_mask; i915_reg_t dpll_reg; + u32 val; switch (encoder->port) { default: @@ -1193,10 +1194,9 @@ void vlv_wait_port_ready(struct intel_encoder *encoder, break; } - if (intel_de_wait(display, dpll_reg, port_mask, expected_mask, 1000)) + if (intel_de_wait_ms(display, dpll_reg, port_mask, expected_mask, 1000, &val)) drm_WARN(display->drm, 1, "timed out waiting for [ENCODER:%d:%s] port ready: got 0x%x, expected 0x%x\n", encoder->base.base.id, encoder->base.name, - intel_de_read(display, dpll_reg) & port_mask, - expected_mask); + val & port_mask, expected_mask); } diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 2e1f67be8eda..4f1db8493a2e 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -2019,7 +2019,7 @@ static void _vlv_enable_pll(const struct intel_crtc_state *crtc_state) intel_de_posting_read(display, DPLL(display, pipe)); udelay(150); - if (intel_de_wait_for_set(display, DPLL(display, pipe), DPLL_LOCK_VLV, 1)) + if (intel_de_wait_for_set_ms(display, DPLL(display, pipe), DPLL_LOCK_VLV, 1)) drm_err(display->drm, "DPLL %d failed to lock\n", pipe); } @@ -2165,7 +2165,7 @@ static void _chv_enable_pll(const struct intel_crtc_state *crtc_state) intel_de_write(display, DPLL(display, pipe), hw_state->dpll); /* Check PLL is locked */ - if (intel_de_wait_for_set(display, DPLL(display, pipe), DPLL_LOCK_VLV, 1)) + if (intel_de_wait_for_set_ms(display, DPLL(display, pipe), DPLL_LOCK_VLV, 1)) drm_err(display->drm, "PLL %d failed to lock\n", pipe); } diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 92c433f7b7e2..9c7cf03cf022 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -1395,7 +1395,7 @@ static void skl_ddi_pll_enable(struct intel_display *display, /* the enable bit is always bit 31 */ intel_de_rmw(display, regs[id].ctl, 0, LCPLL_PLL_ENABLE); - if (intel_de_wait_for_set(display, DPLL_STATUS, 
DPLL_LOCK(id), 5)) + if (intel_de_wait_for_set_ms(display, DPLL_STATUS, DPLL_LOCK(id), 5)) drm_err(display->drm, "DPLL %d not locked\n", id); } @@ -2057,9 +2057,9 @@ static void bxt_ddi_pll_enable(struct intel_display *display, intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), 0, PORT_PLL_POWER_ENABLE); - ret = intel_de_wait_custom(display, BXT_PORT_PLL_ENABLE(port), - PORT_PLL_POWER_STATE, PORT_PLL_POWER_STATE, - 200, 0, NULL); + ret = intel_de_wait_for_set_us(display, + BXT_PORT_PLL_ENABLE(port), + PORT_PLL_POWER_STATE, 200); if (ret) drm_err(display->drm, "Power state not set for PLL:%d\n", port); @@ -2122,9 +2122,8 @@ static void bxt_ddi_pll_enable(struct intel_display *display, intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), 0, PORT_PLL_ENABLE); intel_de_posting_read(display, BXT_PORT_PLL_ENABLE(port)); - ret = intel_de_wait_custom(display, BXT_PORT_PLL_ENABLE(port), - PORT_PLL_LOCK, PORT_PLL_LOCK, - 200, 0, NULL); + ret = intel_de_wait_for_set_us(display, BXT_PORT_PLL_ENABLE(port), + PORT_PLL_LOCK, 200); if (ret) drm_err(display->drm, "PLL %d not locked\n", port); @@ -2158,9 +2157,9 @@ static void bxt_ddi_pll_disable(struct intel_display *display, intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), PORT_PLL_POWER_ENABLE, 0); - ret = intel_de_wait_custom(display, BXT_PORT_PLL_ENABLE(port), - PORT_PLL_POWER_STATE, 0, - 200, 0, NULL); + ret = intel_de_wait_for_clear_us(display, + BXT_PORT_PLL_ENABLE(port), + PORT_PLL_POWER_STATE, 200); if (ret) drm_err(display->drm, "Power state not reset for PLL:%d\n", port); @@ -3921,7 +3920,7 @@ static void icl_pll_power_enable(struct intel_display *display, * The spec says we need to "wait" but it also says it should be * immediate. */ - if (intel_de_wait_for_set(display, enable_reg, PLL_POWER_STATE, 1)) + if (intel_de_wait_for_set_ms(display, enable_reg, PLL_POWER_STATE, 1)) drm_err(display->drm, "PLL %d Power not enabled\n", pll->info->id); } @@ -3933,7 +3932,7 @@ static void icl_pll_enable(struct intel_display *display, intel_de_rmw(display, enable_reg, 0, PLL_ENABLE); /* Timeout is actually 600us. */ - if (intel_de_wait_for_set(display, enable_reg, PLL_LOCK, 1)) + if (intel_de_wait_for_set_ms(display, enable_reg, PLL_LOCK, 1)) drm_err(display->drm, "PLL %d not locked\n", pll->info->id); } @@ -4046,7 +4045,7 @@ static void icl_pll_disable(struct intel_display *display, intel_de_rmw(display, enable_reg, PLL_ENABLE, 0); /* Timeout is actually 1us. */ - if (intel_de_wait_for_clear(display, enable_reg, PLL_LOCK, 1)) + if (intel_de_wait_for_clear_ms(display, enable_reg, PLL_LOCK, 1)) drm_err(display->drm, "PLL %d locked\n", pll->info->id); /* DVFS post sequence would be here. See the comment above. */ @@ -4057,7 +4056,7 @@ static void icl_pll_disable(struct intel_display *display, * The spec says we need to "wait" but it also says it should be * immediate. 
*/ - if (intel_de_wait_for_clear(display, enable_reg, PLL_POWER_STATE, 1)) + if (intel_de_wait_for_clear_ms(display, enable_reg, PLL_POWER_STATE, 1)) drm_err(display->drm, "PLL %d Power not disabled\n", pll->info->id); } diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 31edf57a296f..4b815ce6b1fe 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -171,6 +171,9 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, case MIPI_DSI_DCS_LONG_WRITE: ret = mipi_dsi_dcs_write_buffer(dsi_device, data, len); break; + default: + ret = -EINVAL; + break; } if (ret < 0) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 064c0d3e8177..b34b4961fe1c 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1326,7 +1326,7 @@ static bool intel_plane_needs_remap(const struct intel_plane_state *plane_state) * unclear in Bspec, for now no checking. */ stride = intel_fb_pitch(fb, 0, rotation); - max_stride = plane->max_stride(plane, fb->base.format->format, + max_stride = plane->max_stride(plane, fb->base.format, fb->base.modifier, rotation); return stride > max_stride; @@ -1972,7 +1972,8 @@ void intel_add_fb_offsets(int *x, int *y, static u32 intel_fb_max_stride(struct intel_display *display, - u32 pixel_format, u64 modifier) + const struct drm_format_info *info, + u64 modifier) { /* * Arbitrary limit for gen4+ chosen to match the @@ -1982,7 +1983,7 @@ u32 intel_fb_max_stride(struct intel_display *display, */ if (DISPLAY_VER(display) < 4 || intel_fb_is_ccs_modifier(modifier) || intel_fb_modifier_uses_dpt(display, modifier)) - return intel_plane_fb_max_stride(display->drm, pixel_format, modifier); + return intel_plane_fb_max_stride(display, info, modifier); else if (DISPLAY_VER(display) >= 7) return 256 * 1024; else @@ -1996,8 +1997,8 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane) unsigned int tile_width; if (is_surface_linear(fb, color_plane)) { - unsigned int max_stride = intel_plane_fb_max_stride(display->drm, - fb->format->format, + unsigned int max_stride = intel_plane_fb_max_stride(display, + fb->format, fb->modifier); /* @@ -2055,7 +2056,7 @@ static int intel_plane_check_stride(const struct intel_plane_state *plane_state) /* FIXME other color planes? 
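The stride check that follows is unchanged in substance by the drm_format_info conversion: the chosen mapping stride must not exceed what the plane can scan out for that format/modifier/rotation, the limit now being derived from the format info rather than a raw fourcc. A simplified stand-alone version of that comparison, with fake_format_info, fake_max_stride() and check_plane_stride() all made up for illustration (the real limits come from the per-plane max_stride() vfunc):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Cut-down stand-in for drm_format_info. */
struct fake_format_info {
	uint32_t fourcc;
	int cpp;		/* bytes per pixel of plane 0 */
};

/* Hypothetical limit: 8K pixels worth of plane 0 data for linear, 4K for tiled. */
static uint32_t fake_max_stride(const struct fake_format_info *info, bool linear)
{
	return (linear ? 8192 : 4096) * info->cpp;
}

static int check_plane_stride(const struct fake_format_info *info, bool linear,
			      uint32_t stride)
{
	uint32_t max_stride = fake_max_stride(info, linear);

	if (stride > max_stride) {
		printf("stride %u exceeds max %u\n", stride, max_stride);
		return -1;	/* the driver would return -EINVAL here */
	}
	return 0;
}

int main(void)
{
	struct fake_format_info xrgb8888 = { .fourcc = 0x34325258, .cpp = 4 };

	printf("4K scanout: %d\n", check_plane_stride(&xrgb8888, true, 3840 * 4));
	printf("too wide:   %d\n", check_plane_stride(&xrgb8888, true, 65536 * 4));
	return 0;
}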
*/ stride = plane_state->view.color_plane[0].mapping_stride; - max_stride = plane->max_stride(plane, fb->format->format, + max_stride = plane->max_stride(plane, fb->format, fb->modifier, rotation); if (stride > max_stride) { @@ -2194,7 +2195,6 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, return ret; flush: - intel_bo_flush_if_display(obj); intel_frontbuffer_flush(front, ORIGIN_DIRTYFB); return ret; } @@ -2234,24 +2234,24 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (ret) goto err_frontbuffer_put; - ret = -EINVAL; if (!drm_any_plane_has_format(display->drm, mode_cmd->pixel_format, mode_cmd->modifier[0])) { drm_dbg_kms(display->drm, "unsupported pixel format %p4cc / modifier 0x%llx\n", &mode_cmd->pixel_format, mode_cmd->modifier[0]); + ret = -EINVAL; goto err_bo_framebuffer_fini; } - max_stride = intel_fb_max_stride(display, mode_cmd->pixel_format, - mode_cmd->modifier[0]); + max_stride = intel_fb_max_stride(display, info, mode_cmd->modifier[0]); if (mode_cmd->pitches[0] > max_stride) { drm_dbg_kms(display->drm, "%s pitch (%u) must be at most %d\n", mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? "tiled" : "linear", mode_cmd->pitches[0], max_stride); + ret = -EINVAL; goto err_bo_framebuffer_fini; } @@ -2260,6 +2260,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane 0 offset (0x%08x) must be 0\n", mode_cmd->offsets[0]); + ret = -EINVAL; goto err_bo_framebuffer_fini; } @@ -2270,6 +2271,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (mode_cmd->handles[i] != mode_cmd->handles[0]) { drm_dbg_kms(display->drm, "bad plane %d handle\n", i); + ret = -EINVAL; goto err_bo_framebuffer_fini; } @@ -2278,6 +2280,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane %d pitch (%d) must be at least %u byte aligned\n", i, fb->pitches[i], stride_alignment); + ret = -EINVAL; goto err_bo_framebuffer_fini; } @@ -2288,6 +2291,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "ccs aux plane %d pitch (%d) must be %d\n", i, fb->pitches[i], ccs_aux_stride); + ret = -EINVAL; goto err_bo_framebuffer_fini; } } diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index a1e3083022ee..437d2fda20a7 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -328,8 +328,8 @@ static void i8xx_fbc_deactivate(struct intel_fbc *fbc) intel_de_write(display, FBC_CONTROL, fbc_ctl); /* Wait for compressing bit to clear */ - if (intel_de_wait_for_clear(display, FBC_STATUS, - FBC_STAT_COMPRESSING, 10)) { + if (intel_de_wait_for_clear_ms(display, FBC_STATUS, + FBC_STAT_COMPRESSING, 10)) { drm_dbg_kms(display->drm, "FBC idle timed out\n"); return; } diff --git a/drivers/gpu/drm/i915/display/intel_flipq.c b/drivers/gpu/drm/i915/display/intel_flipq.c index f162614a925d..1e9550cb66a3 100644 --- a/drivers/gpu/drm/i915/display/intel_flipq.c +++ b/drivers/gpu/drm/i915/display/intel_flipq.c @@ -163,10 +163,10 @@ static void intel_flipq_preempt(struct intel_crtc *crtc, bool preempt) PIPEDMC_FQ_CTRL_PREEMPT, preempt ? 
PIPEDMC_FQ_CTRL_PREEMPT : 0); if (preempt && - intel_de_wait_for_clear(display, - PIPEDMC_FQ_STATUS(crtc->pipe), - PIPEDMC_FQ_STATUS_BUSY, - intel_flipq_preempt_timeout_ms(display))) + intel_de_wait_for_clear_ms(display, + PIPEDMC_FQ_STATUS(crtc->pipe), + PIPEDMC_FQ_STATUS_BUSY, + intel_flipq_preempt_timeout_ms(display))) drm_err(display->drm, "[CRTC:%d:%s] flip queue preempt timeout\n", crtc->base.base.id, crtc->base.name); } diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index c88bef22d366..03c4978fa5ec 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -58,8 +58,6 @@ #include <drm/drm_gem.h> #include <drm/drm_print.h> -#include "i915_active.h" -#include "i915_vma.h" #include "intel_bo.h" #include "intel_display_trace.h" #include "intel_display_types.h" @@ -104,51 +102,6 @@ static void frontbuffer_flush(struct intel_display *display, } /** - * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip - * @display: display device - * @frontbuffer_bits: frontbuffer plane tracking bits - * - * This function gets called after scheduling a flip on @obj. The actual - * frontbuffer flushing will be delayed until completion is signalled with - * intel_frontbuffer_flip_complete. If an invalidate happens in between this - * flush will be cancelled. - * - * Can be called without any locks held. - */ -void intel_frontbuffer_flip_prepare(struct intel_display *display, - unsigned frontbuffer_bits) -{ - spin_lock(&display->fb_tracking.lock); - display->fb_tracking.flip_bits |= frontbuffer_bits; - /* Remove stale busy bits due to the old buffer. */ - display->fb_tracking.busy_bits &= ~frontbuffer_bits; - spin_unlock(&display->fb_tracking.lock); -} - -/** - * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip - * @display: display device - * @frontbuffer_bits: frontbuffer plane tracking bits - * - * This function gets called after the flip has been latched and will complete - * on the next vblank. It will execute the flush if it hasn't been cancelled yet. - * - * Can be called without any locks held. - */ -void intel_frontbuffer_flip_complete(struct intel_display *display, - unsigned frontbuffer_bits) -{ - spin_lock(&display->fb_tracking.lock); - /* Mask any cancelled flips. 
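With intel_frontbuffer_flip_prepare()/_complete() and flip_bits removed here, what remains of frontbuffer tracking is moving per-plane bits between the old and new intel_frontbuffer with plain atomics, as intel_frontbuffer_track() further down in this file does with atomic_andnot()/atomic_or(). A toy C11 model of that hand-over, with toy_frontbuffer and toy_frontbuffer_track() as made-up names (the kernel uses its own atomic_t helpers):

#include <stdatomic.h>
#include <stdio.h>

/* Toy model of the per-framebuffer tracking bits (one bit per plane/pipe slot). */
struct toy_frontbuffer {
	atomic_uint bits;
};

/* Move the given tracking bits from the old frontbuffer to the new one. */
static void toy_frontbuffer_track(struct toy_frontbuffer *old_fb,
				  struct toy_frontbuffer *new_fb,
				  unsigned int frontbuffer_bits)
{
	if (old_fb)
		atomic_fetch_and(&old_fb->bits, ~frontbuffer_bits);
	if (new_fb)
		atomic_fetch_or(&new_fb->bits, frontbuffer_bits);
}

int main(void)
{
	struct toy_frontbuffer a = { .bits = 0 }, b = { .bits = 0 };

	toy_frontbuffer_track(NULL, &a, 0x1);	/* plane starts scanning out of 'a' */
	toy_frontbuffer_track(&a, &b, 0x1);	/* flip: tracking moves to 'b' */
	printf("a=0x%x b=0x%x\n", atomic_load(&a.bits), atomic_load(&b.bits));
	return 0;
}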
*/ - frontbuffer_bits &= display->fb_tracking.flip_bits; - display->fb_tracking.flip_bits &= ~frontbuffer_bits; - spin_unlock(&display->fb_tracking.lock); - - if (frontbuffer_bits) - frontbuffer_flush(display, frontbuffer_bits, ORIGIN_FLIP); -} - -/** * intel_frontbuffer_flip - synchronous frontbuffer flip * @display: display device * @frontbuffer_bits: frontbuffer plane tracking bits @@ -174,12 +127,11 @@ void __intel_fb_invalidate(struct intel_frontbuffer *front, enum fb_op_origin origin, unsigned int frontbuffer_bits) { - struct intel_display *display = to_intel_display(front->obj->dev); + struct intel_display *display = front->display; if (origin == ORIGIN_CS) { spin_lock(&display->fb_tracking.lock); display->fb_tracking.busy_bits |= frontbuffer_bits; - display->fb_tracking.flip_bits &= ~frontbuffer_bits; spin_unlock(&display->fb_tracking.lock); } @@ -195,7 +147,10 @@ void __intel_fb_flush(struct intel_frontbuffer *front, enum fb_op_origin origin, unsigned int frontbuffer_bits) { - struct intel_display *display = to_intel_display(front->obj->dev); + struct intel_display *display = front->display; + + if (origin == ORIGIN_DIRTYFB) + intel_bo_frontbuffer_flush_for_display(front); if (origin == ORIGIN_CS) { spin_lock(&display->fb_tracking.lock); @@ -209,12 +164,16 @@ void __intel_fb_flush(struct intel_frontbuffer *front, frontbuffer_flush(display, frontbuffer_bits, origin); } +static void intel_frontbuffer_ref(struct intel_frontbuffer *front) +{ + intel_bo_frontbuffer_ref(front); +} + static void intel_frontbuffer_flush_work(struct work_struct *work) { struct intel_frontbuffer *front = container_of(work, struct intel_frontbuffer, flush_work); - intel_bo_flush_if_display(front->obj); intel_frontbuffer_flush(front, ORIGIN_DIRTYFB); intel_frontbuffer_put(front); } @@ -231,93 +190,31 @@ void intel_frontbuffer_queue_flush(struct intel_frontbuffer *front) if (!front) return; - kref_get(&front->ref); + intel_frontbuffer_ref(front); if (!schedule_work(&front->flush_work)) intel_frontbuffer_put(front); } -static int frontbuffer_active(struct i915_active *ref) +void intel_frontbuffer_init(struct intel_frontbuffer *front, struct drm_device *drm) { - struct intel_frontbuffer *front = - container_of(ref, typeof(*front), write); - - kref_get(&front->ref); - return 0; + front->display = to_intel_display(drm); + atomic_set(&front->bits, 0); + INIT_WORK(&front->flush_work, intel_frontbuffer_flush_work); } -static void frontbuffer_retire(struct i915_active *ref) +void intel_frontbuffer_fini(struct intel_frontbuffer *front) { - struct intel_frontbuffer *front = - container_of(ref, typeof(*front), write); - - intel_frontbuffer_flush(front, ORIGIN_CS); - intel_frontbuffer_put(front); + drm_WARN_ON(front->display->drm, atomic_read(&front->bits)); } -static void frontbuffer_release(struct kref *ref) - __releases(&to_intel_display(front->obj->dev)->fb_tracking.lock) +struct intel_frontbuffer *intel_frontbuffer_get(struct drm_gem_object *obj) { - struct intel_frontbuffer *ret, *front = - container_of(ref, typeof(*front), ref); - struct drm_gem_object *obj = front->obj; - struct intel_display *display = to_intel_display(obj->dev); - - drm_WARN_ON(display->drm, atomic_read(&front->bits)); - - i915_ggtt_clear_scanout(to_intel_bo(obj)); - - ret = intel_bo_set_frontbuffer(obj, NULL); - drm_WARN_ON(display->drm, ret); - spin_unlock(&display->fb_tracking.lock); - - i915_active_fini(&front->write); - - drm_gem_object_put(obj); - kfree_rcu(front, rcu); -} - -struct intel_frontbuffer * -intel_frontbuffer_get(struct 
drm_gem_object *obj) -{ - struct intel_display *display = to_intel_display(obj->dev); - struct intel_frontbuffer *front, *cur; - - front = intel_bo_get_frontbuffer(obj); - if (front) - return front; - - front = kmalloc(sizeof(*front), GFP_KERNEL); - if (!front) - return NULL; - - drm_gem_object_get(obj); - - front->obj = obj; - kref_init(&front->ref); - atomic_set(&front->bits, 0); - i915_active_init(&front->write, - frontbuffer_active, - frontbuffer_retire, - I915_ACTIVE_RETIRE_SLEEPS); - INIT_WORK(&front->flush_work, intel_frontbuffer_flush_work); - - spin_lock(&display->fb_tracking.lock); - cur = intel_bo_set_frontbuffer(obj, front); - spin_unlock(&display->fb_tracking.lock); - - if (cur != front) { - drm_gem_object_put(obj); - kfree(front); - } - - return cur; + return intel_bo_frontbuffer_get(obj); } void intel_frontbuffer_put(struct intel_frontbuffer *front) { - kref_put_lock(&front->ref, - frontbuffer_release, - &to_intel_display(front->obj->dev)->fb_tracking.lock); + intel_bo_frontbuffer_put(front); } /** @@ -346,17 +243,13 @@ void intel_frontbuffer_track(struct intel_frontbuffer *old, BUILD_BUG_ON(I915_MAX_PLANES > INTEL_FRONTBUFFER_BITS_PER_PIPE); if (old) { - struct intel_display *display = to_intel_display(old->obj->dev); - - drm_WARN_ON(display->drm, + drm_WARN_ON(old->display->drm, !(atomic_read(&old->bits) & frontbuffer_bits)); atomic_andnot(frontbuffer_bits, &old->bits); } if (new) { - struct intel_display *display = to_intel_display(new->obj->dev); - - drm_WARN_ON(display->drm, + drm_WARN_ON(new->display->drm, atomic_read(&new->bits) & frontbuffer_bits); atomic_or(frontbuffer_bits, &new->bits); } diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.h b/drivers/gpu/drm/i915/display/intel_frontbuffer.h index 2fee12eaf9b6..22677acb4c06 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.h @@ -26,10 +26,9 @@ #include <linux/atomic.h> #include <linux/bits.h> -#include <linux/kref.h> - -#include "i915_active_types.h" +#include <linux/workqueue_types.h> +struct drm_device; struct drm_gem_object; struct intel_display; @@ -42,12 +41,8 @@ enum fb_op_origin { }; struct intel_frontbuffer { - struct kref ref; + struct intel_display *display; atomic_t bits; - struct i915_active write; - struct drm_gem_object *obj; - struct rcu_head rcu; - struct work_struct flush_work; }; @@ -68,10 +63,6 @@ struct intel_frontbuffer { GENMASK(INTEL_FRONTBUFFER_BITS_PER_PIPE * ((pipe) + 1) - 1, \ INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)) -void intel_frontbuffer_flip_prepare(struct intel_display *display, - unsigned frontbuffer_bits); -void intel_frontbuffer_flip_complete(struct intel_display *display, - unsigned frontbuffer_bits); void intel_frontbuffer_flip(struct intel_display *display, unsigned frontbuffer_bits); @@ -144,4 +135,7 @@ void intel_frontbuffer_track(struct intel_frontbuffer *old, struct intel_frontbuffer *new, unsigned int frontbuffer_bits); +void intel_frontbuffer_init(struct intel_frontbuffer *front, struct drm_device *drm); +void intel_frontbuffer_fini(struct intel_frontbuffer *front); + #endif /* __INTEL_FRONTBUFFER_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 82f3a40ecac7..795012d7c24c 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -449,7 +449,7 @@ gmbus_wait_idle(struct intel_display *display) add_wait_queue(&display->gmbus.wait_queue, &wait); intel_de_write_fw(display, GMBUS4(display), 
irq_enable); - ret = intel_de_wait_fw(display, GMBUS2(display), GMBUS_ACTIVE, 0, 10, NULL); + ret = intel_de_wait_fw_ms(display, GMBUS2(display), GMBUS_ACTIVE, 0, 10, NULL); intel_de_write_fw(display, GMBUS4(display), 0); remove_wait_queue(&display->gmbus.wait_queue, &wait); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 7195e8cf671c..5e1a96223a9c 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -410,9 +410,8 @@ static int intel_hdcp_load_keys(struct intel_display *display) } /* Wait for the keys to load (500us) */ - ret = intel_de_wait_custom(display, HDCP_KEY_STATUS, - HDCP_KEY_LOAD_DONE, HDCP_KEY_LOAD_DONE, - 10, 1, &val); + ret = intel_de_wait_ms(display, HDCP_KEY_STATUS, HDCP_KEY_LOAD_DONE, + HDCP_KEY_LOAD_DONE, 1, &val); if (ret) return ret; else if (!(val & HDCP_KEY_LOAD_STATUS)) @@ -428,7 +427,7 @@ static int intel_hdcp_load_keys(struct intel_display *display) static int intel_write_sha_text(struct intel_display *display, u32 sha_text) { intel_de_write(display, HDCP_SHA_TEXT, sha_text); - if (intel_de_wait_for_set(display, HDCP_REP_CTL, HDCP_SHA1_READY, 1)) { + if (intel_de_wait_for_set_ms(display, HDCP_REP_CTL, HDCP_SHA1_READY, 1)) { drm_err(display->drm, "Timed out waiting for SHA1 ready\n"); return -ETIMEDOUT; } @@ -707,8 +706,8 @@ int intel_hdcp_validate_v_prime(struct intel_connector *connector, /* Tell the HW we're done with the hash and wait for it to ACK */ intel_de_write(display, HDCP_REP_CTL, rep_ctl | HDCP_SHA1_COMPLETE_HASH); - if (intel_de_wait_for_set(display, HDCP_REP_CTL, - HDCP_SHA1_COMPLETE, 1)) { + if (intel_de_wait_for_set_ms(display, HDCP_REP_CTL, + HDCP_SHA1_COMPLETE, 1)) { drm_err(display->drm, "Timed out waiting for SHA1 complete\n"); return -ETIMEDOUT; } @@ -856,9 +855,9 @@ static int intel_hdcp_auth(struct intel_connector *connector) HDCP_CONF_CAPTURE_AN); /* Wait for An to be acquired */ - if (intel_de_wait_for_set(display, - HDCP_STATUS(display, cpu_transcoder, port), - HDCP_STATUS_AN_READY, 1)) { + if (intel_de_wait_for_set_ms(display, + HDCP_STATUS(display, cpu_transcoder, port), + HDCP_STATUS_AN_READY, 1)) { drm_err(display->drm, "Timed out waiting for An\n"); return -ETIMEDOUT; } @@ -953,10 +952,10 @@ static int intel_hdcp_auth(struct intel_connector *connector) } /* Wait for encryption confirmation */ - if (intel_de_wait_for_set(display, - HDCP_STATUS(display, cpu_transcoder, port), - HDCP_STATUS_ENC, - HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { + if (intel_de_wait_for_set_ms(display, + HDCP_STATUS(display, cpu_transcoder, port), + HDCP_STATUS_ENC, + HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { drm_err(display->drm, "Timed out waiting for encryption\n"); return -ETIMEDOUT; } @@ -1013,9 +1012,9 @@ static int _intel_hdcp_disable(struct intel_connector *connector) hdcp->hdcp_encrypted = false; intel_de_write(display, HDCP_CONF(display, cpu_transcoder, port), 0); - if (intel_de_wait_for_clear(display, - HDCP_STATUS(display, cpu_transcoder, port), - ~0, HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { + if (intel_de_wait_for_clear_ms(display, + HDCP_STATUS(display, cpu_transcoder, port), + ~0, HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { drm_err(display->drm, "Failed to disable HDCP, timeout clearing status\n"); return -ETIMEDOUT; @@ -1940,11 +1939,10 @@ static int hdcp2_enable_encryption(struct intel_connector *connector) intel_de_rmw(display, HDCP2_CTL(display, cpu_transcoder, port), 0, CTL_LINK_ENCRYPTION_REQ); - ret = intel_de_wait_for_set(display, - 
HDCP2_STATUS(display, cpu_transcoder, - port), - LINK_ENCRYPTION_STATUS, - HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); + ret = intel_de_wait_for_set_ms(display, + HDCP2_STATUS(display, cpu_transcoder, port), + LINK_ENCRYPTION_STATUS, + HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); dig_port->hdcp.auth_status = true; return ret; @@ -1966,11 +1964,10 @@ static int hdcp2_disable_encryption(struct intel_connector *connector) intel_de_rmw(display, HDCP2_CTL(display, cpu_transcoder, port), CTL_LINK_ENCRYPTION_REQ, 0); - ret = intel_de_wait_for_clear(display, - HDCP2_STATUS(display, cpu_transcoder, - port), - LINK_ENCRYPTION_STATUS, - HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); + ret = intel_de_wait_for_clear_ms(display, + HDCP2_STATUS(display, cpu_transcoder, port), + LINK_ENCRYPTION_STATUS, + HDCP_ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); if (ret == -ETIMEDOUT) drm_dbg_kms(display->drm, "Disable Encryption Timedout"); diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 5c637341b210..908faf17f93d 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1598,8 +1598,8 @@ bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port, intel_de_write(display, HDCP_RPRIME(display, cpu_transcoder, port), ri.reg); /* Wait for Ri prime match */ - ret = intel_de_wait_for_set(display, HDCP_STATUS(display, cpu_transcoder, port), - HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC, 1); + ret = intel_de_wait_for_set_ms(display, HDCP_STATUS(display, cpu_transcoder, port), + HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC, 1); if (ret) { drm_dbg_kms(display->drm, "Ri' mismatch detected (%x)\n", intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, diff --git a/drivers/gpu/drm/i915/display/intel_lt_phy.c b/drivers/gpu/drm/i915/display/intel_lt_phy.c index af48d6cde226..a67eb4f7f897 100644 --- a/drivers/gpu/drm/i915/display/intel_lt_phy.c +++ b/drivers/gpu/drm/i915/display/intel_lt_phy.c @@ -6,7 +6,6 @@ #include <drm/drm_print.h> #include "i915_reg.h" -#include "i915_utils.h" #include "intel_cx0_phy.h" #include "intel_cx0_phy_regs.h" #include "intel_ddi.h" @@ -14,6 +13,7 @@ #include "intel_de.h" #include "intel_display.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dpll_mgr.h" #include "intel_hdmi.h" #include "intel_lt_phy.h" @@ -31,6 +31,32 @@ #define INTEL_LT_PHY_BOTH_LANES (INTEL_LT_PHY_LANE1 |\ INTEL_LT_PHY_LANE0) #define MODE_DP 3 +#define Q32_TO_INT(x) ((x) >> 32) +#define Q32_TO_FRAC(x) ((x) & 0xFFFFFFFF) +#define DCO_MIN_FREQ_MHZ 11850 +#define REF_CLK_KHZ 38400 +#define TDC_RES_MULTIPLIER 10000000ULL + +struct phy_param_t { + u32 val; + u32 addr; +}; + +struct lt_phy_params { + struct phy_param_t pll_reg4; + struct phy_param_t pll_reg3; + struct phy_param_t pll_reg5; + struct phy_param_t pll_reg57; + struct phy_param_t lf; + struct phy_param_t tdc; + struct phy_param_t ssc; + struct phy_param_t bias2; + struct phy_param_t bias_trim; + struct phy_param_t dco_med; + struct phy_param_t dco_fine; + struct phy_param_t ssc_inj; + struct phy_param_t surv_bonus; +}; static const struct intel_lt_phy_pll_state xe3plpd_lt_dp_rbr = { .clock = 162000, @@ -1041,9 +1067,9 @@ static int __intel_lt_phy_p2p_write_once(struct intel_encoder *encoder, int ack; u32 val; - if (intel_de_wait_for_clear(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, lane), - XELPDP_PORT_P2P_TRANSACTION_PENDING, - XELPDP_MSGBUS_TIMEOUT_SLOW)) { + if (intel_de_wait_for_clear_ms(display, XELPDP_PORT_M2P_MSGBUS_CTL(display, port, 
lane), + XELPDP_PORT_P2P_TRANSACTION_PENDING, + XELPDP_MSGBUS_TIMEOUT_MS)) { drm_dbg_kms(display->drm, "PHY %c Timeout waiting for previous transaction to complete. Resetting bus.\n", phy_name(phy)); @@ -1175,13 +1201,11 @@ intel_lt_phy_lane_reset(struct intel_encoder *encoder, XELPDP_LANE_PCLK_PLL_REQUEST(0), XELPDP_LANE_PCLK_PLL_REQUEST(0)); - if (intel_de_wait_custom(display, XELPDP_PORT_CLOCK_CTL(display, port), - XELPDP_LANE_PCLK_PLL_ACK(0), - XELPDP_LANE_PCLK_PLL_ACK(0), - XE3PLPD_MACCLK_TURNON_LATENCY_US, - XE3PLPD_MACCLK_TURNON_LATENCY_MS, NULL)) - drm_warn(display->drm, "PHY %c PLL MacCLK assertion Ack not done after %dus.\n", - phy_name(phy), XE3PLPD_MACCLK_TURNON_LATENCY_MS * 1000); + if (intel_de_wait_for_set_ms(display, XELPDP_PORT_CLOCK_CTL(display, port), + XELPDP_LANE_PCLK_PLL_ACK(0), + XE3PLPD_MACCLK_TURNON_LATENCY_MS)) + drm_warn(display->drm, "PHY %c PLL MacCLK assertion ack not done\n", + phy_name(phy)); intel_de_rmw(display, XELPDP_PORT_CLOCK_CTL(display, port), XELPDP_FORWARD_CLOCK_UNGATE, @@ -1190,18 +1214,17 @@ intel_lt_phy_lane_reset(struct intel_encoder *encoder, intel_de_rmw(display, XELPDP_PORT_BUF_CTL2(display, port), lane_pipe_reset | lane_phy_pulse_status, 0); - if (intel_de_wait_custom(display, XELPDP_PORT_BUF_CTL2(display, port), - lane_phy_current_status, 0, - XE3PLPD_RESET_END_LATENCY_US, 2, NULL)) - drm_warn(display->drm, - "PHY %c failed to bring out of Lane reset after %dus.\n", - phy_name(phy), XE3PLPD_RESET_END_LATENCY_US); + if (intel_de_wait_for_clear_ms(display, XELPDP_PORT_BUF_CTL2(display, port), + lane_phy_current_status, + XE3PLPD_RESET_END_LATENCY_MS)) + drm_warn(display->drm, "PHY %c failed to bring out of lane reset\n", + phy_name(phy)); - if (intel_de_wait_custom(display, XELPDP_PORT_BUF_CTL2(display, port), - lane_phy_pulse_status, lane_phy_pulse_status, - XE3PLPD_RATE_CALIB_DONE_LATENCY_US, 0, NULL)) - drm_warn(display->drm, "PHY %c PLL rate not changed after %dus.\n", - phy_name(phy), XE3PLPD_RATE_CALIB_DONE_LATENCY_US); + if (intel_de_wait_for_set_ms(display, XELPDP_PORT_BUF_CTL2(display, port), + lane_phy_pulse_status, + XE3PLPD_RATE_CALIB_DONE_LATENCY_MS)) + drm_warn(display->drm, "PHY %c PLL rate not changed\n", + phy_name(phy)); intel_de_rmw(display, XELPDP_PORT_BUF_CTL2(display, port), lane_phy_pulse_status, 0); } @@ -1356,10 +1379,308 @@ intel_lt_phy_pll_is_ssc_enabled(struct intel_crtc_state *crtc_state, return false; } +static u64 mul_q32_u32(u64 a_q32, u32 b) +{ + u64 p0, p1, carry, result; + u64 x_hi = a_q32 >> 32; + u64 x_lo = a_q32 & 0xFFFFFFFFULL; + + p0 = x_lo * (u64)b; + p1 = x_hi * (u64)b; + carry = p0 >> 32; + result = (p1 << 32) + (carry << 32) + (p0 & 0xFFFFFFFFULL); + + return result; +} + +static bool +calculate_target_dco_and_loop_cnt(u32 frequency_khz, u64 *target_dco_mhz, u32 *loop_cnt) +{ + u32 ppm_value = 1; + u32 dco_min_freq = DCO_MIN_FREQ_MHZ; + u32 dco_max_freq = 16200; + u32 dco_min_freq_low = 10000; + u32 dco_max_freq_low = 12000; + u64 val = 0; + u64 refclk_khz = REF_CLK_KHZ; + u64 m2div = 0; + u64 val_with_frac = 0; + u64 ppm = 0; + u64 temp0 = 0, temp1, scale; + int ppm_cnt, dco_count, y; + + for (ppm_cnt = 0; ppm_cnt < 5; ppm_cnt++) { + ppm_value = ppm_cnt == 2 ? 
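
Editorial aside: mul_q32_u32() above multiplies a Q32.32 fixed-point value by a u32 by forming the integer-part and fraction-part products separately and recombining them, and Q32_TO_INT()/Q32_TO_FRAC() split a result back into its halves. A self-contained demonstration of the representation (simplified recombination, worked value 2.5 x 3 = 7.5):

    #include <stdint.h>
    #include <stdio.h>

    #define Q32_TO_INT(x)  ((x) >> 32)
    #define Q32_TO_FRAC(x) ((x) & 0xFFFFFFFFULL)

    /* Multiply a Q32.32 value by a plain u32: integer and fractional halves
     * are multiplied separately, then recombined. */
    static uint64_t mul_q32_u32(uint64_t a_q32, uint32_t b)
    {
            uint64_t lo = (a_q32 & 0xFFFFFFFFULL) * b;   /* fraction times b */
            uint64_t hi = (a_q32 >> 32) * b;             /* integer times b  */

            return (hi << 32) + lo;                      /* carry folds in   */
    }

    int main(void)
    {
            uint64_t two_and_half = (2ULL << 32) | 0x80000000ULL; /* 2.5 */
            uint64_t r = mul_q32_u32(two_and_half, 3);

            /* Prints int=7 frac=0x80000000, i.e. 7.5 in Q32.32 */
            printf("int=%llu frac=0x%08llx\n",
                   (unsigned long long)Q32_TO_INT(r),
                   (unsigned long long)Q32_TO_FRAC(r));
            return 0;
    }
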
2 : 1; + for (dco_count = 0; dco_count < 2; dco_count++) { + if (dco_count == 1) { + dco_min_freq = dco_min_freq_low; + dco_max_freq = dco_max_freq_low; + } + for (y = 2; y <= 255; y += 2) { + val = div64_u64((u64)y * frequency_khz, 200); + m2div = div64_u64(((u64)(val) << 32), refclk_khz); + m2div = mul_q32_u32(m2div, 500); + val_with_frac = mul_q32_u32(m2div, refclk_khz); + val_with_frac = div64_u64(val_with_frac, 500); + temp1 = Q32_TO_INT(val_with_frac); + temp0 = (temp1 > val) ? (temp1 - val) : + (val - temp1); + ppm = div64_u64(temp0, val); + if (temp1 >= dco_min_freq && + temp1 <= dco_max_freq && + ppm < ppm_value) { + /* Round to two places */ + scale = (1ULL << 32) / 100; + temp0 = DIV_ROUND_UP_ULL(val_with_frac, + scale); + *target_dco_mhz = temp0 * scale; + *loop_cnt = y; + return true; + } + } + } + } + + return false; +} + +static void set_phy_vdr_addresses(struct lt_phy_params *p, int pll_type) +{ + p->pll_reg4.addr = PLL_REG_ADDR(PLL_REG4_ADDR, pll_type); + p->pll_reg3.addr = PLL_REG_ADDR(PLL_REG3_ADDR, pll_type); + p->pll_reg5.addr = PLL_REG_ADDR(PLL_REG5_ADDR, pll_type); + p->pll_reg57.addr = PLL_REG_ADDR(PLL_REG57_ADDR, pll_type); + p->lf.addr = PLL_REG_ADDR(PLL_LF_ADDR, pll_type); + p->tdc.addr = PLL_REG_ADDR(PLL_TDC_ADDR, pll_type); + p->ssc.addr = PLL_REG_ADDR(PLL_SSC_ADDR, pll_type); + p->bias2.addr = PLL_REG_ADDR(PLL_BIAS2_ADDR, pll_type); + p->bias_trim.addr = PLL_REG_ADDR(PLL_BIAS_TRIM_ADDR, pll_type); + p->dco_med.addr = PLL_REG_ADDR(PLL_DCO_MED_ADDR, pll_type); + p->dco_fine.addr = PLL_REG_ADDR(PLL_DCO_FINE_ADDR, pll_type); + p->ssc_inj.addr = PLL_REG_ADDR(PLL_SSC_INJ_ADDR, pll_type); + p->surv_bonus.addr = PLL_REG_ADDR(PLL_SURV_BONUS_ADDR, pll_type); +} + +static void compute_ssc(struct lt_phy_params *p, u32 ana_cfg) +{ + int ssc_stepsize = 0; + int ssc_steplen = 0; + int ssc_steplog = 0; + + p->ssc.val = (1 << 31) | (ana_cfg << 24) | (ssc_steplog << 16) | + (ssc_stepsize << 8) | ssc_steplen; +} + +static void compute_bias2(struct lt_phy_params *p) +{ + u32 ssc_en_local = 0; + u64 dynctrl_ovrd_en = 0; + + p->bias2.val = (dynctrl_ovrd_en << 31) | (ssc_en_local << 30) | + (1 << 23) | (1 << 24) | (32 << 16) | (1 << 8); +} + +static void compute_tdc(struct lt_phy_params *p, u64 tdc_fine) +{ + u32 settling_time = 15; + u32 bias_ovr_en = 1; + u32 coldstart = 1; + u32 true_lock = 2; + u32 early_lock = 1; + u32 lock_ovr_en = 1; + u32 lock_thr = tdc_fine ? 3 : 5; + u32 unlock_thr = tdc_fine ? 5 : 11; + + p->tdc.val = (u32)((2 << 30) + (settling_time << 16) + (bias_ovr_en << 15) + + (lock_ovr_en << 14) + (coldstart << 12) + (true_lock << 10) + + (early_lock << 8) + (unlock_thr << 4) + lock_thr); +} + +static void compute_dco_med(struct lt_phy_params *p) +{ + u32 cselmed_en = 0; + u32 cselmed_dyn_adj = 0; + u32 cselmed_ratio = 39; + u32 cselmed_thr = 8; + + p->dco_med.val = (cselmed_en << 31) + (cselmed_dyn_adj << 30) + + (cselmed_ratio << 24) + (cselmed_thr << 21); +} + +static void compute_dco_fine(struct lt_phy_params *p, u32 dco_12g) +{ + u32 dco_fine0_tune_2_0 = 0; + u32 dco_fine1_tune_2_0 = 0; + u32 dco_fine2_tune_2_0 = 0; + u32 dco_fine3_tune_2_0 = 0; + u32 dco_dith0_tune_2_0 = 0; + u32 dco_dith1_tune_2_0 = 0; + + dco_fine0_tune_2_0 = dco_12g ? 4 : 3; + dco_fine1_tune_2_0 = 2; + dco_fine2_tune_2_0 = dco_12g ? 2 : 1; + dco_fine3_tune_2_0 = 5; + dco_dith0_tune_2_0 = dco_12g ? 
4 : 3; + dco_dith1_tune_2_0 = 2; + + p->dco_fine.val = (dco_dith1_tune_2_0 << 19) + + (dco_dith0_tune_2_0 << 16) + + (dco_fine3_tune_2_0 << 11) + + (dco_fine2_tune_2_0 << 8) + + (dco_fine1_tune_2_0 << 3) + + dco_fine0_tune_2_0; +} + +int +intel_lt_phy_calculate_hdmi_state(struct intel_lt_phy_pll_state *lt_state, + u32 frequency_khz) +{ +#define DATA_ASSIGN(i, pll_reg) \ + do { \ + lt_state->data[i][0] = (u8)((((pll_reg).val) & 0xFF000000) >> 24); \ + lt_state->data[i][1] = (u8)((((pll_reg).val) & 0x00FF0000) >> 16); \ + lt_state->data[i][2] = (u8)((((pll_reg).val) & 0x0000FF00) >> 8); \ + lt_state->data[i][3] = (u8)((((pll_reg).val) & 0x000000FF)); \ + } while (0) +#define ADDR_ASSIGN(i, pll_reg) \ + do { \ + lt_state->addr_msb[i] = ((pll_reg).addr >> 8) & 0xFF; \ + lt_state->addr_lsb[i] = (pll_reg).addr & 0xFF; \ + } while (0) + + bool found = false; + struct lt_phy_params p; + u32 dco_fmin = DCO_MIN_FREQ_MHZ; + u64 refclk_khz = REF_CLK_KHZ; + u32 refclk_mhz_int = REF_CLK_KHZ / 1000; + u64 m2div = 0; + u64 target_dco_mhz = 0; + u64 tdc_fine, tdc_targetcnt; + u64 feedfwd_gain ,feedfwd_cal_en; + u64 tdc_res = 30; + u32 prop_coeff; + u32 int_coeff; + u32 ndiv = 1; + u32 m1div = 1, m2div_int, m2div_frac; + u32 frac_en; + u32 ana_cfg; + u32 loop_cnt = 0; + u32 gain_ctrl = 2; + u32 postdiv = 0; + u32 dco_12g = 0; + u32 pll_type = 0; + u32 d1 = 2, d3 = 5, d4 = 0, d5 = 0; + u32 d6 = 0, d6_new = 0; + u32 d7, d8 = 0; + u32 bonus_7_0 = 0; + u32 csel2fo = 11; + u32 csel2fo_ovrd_en = 1; + u64 temp0, temp1, temp2, temp3; + + p.surv_bonus.val = (bonus_7_0 << 16); + p.pll_reg4.val = (refclk_mhz_int << 17) + + (ndiv << 9) + (1 << 4); + p.bias_trim.val = (csel2fo_ovrd_en << 30) + (csel2fo << 24); + p.ssc_inj.val = 0; + found = calculate_target_dco_and_loop_cnt(frequency_khz, &target_dco_mhz, &loop_cnt); + if (!found) + return -EINVAL; + + m2div = div64_u64(target_dco_mhz, (refclk_khz * ndiv * m1div)); + m2div = mul_q32_u32(m2div, 1000); + if (Q32_TO_INT(m2div) > 511) + return -EINVAL; + + m2div_int = (u32)Q32_TO_INT(m2div); + m2div_frac = (u32)(Q32_TO_FRAC(m2div)); + frac_en = (m2div_frac > 0) ? 1 : 0; + + if (frac_en > 0) + tdc_res = 70; + else + tdc_res = 36; + tdc_fine = tdc_res > 50 ? 1 : 0; + temp0 = tdc_res * 40 * 11; + temp1 = div64_u64(((4 * TDC_RES_MULTIPLIER) + temp0) * 500, temp0 * refclk_khz); + temp2 = div64_u64(temp0 * refclk_khz, 1000); + temp3 = div64_u64(((8 * TDC_RES_MULTIPLIER) + temp2), temp2); + tdc_targetcnt = tdc_res < 50 ? (int)(temp1) : (int)(temp3); + tdc_targetcnt = (int)(tdc_targetcnt / 2); + temp0 = mul_q32_u32(target_dco_mhz, tdc_res); + temp0 >>= 32; + feedfwd_gain = (m2div_frac > 0) ? div64_u64(m1div * TDC_RES_MULTIPLIER, temp0) : 0; + feedfwd_cal_en = frac_en; + + temp0 = (u32)Q32_TO_INT(target_dco_mhz); + prop_coeff = (temp0 >= dco_fmin) ? 3 : 4; + int_coeff = (temp0 >= dco_fmin) ? 7 : 8; + ana_cfg = (temp0 >= dco_fmin) ? 8 : 6; + dco_12g = (temp0 >= dco_fmin) ? 0 : 1; + + if (temp0 > 12960) + d7 = 10; + else + d7 = 8; + + d8 = loop_cnt / 2; + d4 = d8 * 2; + + /* Compute pll_reg3,5,57 & lf */ + p.pll_reg3.val = (u32)((d4 << 21) + (d3 << 18) + (d1 << 15) + (m2div_int << 5)); + p.pll_reg5.val = m2div_frac; + postdiv = (d5 == 0) ? 9 : d5; + d6_new = (d6 == 0) ? 
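
Editorial aside: the DATA_ASSIGN()/ADDR_ASSIGN() macros defined in intel_lt_phy_calculate_hdmi_state() split each 32-bit PLL word and 16-bit VDR address into the per-byte lanes stored in lt_state, MSB first; the REGVAL() macro further down performs the reverse when reading the clock back. A round-trip illustration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t val = 0xA1B2C3D4;
            uint8_t data[4];

            data[0] = (val >> 24) & 0xFF;   /* MSB first, as in DATA_ASSIGN() */
            data[1] = (val >> 16) & 0xFF;
            data[2] = (val >> 8) & 0xFF;
            data[3] = val & 0xFF;

            uint32_t back = data[3] | (data[2] << 8) | (data[1] << 16) |
                            ((uint32_t)data[0] << 24);      /* REGVAL() order */

            printf("0x%08X -> %02X %02X %02X %02X -> 0x%08X\n",
                   val, data[0], data[1], data[2], data[3], back);
            return 0;
    }
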
40 : d6; + p.pll_reg57.val = (d7 << 24) + (postdiv << 15) + (d8 << 7) + d6_new; + p.lf.val = (u32)((frac_en << 31) + (1 << 30) + (frac_en << 29) + + (feedfwd_cal_en << 28) + (tdc_fine << 27) + + (gain_ctrl << 24) + (feedfwd_gain << 16) + + (int_coeff << 12) + (prop_coeff << 8) + tdc_targetcnt); + + compute_ssc(&p, ana_cfg); + compute_bias2(&p); + compute_tdc(&p, tdc_fine); + compute_dco_med(&p); + compute_dco_fine(&p, dco_12g); + + pll_type = ((frequency_khz == 10000) || (frequency_khz == 20000) || + (frequency_khz == 2500) || (dco_12g == 1)) ? 0 : 1; + set_phy_vdr_addresses(&p, pll_type); + + lt_state->config[0] = 0x84; + lt_state->config[1] = 0x2d; + ADDR_ASSIGN(0, p.pll_reg4); + ADDR_ASSIGN(1, p.pll_reg3); + ADDR_ASSIGN(2, p.pll_reg5); + ADDR_ASSIGN(3, p.pll_reg57); + ADDR_ASSIGN(4, p.lf); + ADDR_ASSIGN(5, p.tdc); + ADDR_ASSIGN(6, p.ssc); + ADDR_ASSIGN(7, p.bias2); + ADDR_ASSIGN(8, p.bias_trim); + ADDR_ASSIGN(9, p.dco_med); + ADDR_ASSIGN(10, p.dco_fine); + ADDR_ASSIGN(11, p.ssc_inj); + ADDR_ASSIGN(12, p.surv_bonus); + DATA_ASSIGN(0, p.pll_reg4); + DATA_ASSIGN(1, p.pll_reg3); + DATA_ASSIGN(2, p.pll_reg5); + DATA_ASSIGN(3, p.pll_reg57); + DATA_ASSIGN(4, p.lf); + DATA_ASSIGN(5, p.tdc); + DATA_ASSIGN(6, p.ssc); + DATA_ASSIGN(7, p.bias2); + DATA_ASSIGN(8, p.bias_trim); + DATA_ASSIGN(9, p.dco_med); + DATA_ASSIGN(10, p.dco_fine); + DATA_ASSIGN(11, p.ssc_inj); + DATA_ASSIGN(12, p.surv_bonus); + + return 0; +} + static int -intel_lt_phy_calc_hdmi_port_clock(const struct intel_lt_phy_pll_state *lt_state) +intel_lt_phy_calc_hdmi_port_clock(const struct intel_crtc_state *crtc_state) { -#define REF_CLK_KHZ 38400 #define REGVAL(i) ( \ (lt_state->data[i][3]) | \ (lt_state->data[i][2] << 8) | \ @@ -1367,6 +1688,9 @@ intel_lt_phy_calc_hdmi_port_clock(const struct intel_lt_phy_pll_state *lt_state) (lt_state->data[i][0] << 24) \ ) + struct intel_display *display = to_intel_display(crtc_state); + const struct intel_lt_phy_pll_state *lt_state = + &crtc_state->dpll_hw_state.ltpll; int clk = 0; u32 d8, pll_reg_5, pll_reg_3, pll_reg_57, m2div_frac, m2div_int; u64 temp0, temp1; @@ -1409,11 +1733,14 @@ intel_lt_phy_calc_hdmi_port_clock(const struct intel_lt_phy_pll_state *lt_state) * frequency = (m2div * refclk_khz / (d8 * 10)) */ d8 = (pll_reg_57 & REG_GENMASK(14, 7)) >> 7; + if (d8 == 0) { + drm_WARN_ON(display->drm, + "Invalid port clock using lowest HDMI portclock\n"); + return xe3plpd_lt_hdmi_252.clock; + } m2div_int = (pll_reg_3 & REG_GENMASK(14, 5)) >> 5; temp0 = ((u64)m2div_frac * REF_CLK_KHZ) >> 32; temp1 = (u64)m2div_int * REF_CLK_KHZ; - if (d8 == 0) - return 0; clk = div_u64((temp1 + temp0), d8 * 10); @@ -1442,7 +1769,7 @@ intel_lt_phy_calc_port_clock(struct intel_encoder *encoder, lt_state->config[0]); clk = intel_lt_phy_get_dp_clock(rate); } else { - clk = intel_lt_phy_calc_hdmi_port_clock(lt_state); + clk = intel_lt_phy_calc_hdmi_port_clock(crtc_state); } return clk; @@ -1472,7 +1799,10 @@ intel_lt_phy_pll_calc_state(struct intel_crtc_state *crtc_state, } } - /* TODO: Add a function to compute the data for HDMI TMDS*/ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { + return intel_lt_phy_calculate_hdmi_state(&crtc_state->dpll_hw_state.ltpll, + crtc_state->port_clock); + } return -EINVAL; } @@ -1651,11 +1981,11 @@ void intel_lt_phy_pll_enable(struct intel_encoder *encoder, XELPDP_LANE_PCLK_PLL_REQUEST(0), 0); /* 8. Poll for PORT_CLOCK_CTL[PCLK PLL Ack LN0]= 0. 
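
Editorial aside: intel_lt_phy_calc_hdmi_port_clock() recovers the port clock from the stored words as clock = (m2div_int + m2div_frac / 2^32) * refclk / (d8 * 10), with the new d8 == 0 guard falling back to the lowest HDMI table entry instead of returning 0. A worked example with purely illustrative values (not taken from the tables in the diff):

    #include <stdint.h>
    #include <stdio.h>

    #define REF_CLK_KHZ 38400ULL

    int main(void)
    {
            uint32_t m2div_int = 132;          /* assumed example value */
            uint32_t m2div_frac = 0x40000000;  /* 0.25 in Q.32, assumed */
            uint32_t d8 = 10;                  /* assumed example value */

            uint64_t khz = ((uint64_t)m2div_frac * REF_CLK_KHZ) >> 32;
            khz += (uint64_t)m2div_int * REF_CLK_KHZ;
            khz /= (uint64_t)d8 * 10;

            printf("port clock ~= %llu kHz\n", (unsigned long long)khz);
            return 0;                          /* prints 50784 kHz here */
    }
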
*/ - if (intel_de_wait_custom(display, XELPDP_PORT_CLOCK_CTL(display, port), - XELPDP_LANE_PCLK_PLL_ACK(0), 0, - XE3PLPD_MACCLK_TURNOFF_LATENCY_US, 0, NULL)) - drm_warn(display->drm, "PHY %c PLL MacCLK Ack deassertion Timeout after %dus.\n", - phy_name(phy), XE3PLPD_MACCLK_TURNOFF_LATENCY_US); + if (intel_de_wait_for_clear_us(display, XELPDP_PORT_CLOCK_CTL(display, port), + XELPDP_LANE_PCLK_PLL_ACK(0), + XE3PLPD_MACCLK_TURNOFF_LATENCY_US)) + drm_warn(display->drm, "PHY %c PLL MacCLK ack deassertion timeout\n", + phy_name(phy)); /* * 9. Follow the Display Voltage Frequency Switching - Sequence Before Frequency @@ -1671,12 +2001,11 @@ void intel_lt_phy_pll_enable(struct intel_encoder *encoder, XELPDP_LANE_PCLK_PLL_REQUEST(0)); /* 12. Poll for PORT_CLOCK_CTL[PCLK PLL Ack LN0]= 1. */ - if (intel_de_wait_custom(display, XELPDP_PORT_CLOCK_CTL(display, port), - XELPDP_LANE_PCLK_PLL_ACK(0), - XELPDP_LANE_PCLK_PLL_ACK(0), - XE3PLPD_MACCLK_TURNON_LATENCY_US, 2, NULL)) - drm_warn(display->drm, "PHY %c PLL MacCLK Ack assertion Timeout after %dus.\n", - phy_name(phy), XE3PLPD_MACCLK_TURNON_LATENCY_US); + if (intel_de_wait_for_set_ms(display, XELPDP_PORT_CLOCK_CTL(display, port), + XELPDP_LANE_PCLK_PLL_ACK(0), + XE3PLPD_MACCLK_TURNON_LATENCY_MS)) + drm_warn(display->drm, "PHY %c PLL MacCLK ack assertion timeout\n", + phy_name(phy)); /* * 13. Ungate the forward clock by setting @@ -1700,11 +2029,11 @@ void intel_lt_phy_pll_enable(struct intel_encoder *encoder, rate_update, MB_WRITE_COMMITTED); /* 16. Poll for PORT_BUF_CTL2 register PHY Pulse Status = 1 for Owned PHY Lanes. */ - if (intel_de_wait_custom(display, XELPDP_PORT_BUF_CTL2(display, port), - lane_phy_pulse_status, lane_phy_pulse_status, - XE3PLPD_RATE_CALIB_DONE_LATENCY_US, 2, NULL)) - drm_warn(display->drm, "PHY %c PLL rate not changed after %dus.\n", - phy_name(phy), XE3PLPD_RATE_CALIB_DONE_LATENCY_US); + if (intel_de_wait_for_set_ms(display, XELPDP_PORT_BUF_CTL2(display, port), + lane_phy_pulse_status, + XE3PLPD_RATE_CALIB_DONE_LATENCY_MS)) + drm_warn(display->drm, "PHY %c PLL rate not changed\n", + phy_name(phy)); /* 17. SW clears PORT_BUF_CTL2 [PHY Pulse Status]. */ intel_de_rmw(display, XELPDP_PORT_BUF_CTL2(display, port), @@ -1758,13 +2087,11 @@ void intel_lt_phy_pll_disable(struct intel_encoder *encoder) lane_pipe_reset); /* 3. Poll for PORT_BUF_CTL2<port> Lane<PHY Lanes Owned> PHY Current Status == 1. */ - if (intel_de_wait_custom(display, XELPDP_PORT_BUF_CTL2(display, port), - lane_phy_current_status, - lane_phy_current_status, - XE3PLPD_RESET_START_LATENCY_US, 0, NULL)) - drm_warn(display->drm, - "PHY %c failed to reset Lane after %dms.\n", - phy_name(phy), XE3PLPD_RESET_START_LATENCY_US); + if (intel_de_wait_for_set_us(display, XELPDP_PORT_BUF_CTL2(display, port), + lane_phy_current_status, + XE3PLPD_RESET_START_LATENCY_US)) + drm_warn(display->drm, "PHY %c failed to reset lane\n", + phy_name(phy)); /* 4. Clear for PHY pulse status on owned PHY lanes. */ intel_de_rmw(display, XELPDP_PORT_BUF_CTL2(display, port), @@ -1783,11 +2110,11 @@ void intel_lt_phy_pll_disable(struct intel_encoder *encoder) intel_de_write(display, DDI_CLK_VALFREQ(encoder->port), 0); /* 8. Poll for PORT_CLOCK_CTL[PCLK PLL Ack LN0]= 0. 
*/ - if (intel_de_wait_custom(display, XELPDP_PORT_CLOCK_CTL(display, port), - XELPDP_LANE_PCLK_PLL_ACK(0), 0, - XE3PLPD_MACCLK_TURNOFF_LATENCY_US, 0, NULL)) - drm_warn(display->drm, "PHY %c PLL MacCLK Ack deassertion Timeout after %dus.\n", - phy_name(phy), XE3PLPD_MACCLK_TURNOFF_LATENCY_US); + if (intel_de_wait_for_clear_us(display, XELPDP_PORT_CLOCK_CTL(display, port), + XELPDP_LANE_PCLK_PLL_ACK(0), + XE3PLPD_MACCLK_TURNOFF_LATENCY_US)) + drm_warn(display->drm, "PHY %c PLL MacCLK ack deassertion timeout\n", + phy_name(phy)); /* * 9. Follow the Display Voltage Frequency Switching - diff --git a/drivers/gpu/drm/i915/display/intel_lt_phy.h b/drivers/gpu/drm/i915/display/intel_lt_phy.h index a538d4c69210..b7911acd7dcd 100644 --- a/drivers/gpu/drm/i915/display/intel_lt_phy.h +++ b/drivers/gpu/drm/i915/display/intel_lt_phy.h @@ -35,6 +35,9 @@ void intel_lt_phy_pll_readout_hw_state(struct intel_encoder *encoder, struct intel_lt_phy_pll_state *pll_state); void intel_lt_phy_pll_state_verify(struct intel_atomic_state *state, struct intel_crtc *crtc); +int +intel_lt_phy_calculate_hdmi_state(struct intel_lt_phy_pll_state *lt_state, + u32 frequency_khz); void intel_xe3plpd_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_xe3plpd_pll_disable(struct intel_encoder *encoder); diff --git a/drivers/gpu/drm/i915/display/intel_lt_phy_regs.h b/drivers/gpu/drm/i915/display/intel_lt_phy_regs.h index 9223487d764e..98ccc069a69b 100644 --- a/drivers/gpu/drm/i915/display/intel_lt_phy_regs.h +++ b/drivers/gpu/drm/i915/display/intel_lt_phy_regs.h @@ -7,13 +7,12 @@ #define __INTEL_LT_PHY_REGS_H__ #define XE3PLPD_MSGBUS_TIMEOUT_FAST_US 500 -#define XE3PLPD_MACCLK_TURNON_LATENCY_MS 1 -#define XE3PLPD_MACCLK_TURNON_LATENCY_US 21 +#define XE3PLPD_MACCLK_TURNON_LATENCY_MS 2 #define XE3PLPD_MACCLK_TURNOFF_LATENCY_US 1 -#define XE3PLPD_RATE_CALIB_DONE_LATENCY_US 50 +#define XE3PLPD_RATE_CALIB_DONE_LATENCY_MS 1 #define XE3PLPD_RESET_START_LATENCY_US 10 #define XE3PLPD_PWRDN_TO_RDY_LATENCY_US 4 -#define XE3PLPD_RESET_END_LATENCY_US 200 +#define XE3PLPD_RESET_END_LATENCY_MS 2 /* LT Phy MAC Register */ #define LT_PHY_MAC_VDR _MMIO(0xC00) @@ -72,4 +71,20 @@ #define XE3PLPD_PORT_P2M_MSGBUS_STATUS_P2P(port, lane) _XE3PLPD_PORT_P2M_MSGBUS_STATUS_P2P(__xe2lpd_port_idx(port), \ lane) #define XE3LPD_PORT_P2M_ADDR_MASK REG_GENMASK(11, 0) + +#define PLL_REG4_ADDR 0x8510 +#define PLL_REG3_ADDR 0x850C +#define PLL_REG5_ADDR 0x8514 +#define PLL_REG57_ADDR 0x85E4 +#define PLL_LF_ADDR 0x860C +#define PLL_TDC_ADDR 0x8610 +#define PLL_SSC_ADDR 0x8614 +#define PLL_BIAS2_ADDR 0x8618 +#define PLL_BIAS_TRIM_ADDR 0x8648 +#define PLL_DCO_MED_ADDR 0x8640 +#define PLL_DCO_FINE_ADDR 0x864C +#define PLL_SSC_INJ_ADDR 0x8624 +#define PLL_SURV_BONUS_ADDR 0x8644 +#define PLL_TYPE_OFFSET 0x200 +#define PLL_REG_ADDR(base, pll_type) ((pll_type) ? 
(base) + PLL_TYPE_OFFSET : (base)) #endif /* __INTEL_LT_PHY_REGS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 48f4d8ed4f15..89aeb4fb340e 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -329,7 +329,7 @@ static void intel_enable_lvds(struct intel_atomic_state *state, intel_de_rmw(display, PP_CONTROL(display, 0), 0, PANEL_POWER_ON); intel_de_posting_read(display, lvds_encoder->reg); - if (intel_de_wait_for_set(display, PP_STATUS(display, 0), PP_ON, 5000)) + if (intel_de_wait_for_set_ms(display, PP_STATUS(display, 0), PP_ON, 5000)) drm_err(display->drm, "timed out waiting for panel to power on\n"); @@ -345,7 +345,7 @@ static void intel_disable_lvds(struct intel_atomic_state *state, struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(encoder); intel_de_rmw(display, PP_CONTROL(display, 0), PANEL_POWER_ON, 0); - if (intel_de_wait_for_clear(display, PP_STATUS(display, 0), PP_ON, 1000)) + if (intel_de_wait_for_clear_ms(display, PP_STATUS(display, 0), PP_ON, 1000)) drm_err(display->drm, "timed out waiting for panel to power off\n"); @@ -384,7 +384,7 @@ static void intel_lvds_shutdown(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - if (intel_de_wait_for_clear(display, PP_STATUS(display, 0), PP_CYCLE_DELAY_ACTIVE, 5000)) + if (intel_de_wait_for_clear_ms(display, PP_STATUS(display, 0), PP_CYCLE_DELAY_ACTIVE, 5000)) drm_err(display->drm, "timed out waiting for panel power cycle delay\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 231b1632d64a..88eb7ae5765c 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -308,8 +308,6 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay, intel_frontbuffer_put(overlay->frontbuffer); overlay->frontbuffer = frontbuffer; - intel_frontbuffer_flip_prepare(display, INTEL_FRONTBUFFER_OVERLAY(pipe)); - overlay->old_vma = overlay->vma; if (vma) overlay->vma = i915_vma_get(vma); @@ -366,7 +364,7 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) if (drm_WARN_ON(display->drm, !vma)) return; - intel_frontbuffer_flip_complete(display, INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); + intel_frontbuffer_flip(display, INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); i915_vma_unpin(vma); i915_vma_put(vma); @@ -822,8 +820,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, goto out_pin_section; } - i915_gem_object_flush_frontbuffer(new_bo, ORIGIN_DIRTYFB); - if (!overlay->active) { const struct intel_crtc_state *crtc_state = overlay->crtc->config; diff --git a/drivers/gpu/drm/i915/display/intel_pch_display.c b/drivers/gpu/drm/i915/display/intel_pch_display.c index 3456c794e0e7..16619f7be5f8 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_display.c +++ b/drivers/gpu/drm/i915/display/intel_pch_display.c @@ -305,7 +305,7 @@ static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) } intel_de_write(display, reg, val | TRANS_ENABLE); - if (intel_de_wait_for_set(display, reg, TRANS_STATE_ENABLE, 100)) + if (intel_de_wait_for_set_ms(display, reg, TRANS_STATE_ENABLE, 100)) drm_err(display->drm, "failed to enable transcoder %c\n", pipe_name(pipe)); } @@ -326,7 +326,7 @@ static void ilk_disable_pch_transcoder(struct intel_crtc *crtc) reg = PCH_TRANSCONF(pipe); intel_de_rmw(display, reg, TRANS_ENABLE, 
0); /* wait for PCH transcoder off, transcoder state */ - if (intel_de_wait_for_clear(display, reg, TRANS_STATE_ENABLE, 50)) + if (intel_de_wait_for_clear_ms(display, reg, TRANS_STATE_ENABLE, 50)) drm_err(display->drm, "failed to disable transcoder %c\n", pipe_name(pipe)); @@ -572,8 +572,8 @@ static void lpt_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) val |= TRANS_INTERLACE_PROGRESSIVE; intel_de_write(display, LPT_TRANSCONF, val); - if (intel_de_wait_for_set(display, LPT_TRANSCONF, - TRANS_STATE_ENABLE, 100)) + if (intel_de_wait_for_set_ms(display, LPT_TRANSCONF, + TRANS_STATE_ENABLE, 100)) drm_err(display->drm, "Failed to enable PCH transcoder\n"); } @@ -581,8 +581,8 @@ static void lpt_disable_pch_transcoder(struct intel_display *display) { intel_de_rmw(display, LPT_TRANSCONF, TRANS_ENABLE, 0); /* wait for PCH transcoder off, transcoder state */ - if (intel_de_wait_for_clear(display, LPT_TRANSCONF, - TRANS_STATE_ENABLE, 50)) + if (intel_de_wait_for_clear_ms(display, LPT_TRANSCONF, + TRANS_STATE_ENABLE, 50)) drm_err(display->drm, "Failed to disable PCH transcoder\n"); /* Workaround: clear timing override bit. */ diff --git a/drivers/gpu/drm/i915/display/intel_pch_refclk.c b/drivers/gpu/drm/i915/display/intel_pch_refclk.c index cca880c7eed4..9a89bb6dcf65 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_refclk.c +++ b/drivers/gpu/drm/i915/display/intel_pch_refclk.c @@ -21,17 +21,15 @@ static void lpt_fdi_reset_mphy(struct intel_display *display) intel_de_rmw(display, SOUTH_CHICKEN2, 0, FDI_MPHY_IOSFSB_RESET_CTL); - ret = intel_de_wait_custom(display, SOUTH_CHICKEN2, - FDI_MPHY_IOSFSB_RESET_STATUS, FDI_MPHY_IOSFSB_RESET_STATUS, - 100, 0, NULL); + ret = intel_de_wait_for_set_us(display, SOUTH_CHICKEN2, + FDI_MPHY_IOSFSB_RESET_STATUS, 100); if (ret) drm_err(display->drm, "FDI mPHY reset assert timeout\n"); intel_de_rmw(display, SOUTH_CHICKEN2, FDI_MPHY_IOSFSB_RESET_CTL, 0); - ret = intel_de_wait_custom(display, SOUTH_CHICKEN2, - FDI_MPHY_IOSFSB_RESET_STATUS, 0, - 100, 0, NULL); + ret = intel_de_wait_for_clear_us(display, SOUTH_CHICKEN2, + FDI_MPHY_IOSFSB_RESET_STATUS, 100); if (ret) drm_err(display->drm, "FDI mPHY reset de-assert timeout\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index 505c776c0585..5105e3278bc4 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -1051,6 +1051,9 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state) DISPLAY_VERx100(display) == 3002) && src_x % 2 != 0) hsub = 2; + + if (DISPLAY_VER(display) == 35) + vsub = 2; } else { hsub = fb->format->hsub; vsub = fb->format->vsub; diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index 81444e0b3c41..a1de1ec564d1 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -133,6 +133,7 @@ initial_plane_vma(struct intel_display *display, struct drm_mm_node orig_mm = {}; struct i915_vma *vma; resource_size_t phys_base; + unsigned int tiling; u32 base, size; u64 pinctl; @@ -179,17 +180,19 @@ initial_plane_vma(struct intel_display *display, i915_gem_object_set_cache_coherency(obj, HAS_WT(i915) ? 
I915_CACHE_WT : I915_CACHE_NONE); - switch (plane_config->tiling) { + tiling = intel_fb_modifier_to_tiling(plane_config->fb->base.modifier); + + switch (tiling) { case I915_TILING_NONE: break; case I915_TILING_X: case I915_TILING_Y: obj->tiling_and_stride = plane_config->fb->base.pitches[0] | - plane_config->tiling; + tiling; break; default: - MISSING_CASE(plane_config->tiling); + MISSING_CASE(tiling); goto err_obj; } @@ -374,7 +377,7 @@ valid_fb: plane_state->uapi.crtc_w = fb->width; plane_state->uapi.crtc_h = fb->height; - if (plane_config->tiling) + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) dev_priv->preserve_bios_swizzle = true; plane_state->uapi.fb = fb; diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.c b/drivers/gpu/drm/i915/display/intel_pmdemand.c index f52abd4e2eb0..dc44a7a169c1 100644 --- a/drivers/gpu/drm/i915/display/intel_pmdemand.c +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.c @@ -390,12 +390,12 @@ int intel_pmdemand_atomic_check(struct intel_atomic_state *state) static bool intel_pmdemand_check_prev_transaction(struct intel_display *display) { - return !(intel_de_wait_for_clear(display, - XELPDP_INITIATE_PMDEMAND_REQUEST(1), - XELPDP_PMDEMAND_REQ_ENABLE, 10) || - intel_de_wait_for_clear(display, - GEN12_DCPR_STATUS_1, - XELPDP_PMDEMAND_INFLIGHT_STATUS, 10)); + return !(intel_de_wait_for_clear_ms(display, + XELPDP_INITIATE_PMDEMAND_REQUEST(1), + XELPDP_PMDEMAND_REQ_ENABLE, 10) || + intel_de_wait_for_clear_ms(display, + GEN12_DCPR_STATUS_1, + XELPDP_PMDEMAND_INFLIGHT_STATUS, 10)); } void @@ -462,9 +462,9 @@ static void intel_pmdemand_poll(struct intel_display *display) u32 status; int ret; - ret = intel_de_wait_custom(display, XELPDP_INITIATE_PMDEMAND_REQUEST(1), - XELPDP_PMDEMAND_REQ_ENABLE, 0, - 50, timeout_ms, &status); + ret = intel_de_wait_ms(display, XELPDP_INITIATE_PMDEMAND_REQUEST(1), + XELPDP_PMDEMAND_REQ_ENABLE, 0, + timeout_ms, &status); if (ret == -ETIMEDOUT) drm_err(display->drm, diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 05014ffe3ce1..00ac652809cc 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -626,6 +626,10 @@ static void _panel_replay_init_dpcd(struct intel_dp *intel_dp) struct intel_display *display = to_intel_display(intel_dp); int ret; + /* TODO: Enable Panel Replay on MST once it's properly implemented. 
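
Editorial aside: initial_plane_vma() now derives the legacy fence tiling from the framebuffer modifier via intel_fb_modifier_to_tiling() instead of the dropped plane_config->tiling field, and preserve_bios_swizzle keys off fb->modifier != DRM_FORMAT_MOD_LINEAR. A hedged sketch of what such a modifier-to-tiling mapping looks like; the real helper may cover additional modifiers, and the headers below are the DRM uapi ones:

    #include <stdint.h>
    #include <stdio.h>
    #include <drm/drm_fourcc.h>   /* DRM_FORMAT_MOD_LINEAR, I915_FORMAT_MOD_* */
    #include <drm/i915_drm.h>     /* I915_TILING_* */

    static unsigned int modifier_to_tiling(uint64_t modifier)
    {
            switch (modifier) {
            case I915_FORMAT_MOD_X_TILED:
                    return I915_TILING_X;
            case I915_FORMAT_MOD_Y_TILED:
                    return I915_TILING_Y;
            default:            /* linear and newer layouts: no fence tiling */
                    return I915_TILING_NONE;
            }
    }

    int main(void)
    {
            printf("X-tiled -> %u\n", modifier_to_tiling(I915_FORMAT_MOD_X_TILED));
            printf("linear  -> %u\n", modifier_to_tiling(DRM_FORMAT_MOD_LINEAR));
            return 0;
    }
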
*/ + if (intel_dp->mst_detect == DRM_DP_MST) + return; + ret = drm_dp_dpcd_read_data(&intel_dp->aux, DP_PANEL_REPLAY_CAP_SUPPORT, &intel_dp->pr_dpcd, sizeof(intel_dp->pr_dpcd)); if (ret < 0) @@ -932,7 +936,8 @@ static bool is_dc5_dc6_blocked(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); u32 current_dc_state = intel_display_power_get_current_dc_state(display); - struct drm_vblank_crtc *vblank = &display->drm->vblank[intel_dp->psr.pipe]; + struct intel_crtc *crtc = intel_crtc_for_pipe(display, intel_dp->psr.pipe); + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(&crtc->base); return (current_dc_state != DC_STATE_EN_UPTO_DC5 && current_dc_state != DC_STATE_EN_UPTO_DC6) || @@ -2272,8 +2277,8 @@ static void intel_psr_wait_exit_locked(struct intel_dp *intel_dp) } /* Wait till PSR is idle */ - if (intel_de_wait_for_clear(display, psr_status, - psr_status_mask, 2000)) + if (intel_de_wait_for_clear_ms(display, psr_status, + psr_status_mask, 2000)) drm_err(display->drm, "Timed out waiting PSR idle state\n"); } @@ -3161,7 +3166,7 @@ _psr2_ready_for_pipe_update_locked(const struct intel_crtc_state *new_crtc_state return true; } - return intel_de_wait_for_clear(display, + return intel_de_wait_for_clear_ms(display, EDP_PSR2_STATUS(display, cpu_transcoder), EDP_PSR2_STATUS_STATE_DEEP_SLEEP, PSR_IDLE_TIMEOUT_MS); @@ -3181,7 +3186,7 @@ _psr1_ready_for_pipe_update_locked(const struct intel_crtc_state *new_crtc_state return true; } - return intel_de_wait_for_clear(display, + return intel_de_wait_for_clear_ms(display, psr_status_reg(display, cpu_transcoder), EDP_PSR_STATUS_STATE_MASK, PSR_IDLE_TIMEOUT_MS); @@ -3259,7 +3264,7 @@ static bool __psr_wait_for_idle_locked(struct intel_dp *intel_dp) mutex_unlock(&intel_dp->psr.lock); - err = intel_de_wait_for_clear(display, reg, mask, 50); + err = intel_de_wait_for_clear_ms(display, reg, mask, 50); if (err) drm_err(display->drm, "Timed out waiting for PSR Idle for re-enable\n"); diff --git a/drivers/gpu/drm/i915/display/intel_sbi.c b/drivers/gpu/drm/i915/display/intel_sbi.c index dfcff924f0ed..b636a0060d39 100644 --- a/drivers/gpu/drm/i915/display/intel_sbi.c +++ b/drivers/gpu/drm/i915/display/intel_sbi.c @@ -21,7 +21,8 @@ static int intel_sbi_rw(struct intel_display *display, u16 reg, lockdep_assert_held(&display->sbi.lock); - if (intel_de_wait_fw(display, SBI_CTL_STAT, SBI_STATUS_MASK, SBI_STATUS_READY, 100, NULL)) { + if (intel_de_wait_fw_ms(display, SBI_CTL_STAT, + SBI_STATUS_MASK, SBI_STATUS_READY, 100, NULL)) { drm_err(display->drm, "timeout waiting for SBI to become ready\n"); return -EBUSY; } @@ -37,7 +38,8 @@ static int intel_sbi_rw(struct intel_display *display, u16 reg, cmd |= SBI_CTL_OP_WR; intel_de_write_fw(display, SBI_CTL_STAT, cmd | SBI_STATUS_BUSY); - if (intel_de_wait_fw(display, SBI_CTL_STAT, SBI_STATUS_MASK, SBI_STATUS_READY, 100, &cmd)) { + if (intel_de_wait_fw_ms(display, SBI_CTL_STAT, + SBI_STATUS_MASK, SBI_STATUS_READY, 100, &cmd)) { drm_err(display->drm, "timeout waiting for SBI to complete read\n"); return -ETIMEDOUT; } diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c index 4f028e6a91cd..295030742294 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_phy.c +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c @@ -42,8 +42,8 @@ void intel_snps_phy_wait_for_calibration(struct intel_display *display) * which phy was affected and skip setup of the corresponding * output later. 
*/ - if (intel_de_wait_for_clear(display, DG2_PHY_MISC(phy), - DG2_PHY_DP_TX_ACK_MASK, 25)) + if (intel_de_wait_for_clear_ms(display, DG2_PHY_MISC(phy), + DG2_PHY_DP_TX_ACK_MASK, 25)) display->snps.phy_failed_calibration |= BIT(phy); } } @@ -1863,7 +1863,7 @@ void intel_mpllb_enable(struct intel_encoder *encoder, * is locked at new settings. This register bit is sampling PHY * dp_mpllb_state interface signal. */ - if (intel_de_wait_for_set(display, enable_reg, PLL_LOCK, 5)) + if (intel_de_wait_for_set_ms(display, enable_reg, PLL_LOCK, 5)) drm_dbg_kms(display->drm, "Port %c PLL not locked\n", phy_name(phy)); /* @@ -1903,7 +1903,7 @@ void intel_mpllb_disable(struct intel_encoder *encoder) * 5. Software polls DPLL_ENABLE [PLL Lock] for PHY acknowledgment * (dp_txX_ack) that the new transmitter setting request is completed. */ - if (intel_de_wait_for_clear(display, enable_reg, PLL_LOCK, 5)) + if (intel_de_wait_for_clear_ms(display, enable_reg, PLL_LOCK, 5)) drm_err(display->drm, "Port %c PLL not locked\n", phy_name(phy)); /* diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 60f1d9ed181e..69b6873a6044 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -958,10 +958,9 @@ static int g4x_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, static unsigned int g4x_sprite_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { - const struct drm_format_info *info = drm_format_info(pixel_format); int cpp = info->cpp[0]; /* Limit to 4k pixels to guarantee TILEOFF.x doesn't get too big. */ @@ -973,10 +972,9 @@ g4x_sprite_max_stride(struct intel_plane *plane, static unsigned int hsw_sprite_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { - const struct drm_format_info *info = drm_format_info(pixel_format); int cpp = info->cpp[0]; /* Limit to 8k pixels to guarantee OFFSET.x doesn't get too big. */ diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 7e17ca018748..1e21fd02685d 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -1076,8 +1076,8 @@ xelpdp_tc_phy_wait_for_tcss_power(struct intel_tc_port *tc, bool enabled) static void xelpdp_tc_power_request_wa(struct intel_display *display, bool enable) { /* check if mailbox is running busy */ - if (intel_de_wait_for_clear(display, TCSS_DISP_MAILBOX_IN_CMD, - TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) { + if (intel_de_wait_for_clear_ms(display, TCSS_DISP_MAILBOX_IN_CMD, + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) { drm_dbg_kms(display->drm, "Timeout waiting for TCSS mailbox run/busy bit to clear\n"); return; @@ -1089,8 +1089,8 @@ static void xelpdp_tc_power_request_wa(struct intel_display *display, bool enabl TCSS_DISP_MAILBOX_IN_CMD_DATA(0x1)); /* wait to clear mailbox running busy bit before continuing */ - if (intel_de_wait_for_clear(display, TCSS_DISP_MAILBOX_IN_CMD, - TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) { + if (intel_de_wait_for_clear_ms(display, TCSS_DISP_MAILBOX_IN_CMD, + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) { drm_dbg_kms(display->drm, "Timeout after writing data to mailbox. 
Mailbox run/busy bit did not clear\n"); return; diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 00cbc126fb36..b92c42fde937 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -716,9 +716,9 @@ static void intel_vrr_tg_disable(const struct intel_crtc_state *old_crtc_state) intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), trans_vrr_ctl(old_crtc_state)); - if (intel_de_wait_for_clear(display, - TRANS_VRR_STATUS(display, cpu_transcoder), - VRR_STATUS_VRR_EN_LIVE, 1000)) + if (intel_de_wait_for_clear_ms(display, + TRANS_VRR_STATUS(display, cpu_transcoder), + VRR_STATUS_VRR_EN_LIVE, 1000)) drm_err(display->drm, "Timed out waiting for VRR live status to clear\n"); intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), 0); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index ba1bf0bd4c55..89c8003ccfe7 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -465,12 +465,11 @@ static int icl_plane_max_height(const struct drm_framebuffer *fb, static unsigned int plane_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation, + const struct drm_format_info *info, + u64 modifier, unsigned int rotation, unsigned int max_pixels, unsigned int max_bytes) { - const struct drm_format_info *info = drm_format_info(pixel_format); int cpp = info->cpp[0]; if (drm_rotation_90_or_270(rotation)) @@ -481,26 +480,26 @@ plane_max_stride(struct intel_plane *plane, static unsigned int adl_plane_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { unsigned int max_pixels = 65536; /* PLANE_OFFSET limit */ unsigned int max_bytes = 128 * 1024; - return plane_max_stride(plane, pixel_format, + return plane_max_stride(plane, info, modifier, rotation, max_pixels, max_bytes); } static unsigned int skl_plane_max_stride(struct intel_plane *plane, - u32 pixel_format, u64 modifier, - unsigned int rotation) + const struct drm_format_info *info, + u64 modifier, unsigned int rotation) { unsigned int max_pixels = 8192; /* PLANE_OFFSET limit */ unsigned int max_bytes = 32 * 1024; - return plane_max_stride(plane, pixel_format, + return plane_max_stride(plane, info, modifier, rotation, max_pixels, max_bytes); } @@ -1748,7 +1747,8 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state, } if (rotation & DRM_MODE_REFLECT_X && - fb->modifier == DRM_FORMAT_MOD_LINEAR) { + fb->modifier == DRM_FORMAT_MOD_LINEAR && + DISPLAY_VER(display) < 35) { drm_dbg_kms(display->drm, "[PLANE:%d:%s] horizontal flip is not supported with linear surface formats\n", plane->base.base.id, plane->base.name); @@ -3083,7 +3083,6 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, fourcc = skl_format_to_fourcc(pixel_format, val & PLANE_CTL_ORDER_RGBX, alpha); - fb->format = drm_format_info(fourcc); tiling = val & PLANE_CTL_TILED_MASK; switch (tiling) { @@ -3091,11 +3090,9 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, fb->modifier = DRM_FORMAT_MOD_LINEAR; break; case PLANE_CTL_TILED_X: - plane_config->tiling = I915_TILING_X; fb->modifier = I915_FORMAT_MOD_X_TILED; break; case PLANE_CTL_TILED_Y: - plane_config->tiling = I915_TILING_Y; if (val & PLANE_CTL_RENDER_DECOMPRESSION_ENABLE) if (DISPLAY_VER(display) >= 14) 
fb->modifier = I915_FORMAT_MOD_4_TILED_MTL_RC_CCS; @@ -3136,6 +3133,8 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, goto error; } + fb->format = drm_get_format_info(display->drm, fourcc, fb->modifier); + if (!display->params.enable_dpt && intel_fb_modifier_uses_dpt(display, fb->modifier)) { drm_dbg_kms(display->drm, "DPT disabled, skipping initial FB\n"); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h b/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h index 7c944d3ca855..6f815b231340 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h +++ b/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h @@ -324,7 +324,7 @@ #define PLANE_WM_IGNORE_LINES REG_BIT(30) #define PLANE_WM_AUTO_MIN_ALLOC_EN REG_BIT(29) #define PLANE_WM_LINES_MASK REG_GENMASK(26, 14) -#define PLANE_WM_BLOCKS_MASK REG_GENMASK(11, 0) +#define PLANE_WM_BLOCKS_MASK REG_GENMASK(12, 0) #define _PLANE_WM_SAGV_1_A 0x70258 #define _PLANE_WM_SAGV_1_B 0x71258 @@ -375,10 +375,10 @@ _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B, \ _PLANE_BUF_CFG_2_A, _PLANE_BUF_CFG_2_B) -/* skl+: 10 bits, icl+ 11 bits, adlp+ 12 bits */ -#define PLANE_BUF_END_MASK REG_GENMASK(27, 16) +/* skl+: 10 bits, icl+ 11 bits, adlp+ 12 bits, xe3p_lpd 13 bits */ +#define PLANE_BUF_END_MASK REG_GENMASK(28, 16) #define PLANE_BUF_END(end) REG_FIELD_PREP(PLANE_BUF_END_MASK, (end)) -#define PLANE_BUF_START_MASK REG_GENMASK(11, 0) +#define PLANE_BUF_START_MASK REG_GENMASK(12, 0) #define PLANE_BUF_START(start) REG_FIELD_PREP(PLANE_BUF_START_MASK, (start)) #define _PLANE_MIN_BUF_CFG_1_A 0x70274 @@ -389,9 +389,9 @@ _PLANE_MIN_BUF_CFG_1_A, _PLANE_MIN_BUF_CFG_1_B, \ _PLANE_MIN_BUF_CFG_2_A, _PLANE_MIN_BUF_CFG_2_B) #define PLANE_AUTO_MIN_DBUF_EN REG_BIT(31) -#define PLANE_MIN_DBUF_BLOCKS_MASK REG_GENMASK(27, 16) +#define PLANE_MIN_DBUF_BLOCKS_MASK REG_GENMASK(28, 16) #define PLANE_MIN_DBUF_BLOCKS(val) REG_FIELD_PREP(PLANE_MIN_DBUF_BLOCKS_MASK, (val)) -#define PLANE_INTERIM_DBUF_BLOCKS_MASK REG_GENMASK(11, 0) +#define PLANE_INTERIM_DBUF_BLOCKS_MASK REG_GENMASK(12, 0) #define PLANE_INTERIM_DBUF_BLOCKS(val) REG_FIELD_PREP(PLANE_INTERIM_DBUF_BLOCKS_MASK, (val)) /* tgl+ */ diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 6d050408618c..54e9e0be019d 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -635,15 +635,22 @@ skl_cursor_allocation(const struct intel_crtc_state *crtc_state, { struct intel_display *display = to_intel_display(crtc_state); struct intel_plane *plane = to_intel_plane(crtc_state->uapi.crtc->cursor); + const struct drm_mode_config *mode_config = &display->drm->mode_config; + const struct drm_format_info *info; struct skl_wm_level wm = {}; int ret, min_ddb_alloc = 0; struct skl_wm_params wp; + u64 modifier; + u32 format; int level; - ret = skl_compute_wm_params(crtc_state, 256, - drm_format_info(DRM_FORMAT_ARGB8888), - DRM_FORMAT_MOD_LINEAR, - DRM_MODE_ROTATE_0, + format = DRM_FORMAT_ARGB8888; + modifier = DRM_FORMAT_MOD_LINEAR; + + info = drm_get_format_info(display->drm, format, modifier); + + ret = skl_compute_wm_params(crtc_state, mode_config->cursor_width, + info, modifier, DRM_MODE_ROTATE_0, crtc_state->pixel_rate, &wp, 0, 0); drm_WARN_ON(display->drm, ret); @@ -1812,6 +1819,8 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, if (wp->y_tiled) { selected_result = max_fixed16(method2, wp->y_tile_minimum); + } else if (DISPLAY_VER(display) >= 35) { + selected_result = 
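
Editorial aside: the watermark register updates widen several block-count fields by one bit, e.g. PLANE_WM_BLOCKS_MASK grows from bits 11:0 to 12:0, doubling the encodable block count on xe3p_lpd. A small stand-alone illustration using local stand-ins for the kernel's REG_GENMASK()/REG_FIELD_PREP() (GCC/Clang builtins assumed):

    #include <stdint.h>
    #include <stdio.h>

    #define GENMASK_U32(h, l)  (((~0u) >> (31 - (h))) & ((~0u) << (l)))
    #define FIELD_PREP_U32(mask, val) (((val) << __builtin_ctz(mask)) & (mask))

    int main(void)
    {
            uint32_t old_mask = GENMASK_U32(11, 0);
            uint32_t new_mask = GENMASK_U32(12, 0);

            printf("old max blocks: %u\n", old_mask);        /* 4095 */
            printf("new max blocks: %u\n", new_mask);        /* 8191 */
            printf("encoded 5000:   0x%08x\n",
                   FIELD_PREP_U32(new_mask, 5000));          /* now fits */
            return 0;
    }
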
method2; } else { if ((wp->cpp * crtc_state->hw.pipe_mode.crtc_htotal / wp->dbuf_block_size < 1) && @@ -3183,6 +3192,13 @@ static void sanitize_wm_latency(struct intel_display *display) int level, num_levels = display->wm.num_levels; /* + * Xe3p and beyond should ignore level 0's reported latency and + * always apply WaWmMemoryReadLatency logic. + */ + if (DISPLAY_VER(display) >= 35) + wm[0] = 0; + + /* * If a level n (n > 1) has a 0us latency, all levels m (m >= n) * need to be disabled. We make sure to sanitize the values out * of the punit to satisfy this requirement. @@ -3477,7 +3493,10 @@ void intel_dbuf_mdclk_cdclk_ratio_update(struct intel_display *display, if (!HAS_MBUS_JOINING(display)) return; - if (DISPLAY_VER(display) >= 20) + if (DISPLAY_VER(display) >= 35) + intel_de_rmw(display, MBUS_CTL, XE3P_MBUS_TRANSLATION_THROTTLE_MIN_MASK, + XE3P_MBUS_TRANSLATION_THROTTLE_MIN(ratio - 1)); + else if (DISPLAY_VER(display) >= 20) intel_de_rmw(display, MBUS_CTL, MBUS_TRANSLATION_THROTTLE_MIN_MASK, MBUS_TRANSLATION_THROTTLE_MIN(ratio - 1)); @@ -3488,9 +3507,14 @@ void intel_dbuf_mdclk_cdclk_ratio_update(struct intel_display *display, ratio, str_yes_no(joined_mbus)); for_each_dbuf_slice(display, slice) - intel_de_rmw(display, DBUF_CTL_S(slice), - DBUF_MIN_TRACKER_STATE_SERVICE_MASK, - DBUF_MIN_TRACKER_STATE_SERVICE(ratio - 1)); + if (DISPLAY_VER(display) >= 35) + intel_de_rmw(display, DBUF_CTL_S(slice), + XE3P_DBUF_MIN_TRACKER_STATE_SERVICE_MASK, + XE3P_DBUF_MIN_TRACKER_STATE_SERVICE(ratio - 1)); + else + intel_de_rmw(display, DBUF_CTL_S(slice), + DBUF_MIN_TRACKER_STATE_SERVICE_MASK, + DBUF_MIN_TRACKER_STATE_SERVICE(ratio - 1)); } static void intel_dbuf_mdclk_min_tracker_update(struct intel_atomic_state *state) diff --git a/drivers/gpu/drm/i915/display/skl_watermark_regs.h b/drivers/gpu/drm/i915/display/skl_watermark_regs.h index c5572fc0e847..abf56ac31105 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark_regs.h +++ b/drivers/gpu/drm/i915/display/skl_watermark_regs.h @@ -32,16 +32,18 @@ #define MBUS_BBOX_CTL_S1 _MMIO(0x45040) #define MBUS_BBOX_CTL_S2 _MMIO(0x45044) -#define MBUS_CTL _MMIO(0x4438C) -#define MBUS_JOIN REG_BIT(31) -#define MBUS_HASHING_MODE_MASK REG_BIT(30) -#define MBUS_HASHING_MODE_2x2 REG_FIELD_PREP(MBUS_HASHING_MODE_MASK, 0) -#define MBUS_HASHING_MODE_1x4 REG_FIELD_PREP(MBUS_HASHING_MODE_MASK, 1) -#define MBUS_JOIN_PIPE_SELECT_MASK REG_GENMASK(28, 26) -#define MBUS_JOIN_PIPE_SELECT(pipe) REG_FIELD_PREP(MBUS_JOIN_PIPE_SELECT_MASK, pipe) -#define MBUS_JOIN_PIPE_SELECT_NONE MBUS_JOIN_PIPE_SELECT(7) -#define MBUS_TRANSLATION_THROTTLE_MIN_MASK REG_GENMASK(15, 13) -#define MBUS_TRANSLATION_THROTTLE_MIN(val) REG_FIELD_PREP(MBUS_TRANSLATION_THROTTLE_MIN_MASK, val) +#define MBUS_CTL _MMIO(0x4438C) +#define MBUS_JOIN REG_BIT(31) +#define MBUS_HASHING_MODE_MASK REG_BIT(30) +#define MBUS_HASHING_MODE_2x2 REG_FIELD_PREP(MBUS_HASHING_MODE_MASK, 0) +#define MBUS_HASHING_MODE_1x4 REG_FIELD_PREP(MBUS_HASHING_MODE_MASK, 1) +#define MBUS_JOIN_PIPE_SELECT_MASK REG_GENMASK(28, 26) +#define MBUS_JOIN_PIPE_SELECT(pipe) REG_FIELD_PREP(MBUS_JOIN_PIPE_SELECT_MASK, pipe) +#define MBUS_JOIN_PIPE_SELECT_NONE MBUS_JOIN_PIPE_SELECT(7) +#define XE3P_MBUS_TRANSLATION_THROTTLE_MIN_MASK REG_GENMASK(16, 13) +#define XE3P_MBUS_TRANSLATION_THROTTLE_MIN(val) REG_FIELD_PREP(XE3P_MBUS_TRANSLATION_THROTTLE_MIN_MASK, val) +#define MBUS_TRANSLATION_THROTTLE_MIN_MASK REG_GENMASK(15, 13) +#define MBUS_TRANSLATION_THROTTLE_MIN(val) REG_FIELD_PREP(MBUS_TRANSLATION_THROTTLE_MIN_MASK, val) /* * The below are numbered 
starting from "S1" on gen11/gen12, but starting @@ -51,20 +53,22 @@ * way things will be named by the hardware team going forward, plus it's more * consistent with how most of the rest of our registers are named. */ -#define _DBUF_CTL_S0 0x45008 -#define _DBUF_CTL_S1 0x44FE8 -#define _DBUF_CTL_S2 0x44300 -#define _DBUF_CTL_S3 0x44304 -#define DBUF_CTL_S(slice) _MMIO(_PICK(slice, \ - _DBUF_CTL_S0, \ - _DBUF_CTL_S1, \ - _DBUF_CTL_S2, \ - _DBUF_CTL_S3)) -#define DBUF_POWER_REQUEST REG_BIT(31) -#define DBUF_POWER_STATE REG_BIT(30) -#define DBUF_TRACKER_STATE_SERVICE_MASK REG_GENMASK(23, 19) -#define DBUF_TRACKER_STATE_SERVICE(x) REG_FIELD_PREP(DBUF_TRACKER_STATE_SERVICE_MASK, x) -#define DBUF_MIN_TRACKER_STATE_SERVICE_MASK REG_GENMASK(18, 16) /* ADL-P+ */ +#define _DBUF_CTL_S0 0x45008 +#define _DBUF_CTL_S1 0x44FE8 +#define _DBUF_CTL_S2 0x44300 +#define _DBUF_CTL_S3 0x44304 +#define DBUF_CTL_S(slice) _MMIO(_PICK(slice, \ + _DBUF_CTL_S0, \ + _DBUF_CTL_S1, \ + _DBUF_CTL_S2, \ + _DBUF_CTL_S3)) +#define DBUF_POWER_REQUEST REG_BIT(31) +#define DBUF_POWER_STATE REG_BIT(30) +#define DBUF_TRACKER_STATE_SERVICE_MASK REG_GENMASK(23, 19) +#define DBUF_TRACKER_STATE_SERVICE(x) REG_FIELD_PREP(DBUF_TRACKER_STATE_SERVICE_MASK, x) +#define XE3P_DBUF_MIN_TRACKER_STATE_SERVICE_MASK REG_GENMASK(20, 16) +#define XE3P_DBUF_MIN_TRACKER_STATE_SERVICE(x) REG_FIELD_PREP(XE3P_DBUF_MIN_TRACKER_STATE_SERVICE_MASK, x) +#define DBUF_MIN_TRACKER_STATE_SERVICE_MASK REG_GENMASK(18, 16) /* ADL-P+ */ #define DBUF_MIN_TRACKER_STATE_SERVICE(x) REG_FIELD_PREP(DBUF_MIN_TRACKER_STATE_SERVICE_MASK, x) /* ADL-P+ */ #define MTL_LATENCY_LP0_LP1 _MMIO(0x45780) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 444682995658..19bdd8662359 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -94,8 +94,8 @@ void vlv_dsi_wait_for_fifo_empty(struct intel_dsi *intel_dsi, enum port port) mask = LP_CTRL_FIFO_EMPTY | HS_CTRL_FIFO_EMPTY | LP_DATA_FIFO_EMPTY | HS_DATA_FIFO_EMPTY; - if (intel_de_wait_for_set(display, MIPI_GEN_FIFO_STAT(display, port), - mask, 100)) + if (intel_de_wait_for_set_ms(display, MIPI_GEN_FIFO_STAT(display, port), + mask, 100)) drm_err(display->drm, "DPI FIFOs are not empty\n"); } @@ -162,8 +162,8 @@ static ssize_t intel_dsi_host_transfer(struct mipi_dsi_host *host, /* note: this is never true for reads */ if (packet.payload_length) { - if (intel_de_wait_for_clear(display, MIPI_GEN_FIFO_STAT(display, port), - data_mask, 50)) + if (intel_de_wait_for_clear_ms(display, MIPI_GEN_FIFO_STAT(display, port), + data_mask, 50)) drm_err(display->drm, "Timeout waiting for HS/LP DATA FIFO !full\n"); @@ -176,8 +176,8 @@ static ssize_t intel_dsi_host_transfer(struct mipi_dsi_host *host, GEN_READ_DATA_AVAIL); } - if (intel_de_wait_for_clear(display, MIPI_GEN_FIFO_STAT(display, port), - ctrl_mask, 50)) { + if (intel_de_wait_for_clear_ms(display, MIPI_GEN_FIFO_STAT(display, port), + ctrl_mask, 50)) { drm_err(display->drm, "Timeout waiting for HS/LP CTRL FIFO !full\n"); } @@ -188,8 +188,8 @@ static ssize_t intel_dsi_host_transfer(struct mipi_dsi_host *host, /* ->rx_len is set only for reads */ if (msg->rx_len) { data_mask = GEN_READ_DATA_AVAIL; - if (intel_de_wait_for_set(display, MIPI_INTR_STAT(display, port), - data_mask, 50)) + if (intel_de_wait_for_set_ms(display, MIPI_INTR_STAT(display, port), + data_mask, 50)) drm_err(display->drm, "Timeout waiting for read data.\n"); @@ -246,7 +246,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 
cmd, bool hs, intel_de_write(display, MIPI_DPI_CONTROL(display, port), cmd); mask = SPL_PKT_SENT_INTERRUPT; - if (intel_de_wait_for_set(display, MIPI_INTR_STAT(display, port), mask, 100)) + if (intel_de_wait_for_set_ms(display, MIPI_INTR_STAT(display, port), mask, 100)) drm_err(display->drm, "Video mode command 0x%08x send failed.\n", cmd); @@ -352,8 +352,8 @@ static bool glk_dsi_enable_io(struct intel_encoder *encoder) /* Wait for Pwr ACK */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_de_wait_for_set(display, MIPI_CTRL(display, port), - GLK_MIPIIO_PORT_POWERED, 20)) + if (intel_de_wait_for_set_ms(display, MIPI_CTRL(display, port), + GLK_MIPIIO_PORT_POWERED, 20)) drm_err(display->drm, "MIPIO port is powergated\n"); } @@ -374,8 +374,8 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) /* Wait for MIPI PHY status bit to set */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_de_wait_for_set(display, MIPI_CTRL(display, port), - GLK_PHY_STATUS_PORT_READY, 20)) + if (intel_de_wait_for_set_ms(display, MIPI_CTRL(display, port), + GLK_PHY_STATUS_PORT_READY, 20)) drm_err(display->drm, "PHY is not ON\n"); } @@ -394,8 +394,8 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) ULPS_STATE_MASK, ULPS_STATE_ENTER | DEVICE_READY); /* Wait for ULPS active */ - if (intel_de_wait_for_clear(display, MIPI_CTRL(display, port), - GLK_ULPS_NOT_ACTIVE, 20)) + if (intel_de_wait_for_clear_ms(display, MIPI_CTRL(display, port), + GLK_ULPS_NOT_ACTIVE, 20)) drm_err(display->drm, "ULPS not active\n"); /* Exit ULPS */ @@ -413,16 +413,16 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) /* Wait for Stop state */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_de_wait_for_set(display, MIPI_CTRL(display, port), - GLK_DATA_LANE_STOP_STATE, 20)) + if (intel_de_wait_for_set_ms(display, MIPI_CTRL(display, port), + GLK_DATA_LANE_STOP_STATE, 20)) drm_err(display->drm, "Date lane not in STOP state\n"); } /* Wait for AFE LATCH */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_de_wait_for_set(display, BXT_MIPI_PORT_CTRL(port), - AFE_LATCHOUT, 20)) + if (intel_de_wait_for_set_ms(display, BXT_MIPI_PORT_CTRL(port), + AFE_LATCHOUT, 20)) drm_err(display->drm, "D-PHY not entering LP-11 state\n"); } @@ -519,15 +519,15 @@ static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) /* Wait for MIPI PHY status bit to unset */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_de_wait_for_clear(display, MIPI_CTRL(display, port), - GLK_PHY_STATUS_PORT_READY, 20)) + if (intel_de_wait_for_clear_ms(display, MIPI_CTRL(display, port), + GLK_PHY_STATUS_PORT_READY, 20)) drm_err(display->drm, "PHY is not turning OFF\n"); } /* Wait for Pwr ACK bit to unset */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_de_wait_for_clear(display, MIPI_CTRL(display, port), - GLK_MIPIIO_PORT_POWERED, 20)) + if (intel_de_wait_for_clear_ms(display, MIPI_CTRL(display, port), + GLK_MIPIIO_PORT_POWERED, 20)) drm_err(display->drm, "MIPI IO Port is not powergated\n"); } @@ -544,8 +544,8 @@ static void glk_dsi_disable_mipi_io(struct intel_encoder *encoder) /* Wait for MIPI PHY status bit to unset */ for_each_dsi_port(port, intel_dsi->ports) { - if (intel_de_wait_for_clear(display, MIPI_CTRL(display, port), - GLK_PHY_STATUS_PORT_READY, 20)) + if (intel_de_wait_for_clear_ms(display, MIPI_CTRL(display, port), + GLK_PHY_STATUS_PORT_READY, 20)) drm_err(display->drm, "PHY is not turning OFF\n"); } @@ -595,8 +595,8 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder 
*encoder) * Port A only. MIPI Port C has no similar bit for checking. */ if ((display->platform.broxton || port == PORT_A) && - intel_de_wait_for_clear(display, port_ctrl, - AFE_LATCHOUT, 30)) + intel_de_wait_for_clear_ms(display, port_ctrl, + AFE_LATCHOUT, 30)) drm_err(display->drm, "DSI LP not going Low\n"); /* Disable MIPI PHY transparent latch */ diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c index f078b9cda96c..a2da6285890b 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c @@ -319,8 +319,8 @@ void bxt_dsi_pll_disable(struct intel_encoder *encoder) * PLL lock should deassert within 200us. * Wait up to 1ms before timing out. */ - if (intel_de_wait_for_clear(display, BXT_DSI_PLL_ENABLE, - BXT_DSI_PLL_LOCKED, 1)) + if (intel_de_wait_for_clear_ms(display, BXT_DSI_PLL_ENABLE, + BXT_DSI_PLL_LOCKED, 1)) drm_err(display->drm, "Timeout waiting for PLL lock deassertion\n"); } @@ -568,8 +568,8 @@ void bxt_dsi_pll_enable(struct intel_encoder *encoder, intel_de_rmw(display, BXT_DSI_PLL_ENABLE, 0, BXT_DSI_PLL_DO_ENABLE); /* Timeout and fail if PLL not locked */ - if (intel_de_wait_for_set(display, BXT_DSI_PLL_ENABLE, - BXT_DSI_PLL_LOCKED, 1)) { + if (intel_de_wait_for_set_ms(display, BXT_DSI_PLL_ENABLE, + BXT_DSI_PLL_LOCKED, 1)) { drm_err(display->drm, "Timed out waiting for DSI PLL to lock\n"); return; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index cd9686a7ded2..189ecdd0a9c1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -194,8 +194,8 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * cpp, 64); /* align stride to page size so that we can remap */ - if (args->pitch > intel_plane_fb_max_stride(dev, format, - DRM_FORMAT_MOD_LINEAR)) + if (args->pitch > intel_dumb_fb_max_stride(dev, format, + DRM_FORMAT_MOD_LINEAR)) args->pitch = ALIGN(args->pitch, 4096); if (args->pitch < args->width) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index e1348a0043db..3f6f040c359d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -477,24 +477,24 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, enum fb_op_origin origin) { - struct intel_frontbuffer *front; + struct i915_frontbuffer *front; - front = i915_gem_object_get_frontbuffer(obj); + front = i915_gem_object_frontbuffer_lookup(obj); if (front) { - intel_frontbuffer_flush(front, origin); - intel_frontbuffer_put(front); + intel_frontbuffer_flush(&front->base, origin); + i915_gem_object_frontbuffer_put(front); } } void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, enum fb_op_origin origin) { - struct intel_frontbuffer *front; + struct i915_frontbuffer *front; - front = i915_gem_object_get_frontbuffer(obj); + front = i915_gem_object_frontbuffer_lookup(obj); if (front) { - intel_frontbuffer_invalidate(front, origin); - intel_frontbuffer_put(front); + intel_frontbuffer_invalidate(&front->base, origin); + i915_gem_object_frontbuffer_put(front); } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.c new file mode 100644 index 000000000000..aaa15e7b3f17 --- /dev/null +++ 
b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include "i915_drv.h" +#include "i915_gem_object_frontbuffer.h" + +static int frontbuffer_active(struct i915_active *ref) +{ + struct i915_frontbuffer *front = + container_of(ref, typeof(*front), write); + + kref_get(&front->ref); + return 0; +} + +static void frontbuffer_retire(struct i915_active *ref) +{ + struct i915_frontbuffer *front = + container_of(ref, typeof(*front), write); + + intel_frontbuffer_flush(&front->base, ORIGIN_CS); + i915_gem_object_frontbuffer_put(front); +} + +struct i915_frontbuffer * +i915_gem_object_frontbuffer_get(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_frontbuffer *front, *cur; + + front = i915_gem_object_frontbuffer_lookup(obj); + if (front) + return front; + + front = kmalloc(sizeof(*front), GFP_KERNEL); + if (!front) + return NULL; + + intel_frontbuffer_init(&front->base, &i915->drm); + + kref_init(&front->ref); + i915_gem_object_get(obj); + front->obj = obj; + + i915_active_init(&front->write, + frontbuffer_active, + frontbuffer_retire, + I915_ACTIVE_RETIRE_SLEEPS); + + spin_lock(&i915->frontbuffer_lock); + if (rcu_access_pointer(obj->frontbuffer)) { + cur = rcu_dereference_protected(obj->frontbuffer, true); + kref_get(&cur->ref); + } else { + cur = front; + rcu_assign_pointer(obj->frontbuffer, front); + } + spin_unlock(&i915->frontbuffer_lock); + + if (cur != front) { + i915_gem_object_put(obj); + intel_frontbuffer_fini(&front->base); + kfree(front); + } + + return cur; +} + +void i915_gem_object_frontbuffer_ref(struct i915_frontbuffer *front) +{ + kref_get(&front->ref); +} + +static void frontbuffer_release(struct kref *ref) + __releases(&i915->frontbuffer_lock) +{ + struct i915_frontbuffer *front = + container_of(ref, typeof(*front), ref); + struct drm_i915_gem_object *obj = front->obj; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + i915_ggtt_clear_scanout(obj); + + RCU_INIT_POINTER(obj->frontbuffer, NULL); + + spin_unlock(&i915->frontbuffer_lock); + + i915_active_fini(&front->write); + + i915_gem_object_put(obj); + + intel_frontbuffer_fini(&front->base); + + kfree_rcu(front, rcu); +} + +void i915_gem_object_frontbuffer_put(struct i915_frontbuffer *front) +{ + struct drm_i915_private *i915 = to_i915(front->obj->base.dev); + + kref_put_lock(&front->ref, frontbuffer_release, + &i915->frontbuffer_lock); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h index b682969e3a29..2133e29047c5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h @@ -12,6 +12,14 @@ #include "display/intel_frontbuffer.h" #include "i915_gem_object_types.h" +struct i915_frontbuffer { + struct intel_frontbuffer base; + struct drm_i915_gem_object *obj; + struct i915_active write; + struct rcu_head rcu; + struct kref ref; +}; + void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, enum fb_op_origin origin); void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, @@ -33,19 +41,23 @@ i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, __i915_gem_object_invalidate_frontbuffer(obj, origin); } +struct i915_frontbuffer *i915_gem_object_frontbuffer_get(struct drm_i915_gem_object *obj); +void i915_gem_object_frontbuffer_ref(struct i915_frontbuffer 
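
Editorial aside: i915_gem_object_frontbuffer_get() above allocates a frontbuffer optimistically, then under the new i915->frontbuffer_lock either publishes it or takes a reference on the one another thread installed first, freeing the loser outside the lock. A userspace re-creation of that publish-or-reuse pattern, with a pthread mutex and a plain atomic standing in for the spinlock and kref, and the release path omitted:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct front {
            atomic_int ref;
    };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static struct front *published;          /* stand-in for obj->frontbuffer */

    static struct front *front_get(void)
    {
            struct front *fresh = malloc(sizeof(*fresh));
            struct front *cur;

            if (!fresh)
                    return NULL;
            atomic_init(&fresh->ref, 1);

            pthread_mutex_lock(&lock);
            if (published) {
                    cur = published;                 /* someone beat us to it */
                    atomic_fetch_add(&cur->ref, 1);
            } else {
                    cur = published = fresh;         /* publish our allocation */
            }
            pthread_mutex_unlock(&lock);

            if (cur != fresh)
                    free(fresh);                     /* discard the loser */

            return cur;
    }

    int main(void)
    {
            struct front *a = front_get();
            struct front *b = front_get();

            printf("same object: %d, refcount: %d\n",
                   a == b, atomic_load(&b->ref));    /* same object: 1, 2 */
            return 0;
    }
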
*front); +void i915_gem_object_frontbuffer_put(struct i915_frontbuffer *front); + /** - * i915_gem_object_get_frontbuffer - Get the object's frontbuffer - * @obj: The object whose frontbuffer to get. + * i915_gem_object_frontbuffer_lookup - Look up the object's frontbuffer + * @obj: The object whose frontbuffer to look up. * * Get pointer to object's frontbuffer if such exists. Please note that RCU * mechanism is used to handle e.g. ongoing removal of frontbuffer pointer. * * Return: pointer to object's frontbuffer is such exists or NULL */ -static inline struct intel_frontbuffer * -i915_gem_object_get_frontbuffer(const struct drm_i915_gem_object *obj) +static inline struct i915_frontbuffer * +i915_gem_object_frontbuffer_lookup(const struct drm_i915_gem_object *obj) { - struct intel_frontbuffer *front; + struct i915_frontbuffer *front; if (likely(!rcu_access_pointer(obj->frontbuffer))) return NULL; @@ -62,41 +74,11 @@ i915_gem_object_get_frontbuffer(const struct drm_i915_gem_object *obj) if (likely(front == rcu_access_pointer(obj->frontbuffer))) break; - intel_frontbuffer_put(front); + i915_gem_object_frontbuffer_put(front); } while (1); rcu_read_unlock(); return front; } -/** - * i915_gem_object_set_frontbuffer - Set the object's frontbuffer - * @obj: The object whose frontbuffer to set. - * @front: The frontbuffer to set - * - * Set object's frontbuffer pointer. If frontbuffer is already set for the - * object keep it and return it's pointer to the caller. Please note that RCU - * mechanism is used to handle e.g. ongoing removal of frontbuffer pointer. This - * function is protected by i915->display->fb_tracking.lock - * - * Return: pointer to frontbuffer which was set. - */ -static inline struct intel_frontbuffer * -i915_gem_object_set_frontbuffer(struct drm_i915_gem_object *obj, - struct intel_frontbuffer *front) -{ - struct intel_frontbuffer *cur = front; - - if (!front) { - RCU_INIT_POINTER(obj->frontbuffer, NULL); - } else if (rcu_access_pointer(obj->frontbuffer)) { - cur = rcu_dereference_protected(obj->frontbuffer, true); - kref_get(&cur->ref); - } else { - rcu_assign_pointer(obj->frontbuffer, front); - } - - return cur; -} - #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 64600aa8227f..465ce94aee76 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -574,7 +574,7 @@ struct drm_i915_gem_object { */ u16 write_domain; - struct intel_frontbuffer __rcu *frontbuffer; + struct i915_frontbuffer __rcu *frontbuffer; /** Current tiling stride for the object, if it's tiled. 
*/ unsigned int tiling_and_stride; diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.h b/drivers/gpu/drm/i915/gt/intel_tlb.h index 337327af92ac..ec7612216248 100644 --- a/drivers/gpu/drm/i915/gt/intel_tlb.h +++ b/drivers/gpu/drm/i915/gt/intel_tlb.h @@ -18,7 +18,7 @@ void intel_gt_fini_tlb(struct intel_gt *gt); static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt) { - return seqprop_sequence(>->tlb.seqno); + return raw_read_seqcount(>->tlb.seqno); } static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 54bc447efce0..fdf0e9858607 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -710,7 +710,14 @@ static int threaded_migrate(struct intel_migrate *migrate, thread[i].tsk = tsk; } - msleep(10 * n_cpus); /* start all threads before we kthread_stop() */ + /* + * Start all threads before we kthread_stop(). + * In CHV / BXT+VTD environments, where VMA pinning is committed + * asynchronously, empirically determined 100ms delay is needed + * to avoid stopping threads that may still wait for completion of + * intel_ggtt_bind_vma and fail with -ERESTARTSYS when interrupted. + */ + msleep((intel_vm_no_concurrent_access_wa(migrate->context->vm->i915) ? 100 : 10) * n_cpus); for (i = 0; i < n_cpus; ++i) { struct task_struct *tsk = thread[i].tsk; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 95f9ddf22ce4..5381a934a671 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -311,6 +311,8 @@ struct drm_i915_private { struct file *mmap_singleton; } gem; + spinlock_t frontbuffer_lock; /* protects obj->frontbuffer (write-side) */ + struct intel_pxp *pxp; struct i915_pmu pmu; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 638fe20dfe54..4c82c9544b93 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1299,6 +1299,8 @@ void i915_gem_init_early(struct drm_i915_private *dev_priv) { i915_gem_init__mm(dev_priv); i915_gem_init__contexts(dev_priv); + + spin_lock_init(&dev_priv->frontbuffer_lock); } void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 354ef75ef6a5..5bf3b4ab2baa 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1233,6 +1233,7 @@ #define OROM_OFFSET_MASK REG_GENMASK(20, 16) #define MTL_MEM_SS_INFO_GLOBAL _MMIO(0x45700) +#define XE3P_ECC_IMPACTING_DE REG_BIT(12) #define MTL_N_OF_ENABLED_QGV_POINTS_MASK REG_GENMASK(11, 8) #define MTL_N_OF_POPULATED_CH_MASK REG_GENMASK(7, 4) #define MTL_DDR_TYPE_MASK REG_GENMASK(3, 0) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index ef9c272e60e5..2c0a63664e13 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1597,8 +1597,20 @@ err_unlock: err_vma_res: i915_vma_resource_free(vma_res); err_fence: - if (work) - dma_fence_work_commit_imm(&work->base); + if (work) { + /* + * When pinning VMA to GGTT on CHV or BXT with VTD enabled, + * commit VMA binding asynchronously to avoid risk of lock + * inversion among reservation_ww locks held here and + * cpu_hotplug_lock acquired from stop_machine(), which we + * wrap around GGTT updates when running in those environments. 
+ */ + if (i915_vma_is_ggtt(vma) && + intel_vm_no_concurrent_access_wa(vma->vm->i915)) + dma_fence_work_commit(&work->base); + else + dma_fence_work_commit_imm(&work->base); + } err_rpm: intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); @@ -1992,13 +2004,13 @@ int _i915_vma_move_to_active(struct i915_vma *vma, } if (flags & EXEC_OBJECT_WRITE) { - struct intel_frontbuffer *front; + struct i915_frontbuffer *front; - front = i915_gem_object_get_frontbuffer(obj); + front = i915_gem_object_frontbuffer_lookup(obj); if (unlikely(front)) { - if (intel_frontbuffer_invalidate(front, ORIGIN_CS)) + if (intel_frontbuffer_invalidate(&front->base, ORIGIN_CS)) i915_active_add_request(&front->write, rq); - intel_frontbuffer_put(front); + i915_gem_object_frontbuffer_put(front); } } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 7ab4c4e60264..0a86e4857539 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1118,6 +1118,10 @@ static int misaligned_case(struct i915_address_space *vm, struct intel_memory_re goto err_put; } + /* make sure page_sizes_gtt has been populated before use */ + if (i915_is_ggtt(vm) && intel_vm_no_concurrent_access_wa(vm->i915)) + i915_vma_wait_for_bind(vma); + expected_vma_size = round_up(size, 1 << (ffs(vma->resource->page_sizes_gtt) - 1)); expected_node_size = expected_vma_size; diff --git a/drivers/gpu/drm/i915/soc/intel_dram.c b/drivers/gpu/drm/i915/soc/intel_dram.c index 2e16346a6cc0..3e588762709a 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.c +++ b/drivers/gpu/drm/i915/soc/intel_dram.c @@ -686,6 +686,7 @@ static int gen12_get_dram_info(struct drm_i915_private *i915, struct dram_info * static int xelpdp_get_dram_info(struct drm_i915_private *i915, struct dram_info *dram_info) { + struct intel_display *display = i915->display; u32 val = intel_uncore_read(&i915->uncore, MTL_MEM_SS_INFO_GLOBAL); switch (REG_FIELD_GET(MTL_DDR_TYPE_MASK, val)) { @@ -724,6 +725,9 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915, struct dram_info dram_info->num_qgv_points = REG_FIELD_GET(MTL_N_OF_ENABLED_QGV_POINTS_MASK, val); /* PSF GV points not supported in D14+ */ + if (DISPLAY_VER(display) >= 35) + dram_info->ecc_impacting_de_bw = REG_FIELD_GET(XE3P_ECC_IMPACTING_DE, val); + return 0; } diff --git a/drivers/gpu/drm/i915/soc/intel_dram.h b/drivers/gpu/drm/i915/soc/intel_dram.h index 03a973f1c941..8475ee379daa 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.h +++ b/drivers/gpu/drm/i915/soc/intel_dram.h @@ -30,6 +30,7 @@ struct dram_info { u8 num_channels; u8 num_qgv_points; u8 num_psf_gv_points; + bool ecc_impacting_de_bw; /* Only valid from Xe3p_LPD onward. 
*/ bool symmetric_memory; bool has_16gb_dimms; }; diff --git a/drivers/gpu/drm/i915/soc/intel_rom.c b/drivers/gpu/drm/i915/soc/intel_rom.c index 243d98cab8c3..2f17dc856e7f 100644 --- a/drivers/gpu/drm/i915/soc/intel_rom.c +++ b/drivers/gpu/drm/i915/soc/intel_rom.c @@ -39,8 +39,9 @@ static u16 spi_read16(struct intel_rom *rom, loff_t offset) return spi_read32(rom, offset) & 0xffff; } -struct intel_rom *intel_rom_spi(struct drm_i915_private *i915) +struct intel_rom *intel_rom_spi(struct drm_device *drm) { + struct drm_i915_private *i915 = to_i915(drm); struct intel_rom *rom; u32 static_region; @@ -85,7 +86,7 @@ static void pci_free(struct intel_rom *rom) pci_unmap_rom(rom->pdev, rom->oprom); } -struct intel_rom *intel_rom_pci(struct drm_i915_private *i915) +struct intel_rom *intel_rom_pci(struct drm_device *drm) { struct intel_rom *rom; @@ -93,7 +94,7 @@ struct intel_rom *intel_rom_pci(struct drm_i915_private *i915) if (!rom) return NULL; - rom->pdev = to_pci_dev(i915->drm.dev); + rom->pdev = to_pci_dev(drm->dev); rom->oprom = pci_map_rom(rom->pdev, &rom->size); if (!rom->oprom) { diff --git a/drivers/gpu/drm/i915/soc/intel_rom.h b/drivers/gpu/drm/i915/soc/intel_rom.h index fb2979c8ef7f..4e59a375787e 100644 --- a/drivers/gpu/drm/i915/soc/intel_rom.h +++ b/drivers/gpu/drm/i915/soc/intel_rom.h @@ -8,11 +8,11 @@ #include <linux/types.h> -struct drm_i915_private; +struct drm_device; struct intel_rom; -struct intel_rom *intel_rom_spi(struct drm_i915_private *i915); -struct intel_rom *intel_rom_pci(struct drm_i915_private *i915); +struct intel_rom *intel_rom_spi(struct drm_device *drm); +struct intel_rom *intel_rom_pci(struct drm_device *drm); u32 intel_rom_read32(struct intel_rom *rom, loff_t offset); u16 intel_rom_read16(struct intel_rom *rom, loff_t offset); diff --git a/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c b/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c index 465b5a6ad5bb..eddb471119c6 100644 --- a/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c +++ b/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c @@ -144,7 +144,6 @@ static int imx_drm_dumb_create(struct drm_file *file_priv, struct drm_mode_create_dumb *args) { u32 fourcc; - const struct drm_format_info *info; u64 pitch_align; int ret; @@ -156,12 +155,15 @@ static int imx_drm_dumb_create(struct drm_file *file_priv, * the allocated buffer. 
*/ fourcc = drm_driver_color_mode_format(drm, args->bpp); - if (fourcc == DRM_FORMAT_INVALID) - return -EINVAL; - info = drm_format_info(fourcc); - if (!info) - return -EINVAL; - pitch_align = drm_format_info_min_pitch(info, 0, SZ_8); + if (fourcc != DRM_FORMAT_INVALID) { + const struct drm_format_info *info = drm_format_info(fourcc); + + if (!info) + return -EINVAL; + pitch_align = drm_format_info_min_pitch(info, 0, 8); + } else { + pitch_align = DIV_ROUND_UP(args->bpp, SZ_8) * 8; + } if (!pitch_align || pitch_align > U32_MAX) return -EINVAL; ret = drm_mode_size_dumb(drm, args, pitch_align, 0); diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c index da53ca88251e..e8066f9fd534 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c @@ -527,13 +527,14 @@ static void mdp4_crtc_wait_for_flush_done(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); struct mdp4_kms *mdp4_kms = get_kms(crtc); + wait_queue_head_t *queue = drm_crtc_vblank_waitqueue(crtc); int ret; ret = drm_crtc_vblank_get(crtc); if (ret) return; - ret = wait_event_timeout(dev->vblank[drm_crtc_index(crtc)].queue, + ret = wait_event_timeout(*queue, !(mdp4_read(mdp4_kms, REG_MDP4_OVERLAY_FLUSH) & mdp4_crtc->flushed_mask), msecs_to_jiffies(50)); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index 4c4900a7beda..373ae7d9bf01 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -1234,6 +1234,7 @@ static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc) struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state); struct mdp5_ctl *ctl = mdp5_cstate->ctl; + wait_queue_head_t *queue = drm_crtc_vblank_waitqueue(crtc); int ret; /* Should not call this function if crtc is disabled. */ @@ -1244,7 +1245,7 @@ static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc) if (ret) return; - ret = wait_event_timeout(dev->vblank[drm_crtc_index(crtc)].queue, + ret = wait_event_timeout(*queue, ((mdp5_ctl_get_commit_status(ctl) & mdp5_crtc->flushed_mask) == 0), msecs_to_jiffies(50)); diff --git a/drivers/gpu/drm/nouveau/include/nvfw/hs.h b/drivers/gpu/drm/nouveau/include/nvfw/hs.h index 8b58b668fc0c..c78ab11ec3ac 100644 --- a/drivers/gpu/drm/nouveau/include/nvfw/hs.h +++ b/drivers/gpu/drm/nouveau/include/nvfw/hs.h @@ -52,7 +52,9 @@ struct nvfw_hs_load_header_v2 { struct { u32 offset; u32 size; - } app[]; + u32 data_offset; + u32 data_size; + } app[] __counted_by(num_apps); }; const struct nvfw_hs_load_header_v2 *nvfw_hs_load_header_v2(struct nvkm_subdev *, const void *); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 84a275b06295..0e409414f44d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -10,7 +10,7 @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 4 -#define DRIVER_PATCHLEVEL 0 +#define DRIVER_PATCHLEVEL 1 /* * 1.1.1: @@ -35,6 +35,8 @@ * programs that get directly linked with NVKM. 
* 1.3.1: * - implemented limited ABI16/NVIF interop + * 1.4.1: + * - add variable page sizes and compression for Turing+ */ #include <linux/notifier.h> diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 79eefdfd08a2..f10809115c56 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -107,34 +107,34 @@ nouveau_uvmm_vmm_sparse_unref(struct nouveau_uvmm *uvmm, static int nouveau_uvmm_vmm_get(struct nouveau_uvmm *uvmm, - u64 addr, u64 range) + u64 addr, u64 range, u8 page_shift) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_get(vmm, addr, range, PAGE_SHIFT); + return nvif_vmm_raw_get(vmm, addr, range, page_shift); } static int nouveau_uvmm_vmm_put(struct nouveau_uvmm *uvmm, - u64 addr, u64 range) + u64 addr, u64 range, u8 page_shift) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_put(vmm, addr, range, PAGE_SHIFT); + return nvif_vmm_raw_put(vmm, addr, range, page_shift); } static int nouveau_uvmm_vmm_unmap(struct nouveau_uvmm *uvmm, - u64 addr, u64 range, bool sparse) + u64 addr, u64 range, u8 page_shift, bool sparse) { struct nvif_vmm *vmm = &uvmm->vmm.vmm; - return nvif_vmm_raw_unmap(vmm, addr, range, PAGE_SHIFT, sparse); + return nvif_vmm_raw_unmap(vmm, addr, range, page_shift, sparse); } static int nouveau_uvmm_vmm_map(struct nouveau_uvmm *uvmm, - u64 addr, u64 range, + u64 addr, u64 range, u8 page_shift, u64 bo_offset, u8 kind, struct nouveau_mem *mem) { @@ -163,7 +163,7 @@ nouveau_uvmm_vmm_map(struct nouveau_uvmm *uvmm, return -ENOSYS; } - return nvif_vmm_raw_map(vmm, addr, range, PAGE_SHIFT, + return nvif_vmm_raw_map(vmm, addr, range, page_shift, &args, argc, &mem->mem, bo_offset); } @@ -182,8 +182,9 @@ nouveau_uvma_vmm_put(struct nouveau_uvma *uvma) { u64 addr = uvma->va.va.addr; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; - return nouveau_uvmm_vmm_put(to_uvmm(uvma), addr, range); + return nouveau_uvmm_vmm_put(to_uvmm(uvma), addr, range, page_shift); } static int @@ -193,9 +194,11 @@ nouveau_uvma_map(struct nouveau_uvma *uvma, u64 addr = uvma->va.va.addr; u64 offset = uvma->va.gem.offset; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; return nouveau_uvmm_vmm_map(to_uvmm(uvma), addr, range, - offset, uvma->kind, mem); + page_shift, offset, uvma->kind, + mem); } static int @@ -203,12 +206,13 @@ nouveau_uvma_unmap(struct nouveau_uvma *uvma) { u64 addr = uvma->va.va.addr; u64 range = uvma->va.va.range; + u8 page_shift = uvma->page_shift; bool sparse = !!uvma->region; if (drm_gpuva_invalidated(&uvma->va)) return 0; - return nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, sparse); + return nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, page_shift, sparse); } static int @@ -450,6 +454,62 @@ op_unmap_prepare_unwind(struct drm_gpuva *va) drm_gpuva_insert(va->vm, va); } +static bool +op_map_aligned_to_page_shift(const struct drm_gpuva_op_map *op, u8 page_shift) +{ + u64 non_page_bits = (1ULL << page_shift) - 1; + + return (op->va.addr & non_page_bits) == 0 && + (op->va.range & non_page_bits) == 0 && + (op->gem.offset & non_page_bits) == 0; +} + +static u8 +select_page_shift(struct nouveau_uvmm *uvmm, struct drm_gpuva_op_map *op) +{ + struct nouveau_bo *nvbo = nouveau_gem_object(op->gem.obj); + + /* nouveau_bo_fixup_align() guarantees that the page size will be aligned + * for most cases, but it can't handle cases where userspace allocates with + * a size and then binds with a smaller granularity. 
So in order to avoid + * breaking old userspace, we need to ensure that the VA is actually + * aligned before using it, and if it isn't, then we downgrade to the first + * granularity that will fit, which is optimal from a correctness and + * performance perspective. + */ + if (op_map_aligned_to_page_shift(op, nvbo->page)) + return nvbo->page; + + struct nouveau_mem *mem = nouveau_mem(nvbo->bo.resource); + struct nvif_vmm *vmm = &uvmm->vmm.vmm; + int i; + + /* If the given granularity doesn't fit, let's find one that will fit. */ + for (i = 0; i < vmm->page_nr; i++) { + /* Ignore anything that is bigger or identical to the BO preference. */ + if (vmm->page[i].shift >= nvbo->page) + continue; + + /* Skip incompatible domains. */ + if ((mem->mem.type & NVIF_MEM_VRAM) && !vmm->page[i].vram) + continue; + if ((mem->mem.type & NVIF_MEM_HOST) && + (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) + continue; + + /* If it fits, return the proposed shift. */ + if (op_map_aligned_to_page_shift(op, vmm->page[i].shift)) + return vmm->page[i].shift; + } + + /* If we get here then nothing can reconcile the requirements. This should never + * happen. + */ + drm_WARN_ONCE(op->gem.obj->dev, 1, "Could not find an appropriate page size.\n"); + + return PAGE_SHIFT; +} + static void nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, struct nouveau_uvma_prealloc *new, @@ -501,7 +561,8 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, if (vmm_get_range) nouveau_uvmm_vmm_put(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + select_page_shift(uvmm, &op->map)); break; } case DRM_GPUVA_OP_REMAP: { @@ -528,6 +589,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, u64 ustart = va->va.addr; u64 urange = va->va.range; u64 uend = ustart + urange; + u8 page_shift = uvma_from_va(va)->page_shift; /* Nothing to do for mappings we merge with. 
*/ if (uend == vmm_get_start || @@ -538,7 +600,8 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, u64 vmm_get_range = ustart - vmm_get_start; nouveau_uvmm_vmm_put(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + page_shift); } vmm_get_start = uend; break; @@ -592,6 +655,7 @@ op_map_prepare(struct nouveau_uvmm *uvmm, uvma->region = args->region; uvma->kind = args->kind; + uvma->page_shift = select_page_shift(uvmm, op); drm_gpuva_map(&uvmm->base, &uvma->va, op); @@ -633,7 +697,8 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, if (vmm_get_range) { ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, + new->map->page_shift); if (ret) { op_map_prepare_unwind(new->map); goto unwind; @@ -689,6 +754,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, u64 ustart = va->va.addr; u64 urange = va->va.range; u64 uend = ustart + urange; + u8 page_shift = uvma_from_va(va)->page_shift; op_unmap_prepare(u); @@ -704,7 +770,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, u64 vmm_get_range = ustart - vmm_get_start; ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, - vmm_get_range); + vmm_get_range, page_shift); if (ret) { op_unmap_prepare_unwind(va); goto unwind; @@ -799,10 +865,11 @@ op_unmap_range(struct drm_gpuva_op_unmap *u, u64 addr, u64 range) { struct nouveau_uvma *uvma = uvma_from_va(u->va); + u8 page_shift = uvma->page_shift; bool sparse = !!uvma->region; if (!drm_gpuva_invalidated(u->va)) - nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, sparse); + nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, page_shift, sparse); } static void @@ -882,6 +949,7 @@ nouveau_uvmm_sm_cleanup(struct nouveau_uvmm *uvmm, struct drm_gpuva_op_map *n = r->next; struct drm_gpuva *va = r->unmap->va; struct nouveau_uvma *uvma = uvma_from_va(va); + u8 page_shift = uvma->page_shift; if (unmap) { u64 addr = va->va.addr; @@ -893,7 +961,7 @@ nouveau_uvmm_sm_cleanup(struct nouveau_uvmm *uvmm, if (n) end = n->va.addr; - nouveau_uvmm_vmm_put(uvmm, addr, end - addr); + nouveau_uvmm_vmm_put(uvmm, addr, end - addr, page_shift); } nouveau_uvma_gem_put(uvma); diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h index 9d3c348581eb..51925711ae90 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h @@ -33,6 +33,7 @@ struct nouveau_uvma { struct nouveau_uvma_region *region; u8 kind; + u8 page_shift; }; #define uvmm_from_gpuvm(x) container_of((x), struct nouveau_uvmm, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index 851fd847a2a9..ed15a4475181 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -21,9 +21,7 @@ */ #include "vmm.h" -#include <core/client.h> #include <subdev/fb.h> -#include <subdev/ltc.h> #include <subdev/timer.h> #include <engine/gr.h> @@ -111,13 +109,33 @@ gp100_vmm_pgt_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, nvkm_done(pt->memory); } +static inline u64 +gp100_vmm_comptag_nr(u64 size) +{ + return size >> 16; /* One comptag per 64KiB VRAM. */ +} + +static inline u64 +gp100_vmm_pte_comptagline_base(u64 addr) +{ + /* RM allocates enough comptags for all of VRAM, so use a 1:1 mapping. 
*/ + return (1 + gp100_vmm_comptag_nr(addr)) << 36; /* NV_MMU_VER2_PTE_COMPTAGLINE */ +} + +static inline u64 +gp100_vmm_pte_comptagline_incr(u32 page_size) +{ + return gp100_vmm_comptag_nr(page_size) << 36; /* NV_MMU_VER2_PTE_COMPTAGLINE */ +} + static inline void gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) { u64 data = (addr >> 4) | map->type; - map->type += ptes * map->ctag; + if (map->ctag) + data |= gp100_vmm_pte_comptagline_base(addr); while (ptes--) { VMM_WO064(pt, vmm, ptei++ * 8, data); @@ -142,7 +160,6 @@ gp100_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, while (ptes--) { const u64 data = (*map->dma++ >> 4) | map->type; VMM_WO064(pt, vmm, ptei++ * 8, data); - map->type += map->ctag; } nvkm_done(pt->memory); return; @@ -200,7 +217,8 @@ gp100_vmm_pd0_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, { u64 data = (addr >> 4) | map->type; - map->type += ptes * map->ctag; + if (map->ctag) + data |= gp100_vmm_pte_comptagline_base(addr); while (ptes--) { VMM_WO128(pt, vmm, ptei++ * 0x10, data, 0ULL); @@ -411,8 +429,6 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, struct gp100_vmm_map_vn vn; struct gp100_vmm_map_v0 v0; } *args = argv; - struct nvkm_device *device = vmm->mmu->subdev.device; - struct nvkm_memory *memory = map->memory; u8 kind, kind_inv, priv, ro, vol; int kindn, aper, ret = -ENOSYS; const u8 *kindm; @@ -449,29 +465,24 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, return -EINVAL; } + /* Handle compression. */ if (kindm[kind] != kind) { - u64 tags = nvkm_memory_size(memory) >> 16; - if (aper != 0 || !(page->type & NVKM_VMM_PAGE_COMP)) { - VMM_DEBUG(vmm, "comp %d %02x", aper, page->type); - return -EINVAL; - } - - if (!map->no_comp) { - ret = nvkm_memory_tags_get(memory, device, tags, - nvkm_ltc_tags_clear, - &map->tags); - if (ret) { - VMM_DEBUG(vmm, "comp %d", ret); - return ret; + struct nvkm_device *device = vmm->mmu->subdev.device; + + /* Compression is only supported when using GSP-RM, as + * PMU firmware is required in order to initialise the + * compbit backing store. + */ + if (nvkm_gsp_rm(device->gsp)) { + /* Turing GPUs require PTE_COMPTAGLINE to be filled, + * in addition to specifying a compressed kind. + */ + if (device->card_type < GA100) { + map->ctag = gp100_vmm_pte_comptagline_incr(1 << map->page->shift); + map->next |= map->ctag; } - } - - if (!map->no_comp && map->tags->mn) { - tags = map->tags->mn->offset + (map->offset >> 16); - map->ctag |= ((1ULL << page->shift) >> 16) << 36; - map->type |= tags << 36; - map->next |= map->ctag; } else { + /* Revert to non-compressed kind. 
*/ kind = kindm[kind]; } } @@ -592,8 +603,8 @@ gp100_vmm = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, - { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxC }, - { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxC }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxx }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxx }, { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SVHx }, {} } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c index e081239afe58..5791d134962b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -34,8 +34,8 @@ gp10b_vmm = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, - { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHC }, - { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHC }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHx }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHx }, { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SxHx }, {} } diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 58fead90533a..6a41dfd7aaf3 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1147,6 +1147,20 @@ static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx, } } +static void +panthor_vm_op_ctx_return_vma(struct panthor_vm_op_ctx *op_ctx, + struct panthor_vma *vma) +{ + for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) { + if (!op_ctx->preallocated_vmas[i]) { + op_ctx->preallocated_vmas[i] = vma; + return; + } + } + + WARN_ON_ONCE(1); +} + static struct panthor_vma * panthor_vm_op_ctx_get_vma(struct panthor_vm_op_ctx *op_ctx) { @@ -2082,8 +2096,10 @@ static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv) ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags), op_ctx->map.sgt, op->map.gem.offset, op->map.va.range); - if (ret) + if (ret) { + panthor_vm_op_ctx_return_vma(op_ctx, vma); return ret; + } /* Ref owned by the mapping now, clear the obj field so we don't release the * pinning/obj ref behind GPUVA's back. diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 2463ced2427a..1d4f1b822e7b 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1320,7 +1320,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->name = args->name; sched->timeout = args->timeout; sched->hang_limit = args->hang_limit; - sched->timeout_wq = args->timeout_wq ? args->timeout_wq : system_wq; + sched->timeout_wq = args->timeout_wq ? args->timeout_wq : system_percpu_wq; sched->score = args->score ? 
args->score : &sched->_score; sched->dev = args->dev; diff --git a/drivers/gpu/drm/sun4i/sun8i_csc.c b/drivers/gpu/drm/sun4i/sun8i_csc.c index c100d29b1a89..ce81c12f511d 100644 --- a/drivers/gpu/drm/sun4i/sun8i_csc.c +++ b/drivers/gpu/drm/sun4i/sun8i_csc.c @@ -3,11 +3,20 @@ * Copyright (C) Jernej Skrabec <jernej.skrabec@siol.net> */ +#include <drm/drm_fourcc.h> +#include <drm/drm_framebuffer.h> +#include <drm/drm_plane.h> #include <drm/drm_print.h> #include "sun8i_csc.h" #include "sun8i_mixer.h" +enum sun8i_csc_mode { + SUN8I_CSC_MODE_OFF, + SUN8I_CSC_MODE_YUV2RGB, + SUN8I_CSC_MODE_YVU2RGB, +}; + static const u32 ccsc_base[][2] = { [CCSC_MIXER0_LAYOUT] = {CCSC00_OFFSET, CCSC01_OFFSET}, [CCSC_MIXER1_LAYOUT] = {CCSC10_OFFSET, CCSC11_OFFSET}, @@ -107,23 +116,28 @@ static const u32 yuv2rgb_de3[2][3][12] = { }, }; -static void sun8i_csc_set_coefficients(struct regmap *map, u32 base, - enum sun8i_csc_mode mode, - enum drm_color_encoding encoding, - enum drm_color_range range) +static void sun8i_csc_setup(struct regmap *map, u32 base, + enum sun8i_csc_mode mode, + enum drm_color_encoding encoding, + enum drm_color_range range) { + u32 base_reg, val; const u32 *table; - u32 base_reg; int i; table = yuv2rgb[range][encoding]; switch (mode) { + case SUN8I_CSC_MODE_OFF: + val = 0; + break; case SUN8I_CSC_MODE_YUV2RGB: + val = SUN8I_CSC_CTRL_EN; base_reg = SUN8I_CSC_COEFF(base, 0); regmap_bulk_write(map, base_reg, table, 12); break; case SUN8I_CSC_MODE_YVU2RGB: + val = SUN8I_CSC_CTRL_EN; for (i = 0; i < 12; i++) { if ((i & 3) == 1) base_reg = SUN8I_CSC_COEFF(base, i + 1); @@ -135,28 +149,37 @@ static void sun8i_csc_set_coefficients(struct regmap *map, u32 base, } break; default: + val = 0; DRM_WARN("Wrong CSC mode specified.\n"); return; } + + regmap_write(map, SUN8I_CSC_CTRL(base), val); } -static void sun8i_de3_ccsc_set_coefficients(struct regmap *map, int layer, - enum sun8i_csc_mode mode, - enum drm_color_encoding encoding, - enum drm_color_range range) +static void sun8i_de3_ccsc_setup(struct regmap *map, int layer, + enum sun8i_csc_mode mode, + enum drm_color_encoding encoding, + enum drm_color_range range) { + u32 addr, val, mask; const u32 *table; - u32 addr; int i; + mask = SUN50I_MIXER_BLEND_CSC_CTL_EN(layer); table = yuv2rgb_de3[range][encoding]; switch (mode) { + case SUN8I_CSC_MODE_OFF: + val = 0; + break; case SUN8I_CSC_MODE_YUV2RGB: + val = mask; addr = SUN50I_MIXER_BLEND_CSC_COEFF(DE3_BLD_BASE, layer, 0); regmap_bulk_write(map, addr, table, 12); break; case SUN8I_CSC_MODE_YVU2RGB: + val = mask; for (i = 0; i < 12; i++) { if ((i & 3) == 1) addr = SUN50I_MIXER_BLEND_CSC_COEFF(DE3_BLD_BASE, @@ -173,67 +196,53 @@ static void sun8i_de3_ccsc_set_coefficients(struct regmap *map, int layer, } break; default: + val = 0; DRM_WARN("Wrong CSC mode specified.\n"); return; } -} - -static void sun8i_csc_enable(struct regmap *map, u32 base, bool enable) -{ - u32 val; - - if (enable) - val = SUN8I_CSC_CTRL_EN; - else - val = 0; - - regmap_update_bits(map, SUN8I_CSC_CTRL(base), SUN8I_CSC_CTRL_EN, val); -} - -static void sun8i_de3_ccsc_enable(struct regmap *map, int layer, bool enable) -{ - u32 val, mask; - - mask = SUN50I_MIXER_BLEND_CSC_CTL_EN(layer); - - if (enable) - val = mask; - else - val = 0; regmap_update_bits(map, SUN50I_MIXER_BLEND_CSC_CTL(DE3_BLD_BASE), mask, val); } -void sun8i_csc_set_ccsc_coefficients(struct sun8i_mixer *mixer, int layer, - enum sun8i_csc_mode mode, - enum drm_color_encoding encoding, - enum drm_color_range range) +static u32 sun8i_csc_get_mode(struct drm_plane_state 
*state) { - u32 base; + const struct drm_format_info *format; - if (mixer->cfg->de_type == SUN8I_MIXER_DE3) { - sun8i_de3_ccsc_set_coefficients(mixer->engine.regs, layer, - mode, encoding, range); - return; - } + if (!state->crtc || !state->visible) + return SUN8I_CSC_MODE_OFF; - base = ccsc_base[mixer->cfg->ccsc][layer]; + format = state->fb->format; + if (!format->is_yuv) + return SUN8I_CSC_MODE_OFF; - sun8i_csc_set_coefficients(mixer->engine.regs, base, - mode, encoding, range); + switch (format->format) { + case DRM_FORMAT_YVU411: + case DRM_FORMAT_YVU420: + case DRM_FORMAT_YVU422: + case DRM_FORMAT_YVU444: + return SUN8I_CSC_MODE_YVU2RGB; + default: + return SUN8I_CSC_MODE_YUV2RGB; + } } -void sun8i_csc_enable_ccsc(struct sun8i_mixer *mixer, int layer, bool enable) +void sun8i_csc_config(struct sun8i_layer *layer, + struct drm_plane_state *state) { + u32 mode = sun8i_csc_get_mode(state); u32 base; - if (mixer->cfg->de_type == SUN8I_MIXER_DE3) { - sun8i_de3_ccsc_enable(mixer->engine.regs, layer, enable); + if (layer->cfg->de_type == SUN8I_MIXER_DE3) { + sun8i_de3_ccsc_setup(layer->regs, layer->channel, + mode, state->color_encoding, + state->color_range); return; } - base = ccsc_base[mixer->cfg->ccsc][layer]; + base = ccsc_base[layer->cfg->ccsc][layer->channel]; - sun8i_csc_enable(mixer->engine.regs, base, enable); + sun8i_csc_setup(layer->regs, base, + mode, state->color_encoding, + state->color_range); } diff --git a/drivers/gpu/drm/sun4i/sun8i_csc.h b/drivers/gpu/drm/sun4i/sun8i_csc.h index 828b86fd0cab..2a4b79599610 100644 --- a/drivers/gpu/drm/sun4i/sun8i_csc.h +++ b/drivers/gpu/drm/sun4i/sun8i_csc.h @@ -8,7 +8,8 @@ #include <drm/drm_color_mgmt.h> -struct sun8i_mixer; +struct drm_plane_state; +struct sun8i_layer; /* VI channel CSC units offsets */ #define CCSC00_OFFSET 0xAA050 @@ -22,16 +23,7 @@ struct sun8i_mixer; #define SUN8I_CSC_CTRL_EN BIT(0) -enum sun8i_csc_mode { - SUN8I_CSC_MODE_OFF, - SUN8I_CSC_MODE_YUV2RGB, - SUN8I_CSC_MODE_YVU2RGB, -}; - -void sun8i_csc_set_ccsc_coefficients(struct sun8i_mixer *mixer, int layer, - enum sun8i_csc_mode mode, - enum drm_color_encoding encoding, - enum drm_color_range range); -void sun8i_csc_enable_ccsc(struct sun8i_mixer *mixer, int layer, bool enable); +void sun8i_csc_config(struct sun8i_layer *layer, + struct drm_plane_state *state); #endif diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.c b/drivers/gpu/drm/sun4i/sun8i_mixer.c index e2a532e11183..ce9c155bfad7 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.c +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.c @@ -251,24 +251,6 @@ int sun8i_mixer_drm_format_to_hw(u32 format, u32 *hw_format) return -EINVAL; } -static void sun8i_layer_enable(struct sun8i_layer *layer, bool enable) -{ - u32 ch_base = sun8i_channel_base(layer->mixer, layer->channel); - u32 val, reg, mask; - - if (layer->type == SUN8I_LAYER_TYPE_UI) { - val = enable ? SUN8I_MIXER_CHAN_UI_LAYER_ATTR_EN : 0; - mask = SUN8I_MIXER_CHAN_UI_LAYER_ATTR_EN; - reg = SUN8I_MIXER_CHAN_UI_LAYER_ATTR(ch_base, layer->overlay); - } else { - val = enable ? 
SUN8I_MIXER_CHAN_VI_LAYER_ATTR_EN : 0; - mask = SUN8I_MIXER_CHAN_VI_LAYER_ATTR_EN; - reg = SUN8I_MIXER_CHAN_VI_LAYER_ATTR(ch_base, layer->overlay); - } - - regmap_update_bits(layer->mixer->engine.regs, reg, mask, val); -} - static void sun8i_mixer_commit(struct sunxi_engine *engine, struct drm_crtc *crtc, struct drm_atomic_state *state) @@ -284,10 +266,10 @@ static void sun8i_mixer_commit(struct sunxi_engine *engine, drm_for_each_plane(plane, state->dev) { struct sun8i_layer *layer = plane_to_sun8i_layer(plane); + int w, h, x, y, zpos; bool enable; - int zpos; - if (!(plane->possible_crtcs & drm_crtc_mask(crtc)) || layer->mixer != mixer) + if (!(plane->possible_crtcs & drm_crtc_mask(crtc))) continue; plane_state = drm_atomic_get_new_plane_state(state, plane); @@ -296,23 +278,28 @@ static void sun8i_mixer_commit(struct sunxi_engine *engine, enable = plane_state->crtc && plane_state->visible; zpos = plane_state->normalized_zpos; + x = plane_state->dst.x1; + y = plane_state->dst.y1; + w = drm_rect_width(&plane_state->dst); + h = drm_rect_height(&plane_state->dst); - DRM_DEBUG_DRIVER(" plane %d: chan=%d ovl=%d en=%d zpos=%d\n", - plane->base.id, layer->channel, layer->overlay, - enable, zpos); - - /* - * We always update the layer enable bit, because it can clear - * spontaneously for unknown reasons. - */ - sun8i_layer_enable(layer, enable); + DRM_DEBUG_DRIVER(" plane %d: chan=%d ovl=%d en=%d zpos=%d x=%d y=%d w=%d h=%d\n", + plane->base.id, layer->index, layer->overlay, + enable, zpos, x, y, w, h); if (!enable) continue; /* Route layer to pipe based on zpos */ - route |= layer->channel << SUN8I_MIXER_BLEND_ROUTE_PIPE_SHIFT(zpos); + route |= layer->index << SUN8I_MIXER_BLEND_ROUTE_PIPE_SHIFT(zpos); pipe_en |= SUN8I_MIXER_BLEND_PIPE_CTL_EN(zpos); + + regmap_write(bld_regs, + SUN8I_MIXER_BLEND_ATTR_COORD(bld_base, zpos), + SUN8I_MIXER_COORD(x, y)); + regmap_write(bld_regs, + SUN8I_MIXER_BLEND_ATTR_INSIZE(bld_base, zpos), + SUN8I_MIXER_SIZE(w, h)); } regmap_write(bld_regs, SUN8I_MIXER_BLEND_ROUTE(bld_base), route); @@ -329,18 +316,30 @@ static struct drm_plane **sun8i_layers_init(struct drm_device *drm, { struct drm_plane **planes; struct sun8i_mixer *mixer = engine_to_sun8i_mixer(engine); + int plane_cnt = mixer->cfg->ui_num + mixer->cfg->vi_num; + enum drm_plane_type type; + unsigned int phy_index; int i; - planes = devm_kcalloc(drm->dev, - mixer->cfg->vi_num + mixer->cfg->ui_num + 1, - sizeof(*planes), GFP_KERNEL); + planes = devm_kcalloc(drm->dev, plane_cnt, sizeof(*planes), GFP_KERNEL); if (!planes) return ERR_PTR(-ENOMEM); for (i = 0; i < mixer->cfg->vi_num; i++) { struct sun8i_layer *layer; - layer = sun8i_vi_layer_init_one(drm, mixer, i); + if (i == 0 && !mixer->cfg->ui_num) + type = DRM_PLANE_TYPE_PRIMARY; + else + type = DRM_PLANE_TYPE_OVERLAY; + + phy_index = i; + if (mixer->cfg->de_type == SUN8I_MIXER_DE33) + phy_index = mixer->cfg->map[i]; + + layer = sun8i_vi_layer_init_one(drm, type, mixer->engine.regs, + i, phy_index, plane_cnt, + &mixer->cfg->lay_cfg); if (IS_ERR(layer)) { dev_err(drm->dev, "Couldn't initialize overlay plane\n"); @@ -351,16 +350,28 @@ static struct drm_plane **sun8i_layers_init(struct drm_device *drm, } for (i = 0; i < mixer->cfg->ui_num; i++) { + unsigned int index = mixer->cfg->vi_num + i; struct sun8i_layer *layer; - layer = sun8i_ui_layer_init_one(drm, mixer, i); + if (i == 0) + type = DRM_PLANE_TYPE_PRIMARY; + else + type = DRM_PLANE_TYPE_OVERLAY; + + phy_index = index; + if (mixer->cfg->de_type == SUN8I_MIXER_DE33) + phy_index = mixer->cfg->map[index]; + + 
layer = sun8i_ui_layer_init_one(drm, type, mixer->engine.regs, + index, phy_index, plane_cnt, + &mixer->cfg->lay_cfg); if (IS_ERR(layer)) { dev_err(drm->dev, "Couldn't initialize %s plane\n", i ? "overlay" : "primary"); return ERR_CAST(layer); } - planes[mixer->cfg->vi_num + i] = &layer->plane; + planes[index] = &layer->plane; } return planes; @@ -693,119 +704,173 @@ static void sun8i_mixer_remove(struct platform_device *pdev) } static const struct sun8i_mixer_cfg sun8i_a83t_mixer0_cfg = { - .ccsc = CCSC_MIXER0_LAYOUT, + .lay_cfg = { + .ccsc = CCSC_MIXER0_LAYOUT, + .de_type = SUN8I_MIXER_DE2, + .vi_scaler_num = 1, + .scaler_mask = 0xf, + .scanline_yuv = 2048, + .de2_fcc_alpha = 1, + }, .de_type = SUN8I_MIXER_DE2, - .scaler_mask = 0xf, - .scanline_yuv = 2048, .ui_num = 3, .vi_num = 1, }; static const struct sun8i_mixer_cfg sun8i_a83t_mixer1_cfg = { - .ccsc = CCSC_MIXER1_LAYOUT, + .lay_cfg = { + .ccsc = CCSC_MIXER1_LAYOUT, + .de_type = SUN8I_MIXER_DE2, + .vi_scaler_num = 1, + .scaler_mask = 0x3, + .scanline_yuv = 2048, + .de2_fcc_alpha = 1, + }, .de_type = SUN8I_MIXER_DE2, - .scaler_mask = 0x3, - .scanline_yuv = 2048, .ui_num = 1, .vi_num = 1, }; static const struct sun8i_mixer_cfg sun8i_h3_mixer0_cfg = { - .ccsc = CCSC_MIXER0_LAYOUT, + .lay_cfg = { + .ccsc = CCSC_MIXER0_LAYOUT, + .de_type = SUN8I_MIXER_DE2, + .vi_scaler_num = 1, + .scaler_mask = 0xf, + .scanline_yuv = 2048, + .de2_fcc_alpha = 1, + }, .de_type = SUN8I_MIXER_DE2, .mod_rate = 432000000, - .scaler_mask = 0xf, - .scanline_yuv = 2048, .ui_num = 3, .vi_num = 1, }; static const struct sun8i_mixer_cfg sun8i_r40_mixer0_cfg = { - .ccsc = CCSC_MIXER0_LAYOUT, + .lay_cfg = { + .ccsc = CCSC_MIXER0_LAYOUT, + .de_type = SUN8I_MIXER_DE2, + .vi_scaler_num = 1, + .scaler_mask = 0xf, + .scanline_yuv = 2048, + .de2_fcc_alpha = 1, + }, .de_type = SUN8I_MIXER_DE2, .mod_rate = 297000000, - .scaler_mask = 0xf, - .scanline_yuv = 2048, .ui_num = 3, .vi_num = 1, }; static const struct sun8i_mixer_cfg sun8i_r40_mixer1_cfg = { - .ccsc = CCSC_MIXER1_LAYOUT, + .lay_cfg = { + .ccsc = CCSC_MIXER1_LAYOUT, + .de_type = SUN8I_MIXER_DE2, + .vi_scaler_num = 1, + .scaler_mask = 0x3, + .scanline_yuv = 2048, + .de2_fcc_alpha = 1, + }, .de_type = SUN8I_MIXER_DE2, .mod_rate = 297000000, - .scaler_mask = 0x3, - .scanline_yuv = 2048, .ui_num = 1, .vi_num = 1, }; static const struct sun8i_mixer_cfg sun8i_v3s_mixer_cfg = { - .de_type = SUN8I_MIXER_DE2, - .vi_num = 2, - .ui_num = 1, - .scaler_mask = 0x3, - .scanline_yuv = 2048, - .ccsc = CCSC_MIXER0_LAYOUT, - .mod_rate = 150000000, + .lay_cfg = { + .ccsc = CCSC_MIXER0_LAYOUT, + .de_type = SUN8I_MIXER_DE2, + .vi_scaler_num = 2, + .scaler_mask = 0x3, + .scanline_yuv = 2048, + }, + .de_type = SUN8I_MIXER_DE2, + .mod_rate = 150000000, + .vi_num = 2, + .ui_num = 1, }; static const struct sun8i_mixer_cfg sun20i_d1_mixer0_cfg = { - .ccsc = CCSC_D1_MIXER0_LAYOUT, + .lay_cfg = { + .ccsc = CCSC_D1_MIXER0_LAYOUT, + .de_type = SUN8I_MIXER_DE2, + .vi_scaler_num = 1, + .scaler_mask = 0x3, + .scanline_yuv = 2048, + .de2_fcc_alpha = 1, + }, .de_type = SUN8I_MIXER_DE2, .mod_rate = 297000000, - .scaler_mask = 0x3, - .scanline_yuv = 2048, .ui_num = 1, .vi_num = 1, }; static const struct sun8i_mixer_cfg sun20i_d1_mixer1_cfg = { - .ccsc = CCSC_MIXER1_LAYOUT, + .lay_cfg = { + .ccsc = CCSC_MIXER1_LAYOUT, + .de_type = SUN8I_MIXER_DE2, + .vi_scaler_num = 1, + .scaler_mask = 0x1, + .scanline_yuv = 1024, + .de2_fcc_alpha = 1, + }, .de_type = SUN8I_MIXER_DE2, .mod_rate = 297000000, - .scaler_mask = 0x1, - .scanline_yuv = 1024, .ui_num = 0, .vi_num = 
1, }; static const struct sun8i_mixer_cfg sun50i_a64_mixer0_cfg = { - .ccsc = CCSC_MIXER0_LAYOUT, + .lay_cfg = { + .ccsc = CCSC_MIXER0_LAYOUT, + .de_type = SUN8I_MIXER_DE2, + .vi_scaler_num = 1, + .scaler_mask = 0xf, + .scanline_yuv = 4096, + .de2_fcc_alpha = 1, + }, .de_type = SUN8I_MIXER_DE2, .mod_rate = 297000000, - .scaler_mask = 0xf, - .scanline_yuv = 4096, .ui_num = 3, .vi_num = 1, }; static const struct sun8i_mixer_cfg sun50i_a64_mixer1_cfg = { - .ccsc = CCSC_MIXER1_LAYOUT, + .lay_cfg = { + .ccsc = CCSC_MIXER1_LAYOUT, + .de_type = SUN8I_MIXER_DE2, + .vi_scaler_num = 1, + .scaler_mask = 0x3, + .scanline_yuv = 2048, + .de2_fcc_alpha = 1, + }, .de_type = SUN8I_MIXER_DE2, .mod_rate = 297000000, - .scaler_mask = 0x3, - .scanline_yuv = 2048, .ui_num = 1, .vi_num = 1, }; static const struct sun8i_mixer_cfg sun50i_h6_mixer0_cfg = { - .ccsc = CCSC_MIXER0_LAYOUT, + .lay_cfg = { + .de_type = SUN8I_MIXER_DE3, + .vi_scaler_num = 1, + .scaler_mask = 0xf, + .scanline_yuv = 4096, + }, .de_type = SUN8I_MIXER_DE3, .mod_rate = 600000000, - .scaler_mask = 0xf, - .scanline_yuv = 4096, .ui_num = 3, .vi_num = 1, }; static const struct sun8i_mixer_cfg sun50i_h616_mixer0_cfg = { - .ccsc = CCSC_MIXER0_LAYOUT, + .lay_cfg = { + .de_type = SUN8I_MIXER_DE33, + .scaler_mask = 0xf, + .scanline_yuv = 4096, + }, .de_type = SUN8I_MIXER_DE33, .mod_rate = 600000000, - .scaler_mask = 0xf, - .scanline_yuv = 4096, .ui_num = 3, .vi_num = 1, .map = {0, 6, 7, 8}, diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h index a1c1cbccc654..e2f83301aae8 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.h +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h @@ -39,6 +39,9 @@ #define DE3_CH_BASE 0x1000 #define DE3_CH_SIZE 0x0800 +#define DE33_CH_BASE 0x1000 +#define DE33_CH_SIZE 0x20000 + #define SUN8I_MIXER_BLEND_PIPE_CTL(base) ((base) + 0) #define SUN8I_MIXER_BLEND_ATTR_FCOLOR(base, x) ((base) + 0x4 + 0x10 * (x)) #define SUN8I_MIXER_BLEND_ATTR_INSIZE(base, x) ((base) + 0x8 + 0x10 * (x)) @@ -161,29 +164,45 @@ enum sun8i_mixer_type { }; /** - * struct sun8i_mixer_cfg - mixer HW configuration - * @vi_num: number of VI channels - * @ui_num: number of UI channels + * struct sun8i_layer_cfg - layer configuration + * @vi_scaler_num: Number of VI scalers. Used on DE2 and DE3. * @scaler_mask: bitmask which tells which channel supports scaling * First, scaler supports for VI channels is defined and after that, scaler * support for UI channels. For example, if mixer has 2 VI channels without * scaler and 2 UI channels with scaler, bitmask would be 0xC. * @ccsc: select set of CCSC base addresses from the enumeration above. - * @mod_rate: module clock rate that needs to be set in order to have - * a functional block. * @de_type: sun8i_mixer_type enum representing the display engine generation. * @scaline_yuv: size of a scanline for VI scaler for YUV formats. - * @map: channel map for DE variants processing YUV separately (DE33) + * @de2_fcc_alpha: use FCC for missing DE2 VI alpha capability + * Most DE2 cores has FCC. If number of VI planes is one, enable this. 
*/ -struct sun8i_mixer_cfg { - int vi_num; - int ui_num; +struct sun8i_layer_cfg { + unsigned int vi_scaler_num; int scaler_mask; int ccsc; - unsigned long mod_rate; unsigned int de_type; unsigned int scanline_yuv; - unsigned int map[6]; + unsigned int de2_fcc_alpha : 1; +}; + +/** + * struct sun8i_mixer_cfg - mixer HW configuration + * @lay_cfg: layer configuration + * @vi_num: number of VI channels + * @ui_num: number of UI channels + * @de_type: sun8i_mixer_type enum representing the display engine generation. + * @mod_rate: module clock rate that needs to be set in order to have + * a functional block. + * @map: channel map for DE variants processing YUV separately (DE33) + */ + +struct sun8i_mixer_cfg { + struct sun8i_layer_cfg lay_cfg; + int vi_num; + int ui_num; + unsigned int de_type; + unsigned long mod_rate; + unsigned int map[6]; }; struct sun8i_mixer { @@ -206,11 +225,13 @@ enum { }; struct sun8i_layer { - struct drm_plane plane; - struct sun8i_mixer *mixer; - int type; - int channel; - int overlay; + struct drm_plane plane; + int type; + int index; + int channel; + int overlay; + struct regmap *regs; + const struct sun8i_layer_cfg *cfg; }; static inline struct sun8i_layer * @@ -239,14 +260,14 @@ sun8i_blender_regmap(struct sun8i_mixer *mixer) } static inline u32 -sun8i_channel_base(struct sun8i_mixer *mixer, int channel) +sun8i_channel_base(struct sun8i_layer *layer) { - if (mixer->cfg->de_type == SUN8I_MIXER_DE33) - return mixer->cfg->map[channel] * 0x20000 + DE2_CH_SIZE; - else if (mixer->cfg->de_type == SUN8I_MIXER_DE3) - return DE3_CH_BASE + channel * DE3_CH_SIZE; + if (layer->cfg->de_type == SUN8I_MIXER_DE33) + return DE33_CH_BASE + layer->channel * DE33_CH_SIZE; + else if (layer->cfg->de_type == SUN8I_MIXER_DE3) + return DE3_CH_BASE + layer->channel * DE3_CH_SIZE; else - return DE2_CH_BASE + channel * DE2_CH_SIZE; + return DE2_CH_BASE + layer->channel * DE2_CH_SIZE; } int sun8i_mixer_drm_format_to_hw(u32 format, u32 *hw_format); diff --git a/drivers/gpu/drm/sun4i/sun8i_ui_layer.c b/drivers/gpu/drm/sun4i/sun8i_ui_layer.c index 6108dda1e414..f08f6da55dd0 100644 --- a/drivers/gpu/drm/sun4i/sun8i_ui_layer.c +++ b/drivers/gpu/drm/sun4i/sun8i_ui_layer.c @@ -26,44 +26,49 @@ #include "sun8i_ui_scaler.h" #include "sun8i_vi_scaler.h" -static void sun8i_ui_layer_update_alpha(struct sun8i_mixer *mixer, int channel, - int overlay, struct drm_plane *plane) +static void sun8i_ui_layer_disable(struct sun8i_layer *layer) { - u32 mask, val, ch_base; + u32 ch_base = sun8i_channel_base(layer); - ch_base = sun8i_channel_base(mixer, channel); + regmap_write(layer->regs, + SUN8I_MIXER_CHAN_UI_LAYER_ATTR(ch_base, layer->overlay), 0); +} - mask = SUN8I_MIXER_CHAN_UI_LAYER_ATTR_ALPHA_MODE_MASK | - SUN8I_MIXER_CHAN_UI_LAYER_ATTR_ALPHA_MASK; +static void sun8i_ui_layer_update_attributes(struct sun8i_layer *layer, + struct drm_plane *plane) +{ + struct drm_plane_state *state = plane->state; + const struct drm_format_info *fmt; + u32 val, ch_base, hw_fmt; - val = SUN8I_MIXER_CHAN_UI_LAYER_ATTR_ALPHA(plane->state->alpha >> 8); + ch_base = sun8i_channel_base(layer); + fmt = state->fb->format; + sun8i_mixer_drm_format_to_hw(fmt->format, &hw_fmt); - val |= (plane->state->alpha == DRM_BLEND_ALPHA_OPAQUE) ? + val = SUN8I_MIXER_CHAN_UI_LAYER_ATTR_ALPHA(state->alpha >> 8); + val |= (state->alpha == DRM_BLEND_ALPHA_OPAQUE) ? 
SUN8I_MIXER_CHAN_UI_LAYER_ATTR_ALPHA_MODE_PIXEL : SUN8I_MIXER_CHAN_UI_LAYER_ATTR_ALPHA_MODE_COMBINED; + val |= hw_fmt << SUN8I_MIXER_CHAN_UI_LAYER_ATTR_FBFMT_OFFSET; + val |= SUN8I_MIXER_CHAN_UI_LAYER_ATTR_EN; - regmap_update_bits(mixer->engine.regs, - SUN8I_MIXER_CHAN_UI_LAYER_ATTR(ch_base, overlay), - mask, val); + regmap_write(layer->regs, + SUN8I_MIXER_CHAN_UI_LAYER_ATTR(ch_base, layer->overlay), val); } -static int sun8i_ui_layer_update_coord(struct sun8i_mixer *mixer, int channel, - int overlay, struct drm_plane *plane, - unsigned int zpos) +static void sun8i_ui_layer_update_coord(struct sun8i_layer *layer, + struct drm_plane *plane) { struct drm_plane_state *state = plane->state; u32 src_w, src_h, dst_w, dst_h; - struct regmap *bld_regs; - u32 bld_base, ch_base; u32 outsize, insize; u32 hphase, vphase; + u32 ch_base; DRM_DEBUG_DRIVER("Updating UI channel %d overlay %d\n", - channel, overlay); + layer->channel, layer->overlay); - bld_base = sun8i_blender_base(mixer); - bld_regs = sun8i_blender_regmap(mixer); - ch_base = sun8i_channel_base(mixer, channel); + ch_base = sun8i_channel_base(layer); src_w = drm_rect_width(&state->src) >> 16; src_h = drm_rect_height(&state->src) >> 16; @@ -80,10 +85,10 @@ static int sun8i_ui_layer_update_coord(struct sun8i_mixer *mixer, int channel, DRM_DEBUG_DRIVER("Layer source offset X: %d Y: %d\n", state->src.x1 >> 16, state->src.y1 >> 16); DRM_DEBUG_DRIVER("Layer source size W: %d H: %d\n", src_w, src_h); - regmap_write(mixer->engine.regs, - SUN8I_MIXER_CHAN_UI_LAYER_SIZE(ch_base, overlay), + regmap_write(layer->regs, + SUN8I_MIXER_CHAN_UI_LAYER_SIZE(ch_base, layer->overlay), insize); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_MIXER_CHAN_UI_OVL_SIZE(ch_base), insize); @@ -95,67 +100,27 @@ static int sun8i_ui_layer_update_coord(struct sun8i_mixer *mixer, int channel, hscale = state->src_w / state->crtc_w; vscale = state->src_h / state->crtc_h; - if (mixer->cfg->de_type == SUN8I_MIXER_DE33) { - sun8i_vi_scaler_setup(mixer, channel, src_w, src_h, - dst_w, dst_h, hscale, vscale, - hphase, vphase, + if (layer->cfg->de_type == SUN8I_MIXER_DE33) { + sun8i_vi_scaler_setup(layer, src_w, src_h, dst_w, dst_h, + hscale, vscale, hphase, vphase, state->fb->format); - sun8i_vi_scaler_enable(mixer, channel, true); + sun8i_vi_scaler_enable(layer, true); } else { - sun8i_ui_scaler_setup(mixer, channel, src_w, src_h, - dst_w, dst_h, hscale, vscale, - hphase, vphase); - sun8i_ui_scaler_enable(mixer, channel, true); + sun8i_ui_scaler_setup(layer, src_w, src_h, dst_w, dst_h, + hscale, vscale, hphase, vphase); + sun8i_ui_scaler_enable(layer, true); } } else { DRM_DEBUG_DRIVER("HW scaling is not needed\n"); - if (mixer->cfg->de_type == SUN8I_MIXER_DE33) - sun8i_vi_scaler_enable(mixer, channel, false); + if (layer->cfg->de_type == SUN8I_MIXER_DE33) + sun8i_vi_scaler_enable(layer, false); else - sun8i_ui_scaler_enable(mixer, channel, false); + sun8i_ui_scaler_enable(layer, false); } - - /* Set base coordinates */ - DRM_DEBUG_DRIVER("Layer destination coordinates X: %d Y: %d\n", - state->dst.x1, state->dst.y1); - DRM_DEBUG_DRIVER("Layer destination size W: %d H: %d\n", dst_w, dst_h); - regmap_write(bld_regs, - SUN8I_MIXER_BLEND_ATTR_COORD(bld_base, zpos), - SUN8I_MIXER_COORD(state->dst.x1, state->dst.y1)); - regmap_write(bld_regs, - SUN8I_MIXER_BLEND_ATTR_INSIZE(bld_base, zpos), - outsize); - - return 0; } -static int sun8i_ui_layer_update_formats(struct sun8i_mixer *mixer, int channel, - int overlay, struct drm_plane *plane) -{ - struct drm_plane_state 
*state = plane->state; - const struct drm_format_info *fmt; - u32 val, ch_base, hw_fmt; - int ret; - - ch_base = sun8i_channel_base(mixer, channel); - - fmt = state->fb->format; - ret = sun8i_mixer_drm_format_to_hw(fmt->format, &hw_fmt); - if (ret || fmt->is_yuv) { - DRM_DEBUG_DRIVER("Invalid format\n"); - return -EINVAL; - } - - val = hw_fmt << SUN8I_MIXER_CHAN_UI_LAYER_ATTR_FBFMT_OFFSET; - regmap_update_bits(mixer->engine.regs, - SUN8I_MIXER_CHAN_UI_LAYER_ATTR(ch_base, overlay), - SUN8I_MIXER_CHAN_UI_LAYER_ATTR_FBFMT_MASK, val); - - return 0; -} - -static int sun8i_ui_layer_update_buffer(struct sun8i_mixer *mixer, int channel, - int overlay, struct drm_plane *plane) +static void sun8i_ui_layer_update_buffer(struct sun8i_layer *layer, + struct drm_plane *plane) { struct drm_plane_state *state = plane->state; struct drm_framebuffer *fb = state->fb; @@ -164,7 +129,7 @@ static int sun8i_ui_layer_update_buffer(struct sun8i_mixer *mixer, int channel, u32 ch_base; int bpp; - ch_base = sun8i_channel_base(mixer, channel); + ch_base = sun8i_channel_base(layer); /* Get the physical address of the buffer in memory */ gem = drm_fb_dma_get_gem_obj(fb, 0); @@ -181,17 +146,15 @@ static int sun8i_ui_layer_update_buffer(struct sun8i_mixer *mixer, int channel, /* Set the line width */ DRM_DEBUG_DRIVER("Layer line width: %d bytes\n", fb->pitches[0]); - regmap_write(mixer->engine.regs, - SUN8I_MIXER_CHAN_UI_LAYER_PITCH(ch_base, overlay), + regmap_write(layer->regs, + SUN8I_MIXER_CHAN_UI_LAYER_PITCH(ch_base, layer->overlay), fb->pitches[0]); DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &dma_addr); - regmap_write(mixer->engine.regs, - SUN8I_MIXER_CHAN_UI_LAYER_TOP_LADDR(ch_base, overlay), + regmap_write(layer->regs, + SUN8I_MIXER_CHAN_UI_LAYER_TOP_LADDR(ch_base, layer->overlay), lower_32_bits(dma_addr)); - - return 0; } static int sun8i_ui_layer_atomic_check(struct drm_plane *plane, @@ -202,7 +165,9 @@ static int sun8i_ui_layer_atomic_check(struct drm_plane *plane, struct sun8i_layer *layer = plane_to_sun8i_layer(plane); struct drm_crtc *crtc = new_plane_state->crtc; struct drm_crtc_state *crtc_state; - int min_scale, max_scale; + const struct drm_format_info *fmt; + int min_scale, max_scale, ret; + u32 hw_fmt; if (!crtc) return 0; @@ -211,10 +176,17 @@ static int sun8i_ui_layer_atomic_check(struct drm_plane *plane, if (WARN_ON(!crtc_state)) return -EINVAL; + fmt = new_plane_state->fb->format; + ret = sun8i_mixer_drm_format_to_hw(fmt->format, &hw_fmt); + if (ret || fmt->is_yuv) { + DRM_DEBUG_DRIVER("Invalid plane format\n"); + return -EINVAL; + } + min_scale = DRM_PLANE_NO_SCALING; max_scale = DRM_PLANE_NO_SCALING; - if (layer->mixer->cfg->scaler_mask & BIT(layer->channel)) { + if (layer->cfg->scaler_mask & BIT(layer->channel)) { min_scale = SUN8I_UI_SCALER_SCALE_MIN; max_scale = SUN8I_UI_SCALER_SCALE_MAX; } @@ -232,20 +204,15 @@ static void sun8i_ui_layer_atomic_update(struct drm_plane *plane, struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct sun8i_layer *layer = plane_to_sun8i_layer(plane); - unsigned int zpos = new_state->normalized_zpos; - struct sun8i_mixer *mixer = layer->mixer; - if (!new_state->crtc || !new_state->visible) + if (!new_state->crtc || !new_state->visible) { + sun8i_ui_layer_disable(layer); return; + } - sun8i_ui_layer_update_coord(mixer, layer->channel, - layer->overlay, plane, zpos); - sun8i_ui_layer_update_alpha(mixer, layer->channel, - layer->overlay, plane); - sun8i_ui_layer_update_formats(mixer, layer->channel, - layer->overlay, plane); 
- sun8i_ui_layer_update_buffer(mixer, layer->channel, - layer->overlay, plane); + sun8i_ui_layer_update_attributes(layer, plane); + sun8i_ui_layer_update_coord(layer, plane); + sun8i_ui_layer_update_buffer(layer, plane); } static const struct drm_plane_helper_funcs sun8i_ui_layer_helper_funcs = { @@ -291,21 +258,25 @@ static const uint64_t sun8i_layer_modifiers[] = { }; struct sun8i_layer *sun8i_ui_layer_init_one(struct drm_device *drm, - struct sun8i_mixer *mixer, - int index) + enum drm_plane_type type, + struct regmap *regs, + int index, int phy_index, + int plane_cnt, + const struct sun8i_layer_cfg *cfg) { - enum drm_plane_type type = DRM_PLANE_TYPE_OVERLAY; - int channel = mixer->cfg->vi_num + index; struct sun8i_layer *layer; - unsigned int plane_cnt; int ret; layer = devm_kzalloc(drm->dev, sizeof(*layer), GFP_KERNEL); if (!layer) return ERR_PTR(-ENOMEM); - if (index == 0) - type = DRM_PLANE_TYPE_PRIMARY; + layer->type = SUN8I_LAYER_TYPE_UI; + layer->index = index; + layer->channel = phy_index; + layer->overlay = 0; + layer->regs = regs; + layer->cfg = cfg; /* possible crtcs are set later */ ret = drm_universal_plane_init(drm, &layer->plane, 0, @@ -318,15 +289,13 @@ struct sun8i_layer *sun8i_ui_layer_init_one(struct drm_device *drm, return ERR_PTR(ret); } - plane_cnt = mixer->cfg->ui_num + mixer->cfg->vi_num; - ret = drm_plane_create_alpha_property(&layer->plane); if (ret) { dev_err(drm->dev, "Couldn't add alpha property\n"); return ERR_PTR(ret); } - ret = drm_plane_create_zpos_property(&layer->plane, channel, + ret = drm_plane_create_zpos_property(&layer->plane, index, 0, plane_cnt - 1); if (ret) { dev_err(drm->dev, "Couldn't add zpos property\n"); @@ -334,10 +303,6 @@ struct sun8i_layer *sun8i_ui_layer_init_one(struct drm_device *drm, } drm_plane_helper_add(&layer->plane, &sun8i_ui_layer_helper_funcs); - layer->mixer = mixer; - layer->type = SUN8I_LAYER_TYPE_UI; - layer->channel = channel; - layer->overlay = 0; return layer; } diff --git a/drivers/gpu/drm/sun4i/sun8i_ui_layer.h b/drivers/gpu/drm/sun4i/sun8i_ui_layer.h index 83892f6ff211..1581ffc6d4e5 100644 --- a/drivers/gpu/drm/sun4i/sun8i_ui_layer.h +++ b/drivers/gpu/drm/sun4i/sun8i_ui_layer.h @@ -50,6 +50,9 @@ struct sun8i_mixer; struct sun8i_layer; struct sun8i_layer *sun8i_ui_layer_init_one(struct drm_device *drm, - struct sun8i_mixer *mixer, - int index); + enum drm_plane_type type, + struct regmap *regs, + int index, int phy_index, + int plane_cnt, + const struct sun8i_layer_cfg *cfg); #endif /* _SUN8I_UI_LAYER_H_ */ diff --git a/drivers/gpu/drm/sun4i/sun8i_ui_scaler.c b/drivers/gpu/drm/sun4i/sun8i_ui_scaler.c index 8b7a58e27517..a178da8f532a 100644 --- a/drivers/gpu/drm/sun4i/sun8i_ui_scaler.c +++ b/drivers/gpu/drm/sun4i/sun8i_ui_scaler.c @@ -89,18 +89,18 @@ static const u32 lan2coefftab16[240] = { 0x0b1c1603, 0x0d1c1502, 0x0e1d1401, 0x0f1d1301, }; -static u32 sun8i_ui_scaler_base(struct sun8i_mixer *mixer, int channel) +static u32 sun8i_ui_scaler_base(struct sun8i_layer *layer) { - int vi_num = mixer->cfg->vi_num; + int offset = layer->cfg->vi_scaler_num; - if (mixer->cfg->de_type == SUN8I_MIXER_DE3) + if (layer->cfg->de_type == SUN8I_MIXER_DE3) return DE3_VI_SCALER_UNIT_BASE + - DE3_VI_SCALER_UNIT_SIZE * vi_num + - DE3_UI_SCALER_UNIT_SIZE * (channel - vi_num); + DE3_VI_SCALER_UNIT_SIZE * offset + + DE3_UI_SCALER_UNIT_SIZE * (layer->channel - offset); else return DE2_VI_SCALER_UNIT_BASE + - DE2_VI_SCALER_UNIT_SIZE * vi_num + - DE2_UI_SCALER_UNIT_SIZE * (channel - vi_num); + DE2_VI_SCALER_UNIT_SIZE * offset + + 
DE2_UI_SCALER_UNIT_SIZE * (layer->channel - offset); } static int sun8i_ui_scaler_coef_index(unsigned int step) @@ -127,14 +127,11 @@ static int sun8i_ui_scaler_coef_index(unsigned int step) } } -void sun8i_ui_scaler_enable(struct sun8i_mixer *mixer, int layer, bool enable) +void sun8i_ui_scaler_enable(struct sun8i_layer *layer, bool enable) { u32 val, base; - if (WARN_ON(layer < mixer->cfg->vi_num)) - return; - - base = sun8i_ui_scaler_base(mixer, layer); + base = sun8i_ui_scaler_base(layer); if (enable) val = SUN8I_SCALER_GSU_CTRL_EN | @@ -142,10 +139,10 @@ void sun8i_ui_scaler_enable(struct sun8i_mixer *mixer, int layer, bool enable) else val = 0; - regmap_write(mixer->engine.regs, SUN8I_SCALER_GSU_CTRL(base), val); + regmap_write(layer->regs, SUN8I_SCALER_GSU_CTRL(base), val); } -void sun8i_ui_scaler_setup(struct sun8i_mixer *mixer, int layer, +void sun8i_ui_scaler_setup(struct sun8i_layer *layer, u32 src_w, u32 src_h, u32 dst_w, u32 dst_h, u32 hscale, u32 vscale, u32 hphase, u32 vphase) { @@ -153,10 +150,7 @@ void sun8i_ui_scaler_setup(struct sun8i_mixer *mixer, int layer, int i, offset; u32 base; - if (WARN_ON(layer < mixer->cfg->vi_num)) - return; - - base = sun8i_ui_scaler_base(mixer, layer); + base = sun8i_ui_scaler_base(layer); hphase <<= SUN8I_UI_SCALER_PHASE_FRAC - 16; vphase <<= SUN8I_UI_SCALER_PHASE_FRAC - 16; @@ -166,22 +160,22 @@ void sun8i_ui_scaler_setup(struct sun8i_mixer *mixer, int layer, insize = SUN8I_UI_SCALER_SIZE(src_w, src_h); outsize = SUN8I_UI_SCALER_SIZE(dst_w, dst_h); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_GSU_OUTSIZE(base), outsize); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_GSU_INSIZE(base), insize); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_GSU_HSTEP(base), hscale); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_GSU_VSTEP(base), vscale); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_GSU_HPHASE(base), hphase); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_GSU_VPHASE(base), vphase); offset = sun8i_ui_scaler_coef_index(hscale) * SUN8I_UI_SCALER_COEFF_COUNT; for (i = 0; i < SUN8I_UI_SCALER_COEFF_COUNT; i++) - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_GSU_HCOEFF(base, i), lan2coefftab16[offset + i]); } diff --git a/drivers/gpu/drm/sun4i/sun8i_ui_scaler.h b/drivers/gpu/drm/sun4i/sun8i_ui_scaler.h index 1ef4bd6f2718..872d88a58e7e 100644 --- a/drivers/gpu/drm/sun4i/sun8i_ui_scaler.h +++ b/drivers/gpu/drm/sun4i/sun8i_ui_scaler.h @@ -35,8 +35,8 @@ #define SUN8I_SCALER_GSU_CTRL_EN BIT(0) #define SUN8I_SCALER_GSU_CTRL_COEFF_RDY BIT(4) -void sun8i_ui_scaler_enable(struct sun8i_mixer *mixer, int layer, bool enable); -void sun8i_ui_scaler_setup(struct sun8i_mixer *mixer, int layer, +void sun8i_ui_scaler_enable(struct sun8i_layer *layer, bool enable); +void sun8i_ui_scaler_setup(struct sun8i_layer *layer, u32 src_w, u32 src_h, u32 dst_w, u32 dst_h, u32 hscale, u32 vscale, u32 hphase, u32 vphase); diff --git a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c index de2fe1942840..ca3ab59e108d 100644 --- a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c +++ b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c @@ -14,62 +14,71 @@ #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> +#include "sun4i_crtc.h" #include "sun8i_csc.h" #include "sun8i_mixer.h" #include "sun8i_vi_layer.h" #include "sun8i_vi_scaler.h" -static void 
sun8i_vi_layer_update_alpha(struct sun8i_mixer *mixer, int channel, - int overlay, struct drm_plane *plane) +static void sun8i_vi_layer_disable(struct sun8i_layer *layer) { - u32 mask, val, ch_base; + u32 ch_base = sun8i_channel_base(layer); - ch_base = sun8i_channel_base(mixer, channel); + regmap_write(layer->regs, + SUN8I_MIXER_CHAN_VI_LAYER_ATTR(ch_base, layer->overlay), 0); +} - if (mixer->cfg->de_type >= SUN8I_MIXER_DE3) { - mask = SUN50I_MIXER_CHAN_VI_LAYER_ATTR_ALPHA_MASK | - SUN50I_MIXER_CHAN_VI_LAYER_ATTR_ALPHA_MODE_MASK; - val = SUN50I_MIXER_CHAN_VI_LAYER_ATTR_ALPHA - (plane->state->alpha >> 8); +static void sun8i_vi_layer_update_attributes(struct sun8i_layer *layer, + struct drm_plane *plane) +{ + struct drm_plane_state *state = plane->state; + const struct drm_format_info *fmt; + u32 val, ch_base, hw_fmt; - val |= (plane->state->alpha == DRM_BLEND_ALPHA_OPAQUE) ? + ch_base = sun8i_channel_base(layer); + fmt = state->fb->format; + sun8i_mixer_drm_format_to_hw(fmt->format, &hw_fmt); + + val = hw_fmt << SUN8I_MIXER_CHAN_VI_LAYER_ATTR_FBFMT_OFFSET; + if (!fmt->is_yuv) + val |= SUN8I_MIXER_CHAN_VI_LAYER_ATTR_RGB_MODE; + val |= SUN8I_MIXER_CHAN_VI_LAYER_ATTR_EN; + if (layer->cfg->de_type >= SUN8I_MIXER_DE3) { + val |= SUN50I_MIXER_CHAN_VI_LAYER_ATTR_ALPHA(state->alpha >> 8); + val |= (state->alpha == DRM_BLEND_ALPHA_OPAQUE) ? SUN50I_MIXER_CHAN_VI_LAYER_ATTR_ALPHA_MODE_PIXEL : SUN50I_MIXER_CHAN_VI_LAYER_ATTR_ALPHA_MODE_COMBINED; + } + + regmap_write(layer->regs, + SUN8I_MIXER_CHAN_VI_LAYER_ATTR(ch_base, layer->overlay), val); - regmap_update_bits(mixer->engine.regs, - SUN8I_MIXER_CHAN_VI_LAYER_ATTR(ch_base, - overlay), - mask, val); - } else if (mixer->cfg->vi_num == 1) { - regmap_update_bits(mixer->engine.regs, - SUN8I_MIXER_FCC_GLOBAL_ALPHA_REG, - SUN8I_MIXER_FCC_GLOBAL_ALPHA_MASK, - SUN8I_MIXER_FCC_GLOBAL_ALPHA - (plane->state->alpha >> 8)); + if (layer->cfg->de2_fcc_alpha) { + regmap_write(layer->regs, + SUN8I_MIXER_FCC_GLOBAL_ALPHA_REG, + SUN8I_MIXER_FCC_GLOBAL_ALPHA(state->alpha >> 8)); } } -static int sun8i_vi_layer_update_coord(struct sun8i_mixer *mixer, int channel, - int overlay, struct drm_plane *plane, - unsigned int zpos) +static void sun8i_vi_layer_update_coord(struct sun8i_layer *layer, + struct drm_plane *plane) { struct drm_plane_state *state = plane->state; + struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(state->crtc); + struct sun8i_mixer *mixer = engine_to_sun8i_mixer(scrtc->engine); const struct drm_format_info *format = state->fb->format; u32 src_w, src_h, dst_w, dst_h; - struct regmap *bld_regs; - u32 bld_base, ch_base; u32 outsize, insize; u32 hphase, vphase; u32 hn = 0, hm = 0; u32 vn = 0, vm = 0; bool subsampled; + u32 ch_base; DRM_DEBUG_DRIVER("Updating VI channel %d overlay %d\n", - channel, overlay); + layer->channel, layer->overlay); - bld_base = sun8i_blender_base(mixer); - bld_regs = sun8i_blender_regmap(mixer); - ch_base = sun8i_channel_base(mixer, channel); + ch_base = sun8i_channel_base(layer); src_w = drm_rect_width(&state->src) >> 16; src_h = drm_rect_height(&state->src) >> 16; @@ -106,10 +115,10 @@ static int sun8i_vi_layer_update_coord(struct sun8i_mixer *mixer, int channel, (state->src.x1 >> 16) & ~(format->hsub - 1), (state->src.y1 >> 16) & ~(format->vsub - 1)); DRM_DEBUG_DRIVER("Layer source size W: %d H: %d\n", src_w, src_h); - regmap_write(mixer->engine.regs, - SUN8I_MIXER_CHAN_VI_LAYER_SIZE(ch_base, overlay), + regmap_write(layer->regs, + SUN8I_MIXER_CHAN_VI_LAYER_SIZE(ch_base, layer->overlay), insize); - regmap_write(mixer->engine.regs, 
+ regmap_write(layer->regs, SUN8I_MIXER_CHAN_VI_OVL_SIZE(ch_base), insize); @@ -144,7 +153,7 @@ static int sun8i_vi_layer_update_coord(struct sun8i_mixer *mixer, int channel, } /* it seems that every RGB scaler has buffer for 2048 pixels */ - scanline = subsampled ? mixer->cfg->scanline_yuv : 2048; + scanline = subsampled ? layer->cfg->scanline_yuv : 2048; if (src_w > scanline) { DRM_DEBUG_DRIVER("Using horizontal coarse scaling\n"); @@ -156,108 +165,34 @@ static int sun8i_vi_layer_update_coord(struct sun8i_mixer *mixer, int channel, hscale = (src_w << 16) / dst_w; vscale = (src_h << 16) / dst_h; - sun8i_vi_scaler_setup(mixer, channel, src_w, src_h, dst_w, - dst_h, hscale, vscale, hphase, vphase, - format); - sun8i_vi_scaler_enable(mixer, channel, true); + sun8i_vi_scaler_setup(layer, src_w, src_h, dst_w, dst_h, + hscale, vscale, hphase, vphase, format); + sun8i_vi_scaler_enable(layer, true); } else { DRM_DEBUG_DRIVER("HW scaling is not needed\n"); - sun8i_vi_scaler_enable(mixer, channel, false); + sun8i_vi_scaler_enable(layer, false); } - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_MIXER_CHAN_VI_HDS_Y(ch_base), SUN8I_MIXER_CHAN_VI_DS_N(hn) | SUN8I_MIXER_CHAN_VI_DS_M(hm)); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_MIXER_CHAN_VI_HDS_UV(ch_base), SUN8I_MIXER_CHAN_VI_DS_N(hn) | SUN8I_MIXER_CHAN_VI_DS_M(hm)); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_MIXER_CHAN_VI_VDS_Y(ch_base), SUN8I_MIXER_CHAN_VI_DS_N(vn) | SUN8I_MIXER_CHAN_VI_DS_M(vm)); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_MIXER_CHAN_VI_VDS_UV(ch_base), SUN8I_MIXER_CHAN_VI_DS_N(vn) | SUN8I_MIXER_CHAN_VI_DS_M(vm)); - - /* Set base coordinates */ - DRM_DEBUG_DRIVER("Layer destination coordinates X: %d Y: %d\n", - state->dst.x1, state->dst.y1); - DRM_DEBUG_DRIVER("Layer destination size W: %d H: %d\n", dst_w, dst_h); - regmap_write(bld_regs, - SUN8I_MIXER_BLEND_ATTR_COORD(bld_base, zpos), - SUN8I_MIXER_COORD(state->dst.x1, state->dst.y1)); - regmap_write(bld_regs, - SUN8I_MIXER_BLEND_ATTR_INSIZE(bld_base, zpos), - outsize); - - return 0; -} - -static u32 sun8i_vi_layer_get_csc_mode(const struct drm_format_info *format) -{ - if (!format->is_yuv) - return SUN8I_CSC_MODE_OFF; - - switch (format->format) { - case DRM_FORMAT_YVU411: - case DRM_FORMAT_YVU420: - case DRM_FORMAT_YVU422: - case DRM_FORMAT_YVU444: - return SUN8I_CSC_MODE_YVU2RGB; - default: - return SUN8I_CSC_MODE_YUV2RGB; - } -} - -static int sun8i_vi_layer_update_formats(struct sun8i_mixer *mixer, int channel, - int overlay, struct drm_plane *plane) -{ - struct drm_plane_state *state = plane->state; - u32 val, ch_base, csc_mode, hw_fmt; - const struct drm_format_info *fmt; - int ret; - - ch_base = sun8i_channel_base(mixer, channel); - - fmt = state->fb->format; - ret = sun8i_mixer_drm_format_to_hw(fmt->format, &hw_fmt); - if (ret) { - DRM_DEBUG_DRIVER("Invalid format\n"); - return ret; - } - - val = hw_fmt << SUN8I_MIXER_CHAN_VI_LAYER_ATTR_FBFMT_OFFSET; - regmap_update_bits(mixer->engine.regs, - SUN8I_MIXER_CHAN_VI_LAYER_ATTR(ch_base, overlay), - SUN8I_MIXER_CHAN_VI_LAYER_ATTR_FBFMT_MASK, val); - - csc_mode = sun8i_vi_layer_get_csc_mode(fmt); - if (csc_mode != SUN8I_CSC_MODE_OFF) { - sun8i_csc_set_ccsc_coefficients(mixer, channel, csc_mode, - state->color_encoding, - state->color_range); - sun8i_csc_enable_ccsc(mixer, channel, true); - } else { - sun8i_csc_enable_ccsc(mixer, channel, false); - } - - if (!fmt->is_yuv) - val = SUN8I_MIXER_CHAN_VI_LAYER_ATTR_RGB_MODE; - else - 
val = 0; - - regmap_update_bits(mixer->engine.regs, - SUN8I_MIXER_CHAN_VI_LAYER_ATTR(ch_base, overlay), - SUN8I_MIXER_CHAN_VI_LAYER_ATTR_RGB_MODE, val); - - return 0; } -static int sun8i_vi_layer_update_buffer(struct sun8i_mixer *mixer, int channel, - int overlay, struct drm_plane *plane) +static void sun8i_vi_layer_update_buffer(struct sun8i_layer *layer, + struct drm_plane *plane) { struct drm_plane_state *state = plane->state; struct drm_framebuffer *fb = state->fb; @@ -268,7 +203,7 @@ static int sun8i_vi_layer_update_buffer(struct sun8i_mixer *mixer, int channel, u32 ch_base; int i; - ch_base = sun8i_channel_base(mixer, channel); + ch_base = sun8i_channel_base(layer); /* Adjust x and y to be dividable by subsampling factor */ src_x = (state->src.x1 >> 16) & ~(format->hsub - 1); @@ -298,21 +233,19 @@ static int sun8i_vi_layer_update_buffer(struct sun8i_mixer *mixer, int channel, /* Set the line width */ DRM_DEBUG_DRIVER("Layer %d. line width: %d bytes\n", i + 1, fb->pitches[i]); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_MIXER_CHAN_VI_LAYER_PITCH(ch_base, - overlay, i), + layer->overlay, i), fb->pitches[i]); DRM_DEBUG_DRIVER("Setting %d. buffer address to %pad\n", i + 1, &dma_addr); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_MIXER_CHAN_VI_LAYER_TOP_LADDR(ch_base, - overlay, i), + layer->overlay, i), lower_32_bits(dma_addr)); } - - return 0; } static int sun8i_vi_layer_atomic_check(struct drm_plane *plane, @@ -323,7 +256,9 @@ static int sun8i_vi_layer_atomic_check(struct drm_plane *plane, struct sun8i_layer *layer = plane_to_sun8i_layer(plane); struct drm_crtc *crtc = new_plane_state->crtc; struct drm_crtc_state *crtc_state; - int min_scale, max_scale; + const struct drm_format_info *fmt; + int min_scale, max_scale, ret; + u32 hw_fmt; if (!crtc) return 0; @@ -332,10 +267,17 @@ static int sun8i_vi_layer_atomic_check(struct drm_plane *plane, if (WARN_ON(!crtc_state)) return -EINVAL; + fmt = new_plane_state->fb->format; + ret = sun8i_mixer_drm_format_to_hw(fmt->format, &hw_fmt); + if (ret) { + DRM_DEBUG_DRIVER("Invalid plane format\n"); + return ret; + } + min_scale = DRM_PLANE_NO_SCALING; max_scale = DRM_PLANE_NO_SCALING; - if (layer->mixer->cfg->scaler_mask & BIT(layer->channel)) { + if (layer->cfg->scaler_mask & BIT(layer->channel)) { min_scale = SUN8I_VI_SCALER_SCALE_MIN; max_scale = SUN8I_VI_SCALER_SCALE_MAX; } @@ -352,20 +294,16 @@ static void sun8i_vi_layer_atomic_update(struct drm_plane *plane, struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct sun8i_layer *layer = plane_to_sun8i_layer(plane); - unsigned int zpos = new_state->normalized_zpos; - struct sun8i_mixer *mixer = layer->mixer; - if (!new_state->crtc || !new_state->visible) + if (!new_state->crtc || !new_state->visible) { + sun8i_vi_layer_disable(layer); return; + } - sun8i_vi_layer_update_coord(mixer, layer->channel, - layer->overlay, plane, zpos); - sun8i_vi_layer_update_alpha(mixer, layer->channel, - layer->overlay, plane); - sun8i_vi_layer_update_formats(mixer, layer->channel, - layer->overlay, plane); - sun8i_vi_layer_update_buffer(mixer, layer->channel, - layer->overlay, plane); + sun8i_vi_layer_update_attributes(layer, plane); + sun8i_vi_layer_update_coord(layer, plane); + sun8i_csc_config(layer, new_state); + sun8i_vi_layer_update_buffer(layer, plane); } static const struct drm_plane_helper_funcs sun8i_vi_layer_helper_funcs = { @@ -471,12 +409,14 @@ static const uint64_t sun8i_layer_modifiers[] = { }; struct sun8i_layer 
*sun8i_vi_layer_init_one(struct drm_device *drm, - struct sun8i_mixer *mixer, - int index) + enum drm_plane_type type, + struct regmap *regs, + int index, int phy_index, + int plane_cnt, + const struct sun8i_layer_cfg *cfg) { - enum drm_plane_type type = DRM_PLANE_TYPE_OVERLAY; u32 supported_encodings, supported_ranges; - unsigned int plane_cnt, format_count; + unsigned int format_count; struct sun8i_layer *layer; const u32 *formats; int ret; @@ -485,7 +425,14 @@ struct sun8i_layer *sun8i_vi_layer_init_one(struct drm_device *drm, if (!layer) return ERR_PTR(-ENOMEM); - if (mixer->cfg->de_type >= SUN8I_MIXER_DE3) { + layer->type = SUN8I_LAYER_TYPE_VI; + layer->index = index; + layer->channel = phy_index; + layer->overlay = 0; + layer->regs = regs; + layer->cfg = cfg; + + if (layer->cfg->de_type >= SUN8I_MIXER_DE3) { formats = sun8i_vi_layer_de3_formats; format_count = ARRAY_SIZE(sun8i_vi_layer_de3_formats); } else { @@ -493,9 +440,6 @@ struct sun8i_layer *sun8i_vi_layer_init_one(struct drm_device *drm, format_count = ARRAY_SIZE(sun8i_vi_layer_formats); } - if (!mixer->cfg->ui_num && index == 0) - type = DRM_PLANE_TYPE_PRIMARY; - /* possible crtcs are set later */ ret = drm_universal_plane_init(drm, &layer->plane, 0, &sun8i_vi_layer_funcs, @@ -507,9 +451,7 @@ struct sun8i_layer *sun8i_vi_layer_init_one(struct drm_device *drm, return ERR_PTR(ret); } - plane_cnt = mixer->cfg->ui_num + mixer->cfg->vi_num; - - if (mixer->cfg->vi_num == 1 || mixer->cfg->de_type >= SUN8I_MIXER_DE3) { + if (layer->cfg->de2_fcc_alpha || layer->cfg->de_type >= SUN8I_MIXER_DE3) { ret = drm_plane_create_alpha_property(&layer->plane); if (ret) { dev_err(drm->dev, "Couldn't add alpha property\n"); @@ -526,7 +468,7 @@ struct sun8i_layer *sun8i_vi_layer_init_one(struct drm_device *drm, supported_encodings = BIT(DRM_COLOR_YCBCR_BT601) | BIT(DRM_COLOR_YCBCR_BT709); - if (mixer->cfg->de_type >= SUN8I_MIXER_DE3) + if (layer->cfg->de_type >= SUN8I_MIXER_DE3) supported_encodings |= BIT(DRM_COLOR_YCBCR_BT2020); supported_ranges = BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | @@ -543,10 +485,6 @@ struct sun8i_layer *sun8i_vi_layer_init_one(struct drm_device *drm, } drm_plane_helper_add(&layer->plane, &sun8i_vi_layer_helper_funcs); - layer->mixer = mixer; - layer->type = SUN8I_LAYER_TYPE_VI; - layer->channel = index; - layer->overlay = 0; return layer; } diff --git a/drivers/gpu/drm/sun4i/sun8i_vi_layer.h b/drivers/gpu/drm/sun4i/sun8i_vi_layer.h index 655440cdc78f..29cc5573691f 100644 --- a/drivers/gpu/drm/sun4i/sun8i_vi_layer.h +++ b/drivers/gpu/drm/sun4i/sun8i_vi_layer.h @@ -55,6 +55,9 @@ struct sun8i_mixer; struct sun8i_layer; struct sun8i_layer *sun8i_vi_layer_init_one(struct drm_device *drm, - struct sun8i_mixer *mixer, - int index); + enum drm_plane_type type, + struct regmap *regs, + int index, int phy_index, + int plane_cnt, + const struct sun8i_layer_cfg *cfg); #endif /* _SUN8I_VI_LAYER_H_ */ diff --git a/drivers/gpu/drm/sun4i/sun8i_vi_scaler.c b/drivers/gpu/drm/sun4i/sun8i_vi_scaler.c index 82df6244af88..3dec4eeb1ba2 100644 --- a/drivers/gpu/drm/sun4i/sun8i_vi_scaler.c +++ b/drivers/gpu/drm/sun4i/sun8i_vi_scaler.c @@ -833,16 +833,17 @@ static const u32 bicubic4coefftab32[480] = { 0x1012110d, 0x1012110d, 0x1013110c, 0x1013110c, }; -static u32 sun8i_vi_scaler_base(struct sun8i_mixer *mixer, int channel) +static u32 sun8i_vi_scaler_base(struct sun8i_layer *layer) { - if (mixer->cfg->de_type == SUN8I_MIXER_DE33) - return sun8i_channel_base(mixer, channel) + 0x3000; - else if (mixer->cfg->de_type == SUN8I_MIXER_DE3) + if 
(layer->cfg->de_type == SUN8I_MIXER_DE33) + return DE33_VI_SCALER_UNIT_BASE + + DE33_CH_SIZE * layer->channel; + else if (layer->cfg->de_type == SUN8I_MIXER_DE3) return DE3_VI_SCALER_UNIT_BASE + - DE3_VI_SCALER_UNIT_SIZE * channel; + DE3_VI_SCALER_UNIT_SIZE * layer->channel; else return DE2_VI_SCALER_UNIT_BASE + - DE2_VI_SCALER_UNIT_SIZE * channel; + DE2_VI_SCALER_UNIT_SIZE * layer->channel; } static int sun8i_vi_scaler_coef_index(unsigned int step) @@ -909,11 +910,11 @@ static void sun8i_vi_scaler_set_coeff(struct regmap *map, u32 base, } } -void sun8i_vi_scaler_enable(struct sun8i_mixer *mixer, int layer, bool enable) +void sun8i_vi_scaler_enable(struct sun8i_layer *layer, bool enable) { u32 val, base; - base = sun8i_vi_scaler_base(mixer, layer); + base = sun8i_vi_scaler_base(layer); if (enable) val = SUN8I_SCALER_VSU_CTRL_EN | @@ -921,11 +922,11 @@ void sun8i_vi_scaler_enable(struct sun8i_mixer *mixer, int layer, bool enable) else val = 0; - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_CTRL(base), val); } -void sun8i_vi_scaler_setup(struct sun8i_mixer *mixer, int layer, +void sun8i_vi_scaler_setup(struct sun8i_layer *layer, u32 src_w, u32 src_h, u32 dst_w, u32 dst_h, u32 hscale, u32 vscale, u32 hphase, u32 vphase, const struct drm_format_info *format) @@ -934,7 +935,7 @@ void sun8i_vi_scaler_setup(struct sun8i_mixer *mixer, int layer, u32 insize, outsize; u32 base; - base = sun8i_vi_scaler_base(mixer, layer); + base = sun8i_vi_scaler_base(layer); hphase <<= SUN8I_VI_SCALER_PHASE_FRAC - 16; vphase <<= SUN8I_VI_SCALER_PHASE_FRAC - 16; @@ -958,7 +959,7 @@ void sun8i_vi_scaler_setup(struct sun8i_mixer *mixer, int layer, cvphase = vphase; } - if (mixer->cfg->de_type >= SUN8I_MIXER_DE3) { + if (layer->cfg->de_type >= SUN8I_MIXER_DE3) { u32 val; if (format->hsub == 1 && format->vsub == 1) @@ -966,36 +967,36 @@ void sun8i_vi_scaler_setup(struct sun8i_mixer *mixer, int layer, else val = SUN50I_SCALER_VSU_SCALE_MODE_NORMAL; - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN50I_SCALER_VSU_SCALE_MODE(base), val); } - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_OUTSIZE(base), outsize); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_YINSIZE(base), insize); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_YHSTEP(base), hscale); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_YVSTEP(base), vscale); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_YHPHASE(base), hphase); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_YVPHASE(base), vphase); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_CINSIZE(base), SUN8I_VI_SCALER_SIZE(src_w / format->hsub, src_h / format->vsub)); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_CHSTEP(base), hscale / format->hsub); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_CVSTEP(base), vscale / format->vsub); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_CHPHASE(base), chphase); - regmap_write(mixer->engine.regs, + regmap_write(layer->regs, SUN8I_SCALER_VSU_CVPHASE(base), cvphase); - sun8i_vi_scaler_set_coeff(mixer->engine.regs, base, + sun8i_vi_scaler_set_coeff(layer->regs, base, hscale, vscale, format); } diff --git a/drivers/gpu/drm/sun4i/sun8i_vi_scaler.h b/drivers/gpu/drm/sun4i/sun8i_vi_scaler.h index 
68f6593b369a..245fe2f431c3 100644 --- a/drivers/gpu/drm/sun4i/sun8i_vi_scaler.h +++ b/drivers/gpu/drm/sun4i/sun8i_vi_scaler.h @@ -18,6 +18,8 @@ #define DE3_VI_SCALER_UNIT_BASE 0x20000 #define DE3_VI_SCALER_UNIT_SIZE 0x08000 +#define DE33_VI_SCALER_UNIT_BASE 0x4000 + /* this two macros assumes 16 fractional bits which is standard in DRM */ #define SUN8I_VI_SCALER_SCALE_MIN 1 #define SUN8I_VI_SCALER_SCALE_MAX ((1UL << 20) - 1) @@ -69,8 +71,8 @@ #define SUN50I_SCALER_VSU_ANGLE_SHIFT(x) (((x) << 16) & 0xF) #define SUN50I_SCALER_VSU_ANGLE_OFFSET(x) ((x) & 0xFF) -void sun8i_vi_scaler_enable(struct sun8i_mixer *mixer, int layer, bool enable); -void sun8i_vi_scaler_setup(struct sun8i_mixer *mixer, int layer, +void sun8i_vi_scaler_enable(struct sun8i_layer *layer, bool enable); +void sun8i_vi_scaler_setup(struct sun8i_layer *layer, u32 src_w, u32 src_h, u32 dst_w, u32 dst_h, u32 hscale, u32 vscale, u32 hphase, u32 vphase, const struct drm_format_info *format); diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c index 411b1a25e29c..8f81eb560b9e 100644 --- a/drivers/gpu/drm/tidss/tidss_crtc.c +++ b/drivers/gpu/drm/tidss/tidss_crtc.c @@ -248,8 +248,7 @@ static void tidss_crtc_atomic_enable(struct drm_crtc *crtc, dispc_vp_enable(tidss->dispc, tcrtc->hw_videoport); if (crtc->state->event) { - unsigned int pipe = drm_crtc_index(crtc); - struct drm_vblank_crtc *vblank = &ddev->vblank[pipe]; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); vblank->time = ktime_get(); diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c index d8e1a1bcd660..58d5eb033bdb 100644 --- a/drivers/gpu/drm/tidss/tidss_dispc.c +++ b/drivers/gpu/drm/tidss/tidss_dispc.c @@ -58,12 +58,6 @@ static const u16 tidss_k2g_common_regs[DISPC_COMMON_REG_TABLE_LEN] = { }; const struct dispc_features dispc_k2g_feats = { - .min_pclk_khz = 4375, - - .max_pclk_khz = { - [DISPC_VP_DPI] = 150000, - }, - /* * XXX According TRM the RGB input buffer width up to 2560 should * work on 3 taps, but in practice it only works up to 1280. @@ -146,11 +140,6 @@ static const u16 tidss_am65x_common_regs[DISPC_COMMON_REG_TABLE_LEN] = { }; const struct dispc_features dispc_am65x_feats = { - .max_pclk_khz = { - [DISPC_VP_DPI] = 165000, - [DISPC_VP_OLDI_AM65X] = 165000, - }, - .scaling = { .in_width_max_5tap_rgb = 1280, .in_width_max_3tap_rgb = 2560, @@ -246,11 +235,6 @@ static const u16 tidss_j721e_common_regs[DISPC_COMMON_REG_TABLE_LEN] = { }; const struct dispc_features dispc_j721e_feats = { - .max_pclk_khz = { - [DISPC_VP_DPI] = 170000, - [DISPC_VP_INTERNAL] = 600000, - }, - .scaling = { .in_width_max_5tap_rgb = 2048, .in_width_max_3tap_rgb = 4096, @@ -317,11 +301,6 @@ const struct dispc_features dispc_j721e_feats = { }; const struct dispc_features dispc_am625_feats = { - .max_pclk_khz = { - [DISPC_VP_DPI] = 165000, - [DISPC_VP_INTERNAL] = 170000, - }, - .scaling = { .in_width_max_5tap_rgb = 1280, .in_width_max_3tap_rgb = 2560, @@ -378,15 +357,6 @@ const struct dispc_features dispc_am625_feats = { }; const struct dispc_features dispc_am62a7_feats = { - /* - * if the code reaches dispc_mode_valid with VP1, - * it should return MODE_BAD. 
- */ - .max_pclk_khz = { - [DISPC_VP_TIED_OFF] = 0, - [DISPC_VP_DPI] = 165000, - }, - .scaling = { .in_width_max_5tap_rgb = 1280, .in_width_max_3tap_rgb = 2560, @@ -443,10 +413,6 @@ const struct dispc_features dispc_am62a7_feats = { }; const struct dispc_features dispc_am62l_feats = { - .max_pclk_khz = { - [DISPC_VP_DPI] = 165000, - }, - .subrev = DISPC_AM62L, .common = "common", @@ -1324,33 +1290,61 @@ static void dispc_vp_set_default_color(struct dispc_device *dispc, DISPC_OVR_DEFAULT_COLOR2, (v >> 32) & 0xffff); } +/* + * Calculate the percentage difference between the requested pixel clock rate + * and the effective rate resulting from calculating the clock divider value. + */ +unsigned int dispc_pclk_diff(unsigned long rate, unsigned long real_rate) +{ + int r = rate / 100, rr = real_rate / 100; + + return (unsigned int)(abs(((rr - r) * 100) / r)); +} + +static int check_pixel_clock(struct dispc_device *dispc, u32 hw_videoport, + unsigned long clock) +{ + unsigned long round_clock; + + /* + * For VP's with external clocking, clock operations must be + * delegated to respective driver, so we skip the check here. + */ + if (dispc->tidss->is_ext_vp_clk[hw_videoport]) + return 0; + + round_clock = clk_round_rate(dispc->vp_clk[hw_videoport], clock); + /* + * To keep the check consistent with dispc_vp_set_clk_rate(), we + * use the same 5% check here. + */ + if (dispc_pclk_diff(clock, round_clock) > 5) + return -EINVAL; + + return 0; +} + enum drm_mode_status dispc_vp_mode_valid(struct dispc_device *dispc, u32 hw_videoport, const struct drm_display_mode *mode) { u32 hsw, hfp, hbp, vsw, vfp, vbp; enum dispc_vp_bus_type bus_type; - int max_pclk; bus_type = dispc->feat->vp_bus_type[hw_videoport]; - max_pclk = dispc->feat->max_pclk_khz[bus_type]; - - if (WARN_ON(max_pclk == 0)) + if (WARN_ON(bus_type == DISPC_VP_TIED_OFF)) return MODE_BAD; - if (mode->clock < dispc->feat->min_pclk_khz) - return MODE_CLOCK_LOW; - - if (mode->clock > max_pclk) - return MODE_CLOCK_HIGH; - if (mode->hdisplay > 4096) return MODE_BAD; if (mode->vdisplay > 4096) return MODE_BAD; + if (check_pixel_clock(dispc, hw_videoport, mode->clock * 1000)) + return MODE_CLOCK_RANGE; + /* TODO: add interlace support */ if (mode->flags & DRM_MODE_FLAG_INTERLACE) return MODE_NO_INTERLACE; @@ -1414,17 +1408,6 @@ void dispc_vp_disable_clk(struct dispc_device *dispc, u32 hw_videoport) clk_disable_unprepare(dispc->vp_clk[hw_videoport]); } -/* - * Calculate the percentage difference between the requested pixel clock rate - * and the effective rate resulting from calculating the clock divider value. 
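For reference, the validation path added above replaces the per-platform min/max pixel-clock tables with a runtime clk_round_rate() comparison, with dispc_pclk_diff() supplying the 5% tolerance. A small standalone sketch of that arithmetic, using made-up rates rather than values from any TRM:

	#include <stdio.h>
	#include <stdlib.h>

	/* Mirrors dispc_pclk_diff(): percentage delta between requested and achievable rate. */
	static unsigned int pclk_diff(long rate, long real_rate)
	{
		long r = rate / 100, rr = real_rate / 100;

		return (unsigned int)labs(((rr - r) * 100) / r);
	}

	int main(void)
	{
		/* requested rate vs. a hypothetical clk_round_rate() result, in Hz */
		printf("%u%%\n", pclk_diff(74250000, 74000000));   /* 0%, mode accepted */
		printf("%u%%\n", pclk_diff(150000000, 140000000)); /* 6%, over the 5% limit, mode rejected */
		return 0;
	}

Note that check_pixel_clock() skips this comparison entirely for video ports whose clock is externally owned, via the new is_ext_vp_clk[] flags that the OLDI code further down sets for its parent VP.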
- */ -unsigned int dispc_pclk_diff(unsigned long rate, unsigned long real_rate) -{ - int r = rate / 100, rr = real_rate / 100; - - return (unsigned int)(abs(((rr - r) * 100) / r)); -} - int dispc_vp_set_clk_rate(struct dispc_device *dispc, u32 hw_videoport, unsigned long rate) { diff --git a/drivers/gpu/drm/tidss/tidss_dispc.h b/drivers/gpu/drm/tidss/tidss_dispc.h index f38493a70122..739d211d0018 100644 --- a/drivers/gpu/drm/tidss/tidss_dispc.h +++ b/drivers/gpu/drm/tidss/tidss_dispc.h @@ -77,9 +77,6 @@ enum dispc_dss_subrevision { }; struct dispc_features { - int min_pclk_khz; - int max_pclk_khz[DISPC_VP_MAX_BUS_TYPE]; - struct dispc_features_scaling scaling; enum dispc_dss_subrevision subrev; diff --git a/drivers/gpu/drm/tidss/tidss_drv.h b/drivers/gpu/drm/tidss/tidss_drv.h index 84454a4855d1..e1c1f41d8b4b 100644 --- a/drivers/gpu/drm/tidss/tidss_drv.h +++ b/drivers/gpu/drm/tidss/tidss_drv.h @@ -24,6 +24,8 @@ struct tidss_device { const struct dispc_features *feat; struct dispc_device *dispc; + bool is_ext_vp_clk[TIDSS_MAX_PORTS]; + unsigned int num_crtcs; struct drm_crtc *crtcs[TIDSS_MAX_PORTS]; diff --git a/drivers/gpu/drm/tidss/tidss_oldi.c b/drivers/gpu/drm/tidss/tidss_oldi.c index 7688251beba2..17c535bfa057 100644 --- a/drivers/gpu/drm/tidss/tidss_oldi.c +++ b/drivers/gpu/drm/tidss/tidss_oldi.c @@ -309,6 +309,25 @@ static u32 *tidss_oldi_atomic_get_input_bus_fmts(struct drm_bridge *bridge, return input_fmts; } +static enum drm_mode_status +tidss_oldi_mode_valid(struct drm_bridge *bridge, + const struct drm_display_info *info, + const struct drm_display_mode *mode) +{ + struct tidss_oldi *oldi = drm_bridge_to_tidss_oldi(bridge); + unsigned long round_clock; + + round_clock = clk_round_rate(oldi->serial, mode->clock * 7 * 1000); + /* + * To keep the check consistent with dispc_vp_set_clk_rate(), + * we use the same 5% check here. + */ + if (dispc_pclk_diff(mode->clock * 7 * 1000, round_clock) > 5) + return -EINVAL; + + return 0; +} + static const struct drm_bridge_funcs tidss_oldi_bridge_funcs = { .attach = tidss_oldi_bridge_attach, .atomic_pre_enable = tidss_oldi_atomic_pre_enable, @@ -317,6 +336,7 @@ static const struct drm_bridge_funcs tidss_oldi_bridge_funcs = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .atomic_reset = drm_atomic_helper_bridge_reset, + .mode_valid = tidss_oldi_mode_valid, }; static int get_oldi_mode(struct device_node *oldi_tx, int *companion_instance) @@ -430,6 +450,7 @@ void tidss_oldi_deinit(struct tidss_device *tidss) for (int i = 0; i < tidss->num_oldis; i++) { if (tidss->oldis[i]) { drm_bridge_remove(&tidss->oldis[i]->bridge); + tidss->is_ext_vp_clk[tidss->oldis[i]->parent_vp] = false; tidss->oldis[i] = NULL; } } @@ -580,6 +601,7 @@ int tidss_oldi_init(struct tidss_device *tidss) oldi->bridge.timings = &default_tidss_oldi_timings; tidss->oldis[tidss->num_oldis++] = oldi; + tidss->is_ext_vp_clk[oldi->parent_vp] = true; oldi->tidss = tidss; drm_bridge_add(&oldi->bridge); diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 5c10e5fbf43b..9a51afaf0749 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -199,7 +199,7 @@ EXPORT_SYMBOL(ttm_device_swapout); * @dev: The core kernel device pointer for DMA mappings and allocations. * @mapping: The address space to use for this bo. * @vma_manager: A pointer to a vma manager. - * @alloc_flags: TTM_ALLOCATION_ flags. + * @alloc_flags: TTM_ALLOCATION_* flags. 
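Stepping back to the tidss_oldi_mode_valid() hunk above: OLDI/LVDS serializes seven bits per lane per pixel clock, hence the mode->clock * 7 * 1000 serial rate being validated instead of the pixel clock itself. As a made-up worked example, a 148500 kHz pixel clock needs a 1039.5 MHz serial clock; if clk_round_rate() on the serial clock could only deliver, say, 960 MHz, then dispc_pclk_diff(1039500000, 960000000) evaluates to 7, which exceeds the 5% tolerance and the mode is rejected.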
* * Initializes a struct ttm_device: * Returns: diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 97e9ce505cf6..18b6db015619 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -1067,7 +1067,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, * @pool: the pool to initialize * @dev: device for DMA allocations and mappings * @nid: NUMA node to use for allocations - * @alloc_flags: TTM_ALLOCATION_POOL_ flags + * @alloc_flags: TTM_ALLOCATION_POOL_* flags * * Initialize the pool and its pool types. */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 54ea1b513950..d32ce1cb579e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -553,6 +553,9 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, memcpy(&vfbs->uo, uo, sizeof(vfbs->uo)); vmw_user_object_ref(&vfbs->uo); + if (vfbs->uo.buffer) + vfbs->base.base.obj[0] = &vfbs->uo.buffer->tbo.base; + *out = &vfbs->base; ret = drm_framebuffer_init(dev, &vfbs->base.base, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c index aec774fa4d7b..5abd7f5ad2db 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c @@ -247,9 +247,8 @@ vmw_vkms_get_vblank_timestamp(struct drm_crtc *crtc, { struct drm_device *dev = crtc->dev; struct vmw_private *vmw = vmw_priv(dev); - unsigned int pipe = crtc->index; struct vmw_display_unit *du = vmw_crtc_to_du(crtc); - struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); if (!vmw->vkms_enabled) return false; @@ -281,8 +280,7 @@ vmw_vkms_enable_vblank(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct vmw_private *vmw = vmw_priv(dev); - unsigned int pipe = drm_crtc_index(crtc); - struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); struct vmw_display_unit *du = vmw_crtc_to_du(crtc); if (!vmw->vkms_enabled) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f8a3a1bfe42e..e4b273b025d2 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -58,7 +58,6 @@ xe-y += xe_bb.o \ xe_gt_freq.o \ xe_gt_idle.o \ xe_gt_mcr.o \ - xe_gt_pagefault.o \ xe_gt_sysfs.o \ xe_gt_throttle.o \ xe_gt_topology.o \ @@ -73,6 +72,7 @@ xe-y += xe_bb.o \ xe_guc_id_mgr.o \ xe_guc_klv_helpers.o \ xe_guc_log.o \ + xe_guc_pagefault.o \ xe_guc_pc.o \ xe_guc_submit.o \ xe_guc_tlb_inval.o \ @@ -94,6 +94,7 @@ xe-y += xe_bb.o \ xe_nvm.o \ xe_oa.o \ xe_observation.o \ + xe_pagefault.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ @@ -173,11 +174,14 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ xe_pci_sriov.o \ + xe_sriov_packet.o \ xe_sriov_pf.o \ xe_sriov_pf_control.o \ xe_sriov_pf_debugfs.o \ + xe_sriov_pf_migration.o \ xe_sriov_pf_provision.o \ xe_sriov_pf_service.o \ + xe_sriov_pf_sysfs.o \ xe_tile_sriov_pf_debugfs.o # include helpers for tests even when XE is built-in diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index 4465c40f8134..b17e3bab23d5 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -26,8 +26,6 @@ struct i915_vma { struct xe_ggtt_node *node; }; -#define i915_ggtt_clear_scanout(bo) do { } while (0) - #define i915_vma_fence_id(vma) -1 static inline u32 
i915_ggtt_offset(const struct i915_vma *vma) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index d012f02bc84f..d93ddacdf743 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -91,27 +91,6 @@ static inline u32 intel_uncore_rmw(struct intel_uncore *uncore, return xe_mmio_rmw32(__compat_uncore_to_mmio(uncore), reg, clear, set); } -static inline int intel_wait_for_register(struct intel_uncore *uncore, - i915_reg_t i915_reg, u32 mask, - u32 value, unsigned int timeout) -{ - struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - - return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, - timeout * USEC_PER_MSEC, NULL, false); -} - -static inline int intel_wait_for_register_fw(struct intel_uncore *uncore, - i915_reg_t i915_reg, u32 mask, - u32 value, unsigned int timeout, - u32 *out_value) -{ - struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - - return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, - timeout * USEC_PER_MSEC, out_value, false); -} - static inline int __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg, u32 mask, u32 value, unsigned int fast_timeout_us, @@ -133,6 +112,16 @@ __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg, out_value, atomic); } +static inline int +__intel_wait_for_register_fw(struct intel_uncore *uncore, i915_reg_t i915_reg, + u32 mask, u32 value, unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, u32 *out_value) +{ + return __intel_wait_for_register(uncore, i915_reg, mask, value, + fast_timeout_us, slow_timeout_ms, + out_value); +} + static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore, i915_reg_t i915_reg) { diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c index 27437c22bd70..bad2243b9114 100644 --- a/drivers/gpu/drm/xe/display/intel_bo.c +++ b/drivers/gpu/drm/xe/display/intel_bo.c @@ -5,6 +5,7 @@ #include "xe_bo.h" #include "intel_bo.h" +#include "intel_frontbuffer.h" bool intel_bo_is_tiled(struct drm_gem_object *obj) { @@ -28,10 +29,6 @@ bool intel_bo_is_protected(struct drm_gem_object *obj) return xe_bo_is_protected(gem_to_xe_bo(obj)); } -void intel_bo_flush_if_display(struct drm_gem_object *obj) -{ -} - int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { return drm_gem_prime_mmap(obj, vma); @@ -44,15 +41,60 @@ int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, i return xe_bo_read(bo, offset, dst, size); } -struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj) +struct xe_frontbuffer { + struct intel_frontbuffer base; + struct drm_gem_object *obj; + struct kref ref; +}; + +struct intel_frontbuffer *intel_bo_frontbuffer_get(struct drm_gem_object *obj) +{ + struct xe_frontbuffer *front; + + front = kmalloc(sizeof(*front), GFP_KERNEL); + if (!front) + return NULL; + + intel_frontbuffer_init(&front->base, obj->dev); + + kref_init(&front->ref); + + drm_gem_object_get(obj); + front->obj = obj; + + return &front->base; +} + +void intel_bo_frontbuffer_ref(struct intel_frontbuffer *_front) { - return NULL; + struct xe_frontbuffer *front = + container_of(_front, typeof(*front), base); + + kref_get(&front->ref); +} + +static void frontbuffer_release(struct kref *ref) +{ + struct xe_frontbuffer *front = + container_of(ref, typeof(*front), ref); + + 
intel_frontbuffer_fini(&front->base); + + drm_gem_object_put(front->obj); + + kfree(front); +} + +void intel_bo_frontbuffer_put(struct intel_frontbuffer *_front) +{ + struct xe_frontbuffer *front = + container_of(_front, typeof(*front), base); + + kref_put(&front->ref, frontbuffer_release); } -struct intel_frontbuffer *intel_bo_set_frontbuffer(struct drm_gem_object *obj, - struct intel_frontbuffer *front) +void intel_bo_frontbuffer_flush_for_display(struct intel_frontbuffer *front) { - return front; } void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index a895a8e801a9..917a088c28f2 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -101,7 +101,6 @@ #define XE2_LMEM_CFG XE_REG(0x48b0) -#define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) #define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) @@ -590,6 +589,7 @@ #define GT_GFX_RC6 XE_REG(0x138108) #define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8) +/* Common performance limit reason bits - available on all platforms */ #define GT0_PERF_LIMIT_REASONS_MASK 0xde3 #define PROCHOT_MASK REG_BIT(0) #define THERMAL_LIMIT_MASK REG_BIT(1) @@ -599,6 +599,18 @@ #define POWER_LIMIT_4_MASK REG_BIT(8) #define POWER_LIMIT_1_MASK REG_BIT(10) #define POWER_LIMIT_2_MASK REG_BIT(11) +/* Platform-specific performance limit reason bits - for Crescent Island */ +#define CRI_PERF_LIMIT_REASONS_MASK 0xfdff +#define SOC_THERMAL_LIMIT_MASK REG_BIT(1) +#define MEM_THERMAL_MASK REG_BIT(2) +#define VR_THERMAL_MASK REG_BIT(3) +#define ICCMAX_MASK REG_BIT(4) +#define SOC_AVG_THERMAL_MASK REG_BIT(6) +#define FASTVMODE_MASK REG_BIT(7) +#define PSYS_PL1_MASK REG_BIT(12) +#define PSYS_PL2_MASK REG_BIT(13) +#define P0_FREQ_MASK REG_BIT(14) +#define PSYS_CRIT_MASK REG_BIT(15) #define GT_PERF_STATUS XE_REG(0x1381b4) #define VOLTAGE_MASK REG_GENMASK(10, 0) diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h index 264e9baf949c..0f79c0714454 100644 --- a/drivers/gpu/drm/xe/regs/xe_pmt.h +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -24,6 +24,7 @@ #define BMG_MODS_RESIDENCY_OFFSET (0x4D0) #define BMG_G2_RESIDENCY_OFFSET (0x530) #define BMG_G6_RESIDENCY_OFFSET (0x538) +#define BMG_G7_RESIDENCY_OFFSET (0x4B0) #define BMG_G8_RESIDENCY_OFFSET (0x540) #define BMG_G10_RESIDENCY_OFFSET (0x548) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 1926b4044314..ad93c57edd17 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -40,6 +40,8 @@ #define STOLEN_RESERVED XE_REG(0x1082c0) #define WOPCM_SIZE_MASK REG_GENMASK64(9, 7) +#define SG_TILE_ADDR_RANGE(_idx) XE_REG(0x1083a0 + (_idx) * 4) + #define MTL_RP_STATE_CAP XE_REG(0x138000) #define MTL_GT_RPA_FREQUENCY XE_REG(0x138008) diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c new file mode 100644 index 000000000000..42bfc4bcfbcf --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <kunit/static_stub.h> +#include <kunit/test.h> +#include <kunit/test-bug.h> + +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +#define TEST_MAX_VFS 63 + +static void pf_set_admin_mode(struct xe_device *xe, bool enable) +{ + /* should match 
logic of xe_sriov_pf_admin_only() */ + xe->info.probe_display = !enable; + KUNIT_EXPECT_EQ(kunit_get_current_test(), enable, xe_sriov_pf_admin_only(xe)); +} + +static const void *num_vfs_gen_param(struct kunit *test, const void *prev, char *desc) +{ + unsigned long next = 1 + (unsigned long)prev; + + if (next > TEST_MAX_VFS) + return NULL; + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%lu VF%s", + next, str_plural(next)); + return (void *)next; +} + +static int pf_gt_config_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* any random platform with SR-IOV */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + struct xe_gt *gt; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_TRUE(test, IS_SRIOV_PF(xe)); + + gt = xe_root_mmio_gt(xe); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt); + test->priv = gt; + + /* pretend it can support up to 63 VFs */ + xe->sriov.pf.device_total_vfs = TEST_MAX_VFS; + xe->sriov.pf.driver_max_vfs = TEST_MAX_VFS; + KUNIT_ASSERT_EQ(test, xe_sriov_pf_get_totalvfs(xe), 63); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + /* more sanity checks */ + KUNIT_EXPECT_EQ(test, GUC_ID_MAX + 1, SZ_64K); + KUNIT_EXPECT_EQ(test, GUC_NUM_DOORBELLS, SZ_256); + + return 0; +} + +static void fair_contexts_1vf(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, SZ_32K, pf_profile_fair_ctxs(gt, 1)); + + pf_set_admin_mode(xe, true); + KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, SZ_64K - SZ_1K, pf_profile_fair_ctxs(gt, 1)); +} + +static void fair_contexts(struct kunit *test) +{ + unsigned int num_vfs = (unsigned long)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + + KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_ctxs(gt, num_vfs))); + KUNIT_EXPECT_GT(test, GUC_ID_MAX, num_vfs * pf_profile_fair_ctxs(gt, num_vfs)); + + if (num_vfs > 31) + KUNIT_ASSERT_EQ(test, SZ_1K, pf_profile_fair_ctxs(gt, num_vfs)); + else if (num_vfs > 15) + KUNIT_ASSERT_EQ(test, SZ_2K, pf_profile_fair_ctxs(gt, num_vfs)); + else if (num_vfs > 7) + KUNIT_ASSERT_EQ(test, SZ_4K, pf_profile_fair_ctxs(gt, num_vfs)); + else if (num_vfs > 3) + KUNIT_ASSERT_EQ(test, SZ_8K, pf_profile_fair_ctxs(gt, num_vfs)); + else if (num_vfs > 1) + KUNIT_ASSERT_EQ(test, SZ_16K, pf_profile_fair_ctxs(gt, num_vfs)); + else + KUNIT_ASSERT_EQ(test, SZ_32K, pf_profile_fair_ctxs(gt, num_vfs)); +} + +static void fair_doorbells_1vf(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, 128, pf_profile_fair_dbs(gt, 1)); + + pf_set_admin_mode(xe, true); + KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, 240, pf_profile_fair_dbs(gt, 1)); +} + +static void fair_doorbells(struct kunit *test) +{ + unsigned int num_vfs = (unsigned long)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + + KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_dbs(gt, 
num_vfs))); + KUNIT_EXPECT_GE(test, GUC_NUM_DOORBELLS, (num_vfs + 1) * pf_profile_fair_dbs(gt, num_vfs)); + + if (num_vfs > 31) + KUNIT_ASSERT_EQ(test, SZ_4, pf_profile_fair_dbs(gt, num_vfs)); + else if (num_vfs > 15) + KUNIT_ASSERT_EQ(test, SZ_8, pf_profile_fair_dbs(gt, num_vfs)); + else if (num_vfs > 7) + KUNIT_ASSERT_EQ(test, SZ_16, pf_profile_fair_dbs(gt, num_vfs)); + else if (num_vfs > 3) + KUNIT_ASSERT_EQ(test, SZ_32, pf_profile_fair_dbs(gt, num_vfs)); + else if (num_vfs > 1) + KUNIT_ASSERT_EQ(test, SZ_64, pf_profile_fair_dbs(gt, num_vfs)); + else + KUNIT_ASSERT_EQ(test, SZ_128, pf_profile_fair_dbs(gt, num_vfs)); +} + +static void fair_ggtt_1vf(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, SZ_2G, pf_profile_fair_ggtt(gt, 1)); + + pf_set_admin_mode(xe, true); + KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe)); + KUNIT_EXPECT_EQ(test, SZ_2G + SZ_1G + SZ_512M, pf_profile_fair_ggtt(gt, 1)); +} + +static void fair_ggtt(struct kunit *test) +{ + unsigned int num_vfs = (unsigned long)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + u64 alignment = pf_get_ggtt_alignment(gt); + u64 shareable = SZ_2G + SZ_1G + SZ_512M; + + pf_set_admin_mode(xe, false); + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); + + KUNIT_EXPECT_TRUE(test, IS_ALIGNED(pf_profile_fair_ggtt(gt, num_vfs), alignment)); + KUNIT_EXPECT_GE(test, shareable, num_vfs * pf_profile_fair_ggtt(gt, num_vfs)); + + if (num_vfs > 56) + KUNIT_ASSERT_EQ(test, SZ_64M - SZ_8M, pf_profile_fair_ggtt(gt, num_vfs)); + else if (num_vfs > 28) + KUNIT_ASSERT_EQ(test, SZ_64M, pf_profile_fair_ggtt(gt, num_vfs)); + else if (num_vfs > 14) + KUNIT_ASSERT_EQ(test, SZ_128M, pf_profile_fair_ggtt(gt, num_vfs)); + else if (num_vfs > 7) + KUNIT_ASSERT_EQ(test, SZ_256M, pf_profile_fair_ggtt(gt, num_vfs)); + else if (num_vfs > 3) + KUNIT_ASSERT_EQ(test, SZ_512M, pf_profile_fair_ggtt(gt, num_vfs)); + else if (num_vfs > 1) + KUNIT_ASSERT_EQ(test, SZ_1G, pf_profile_fair_ggtt(gt, num_vfs)); + else + KUNIT_ASSERT_EQ(test, SZ_2G, pf_profile_fair_ggtt(gt, num_vfs)); +} + +static struct kunit_case pf_gt_config_test_cases[] = { + KUNIT_CASE(fair_contexts_1vf), + KUNIT_CASE(fair_doorbells_1vf), + KUNIT_CASE(fair_ggtt_1vf), + KUNIT_CASE_PARAM(fair_contexts, num_vfs_gen_param), + KUNIT_CASE_PARAM(fair_doorbells, num_vfs_gen_param), + KUNIT_CASE_PARAM(fair_ggtt, num_vfs_gen_param), + {} +}; + +static struct kunit_suite pf_gt_config_suite = { + .name = "pf_gt_config", + .test_cases = pf_gt_config_test_cases, + .init = pf_gt_config_test_init, +}; + +kunit_test_suite(pf_gt_config_suite); diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 1c3c9557a9bd..e91da9589c5f 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -142,6 +142,7 @@ static int dgfx_pkg_residencies_show(struct seq_file *m, void *data) } residencies[] = { {BMG_G2_RESIDENCY_OFFSET, "Package G2"}, {BMG_G6_RESIDENCY_OFFSET, "Package G6"}, + {BMG_G7_RESIDENCY_OFFSET, "Package G7"}, {BMG_G8_RESIDENCY_OFFSET, "Package G8"}, {BMG_G10_RESIDENCY_OFFSET, "Package G10"}, {BMG_MODS_RESIDENCY_OFFSET, "Package ModS"} diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 86d5960476af..c7d373c70f0f 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -52,6 +52,7 @@ #include "xe_nvm.h" #include 
"xe_oa.h" #include "xe_observation.h" +#include "xe_pagefault.h" #include "xe_pat.h" #include "xe_pcode.h" #include "xe_pm.h" @@ -896,6 +897,10 @@ int xe_device_probe(struct xe_device *xe) return err; } + err = xe_pagefault_init(xe); + if (err) + return err; + if (xe->tiles->media_gt && XE_GT_WA(xe->tiles->media_gt, 15015404425_disable)) XE_DEVICE_WA_DISABLE(xe, 15015404425); @@ -988,21 +993,21 @@ void xe_device_remove(struct xe_device *xe) void xe_device_shutdown(struct xe_device *xe) { + struct xe_gt *gt; + u8 id; + drm_dbg(&xe->drm, "Shutting down device\n"); - if (xe_driver_flr_disabled(xe)) { - struct xe_gt *gt; - u8 id; + xe_display_pm_shutdown(xe); - xe_display_pm_shutdown(xe); + xe_irq_suspend(xe); - xe_irq_suspend(xe); + for_each_gt(gt, xe, id) + xe_gt_shutdown(gt); - for_each_gt(gt, xe, id) - xe_gt_shutdown(gt); + xe_display_pm_shutdown_late(xe); - xe_display_pm_shutdown_late(xe); - } else { + if (!xe_driver_flr_disabled(xe)) { /* BOOM! */ __xe_driver_flr(xe); } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index af0ce275b032..0b2fa7c56d38 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -18,6 +18,7 @@ #include "xe_lmtt_types.h" #include "xe_memirq_types.h" #include "xe_oa_types.h" +#include "xe_pagefault_types.h" #include "xe_platform_types.h" #include "xe_pmu_types.h" #include "xe_pt_types.h" @@ -418,6 +419,16 @@ struct xe_device { u32 next_asid; /** @usm.lock: protects UM state */ struct rw_semaphore lock; + /** @usm.pf_wq: page fault work queue, unbound, high priority */ + struct workqueue_struct *pf_wq; + /* + * We pick 4 here because, in the current implementation, it + * yields the best bandwidth utilization of the kernel paging + * engine. + */ +#define XE_PAGEFAULT_QUEUE_COUNT 4 + /** @usm.pf_queue: Page fault queues */ + struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT]; } usm; /** @pinned: pinned BO state */ @@ -645,9 +656,6 @@ struct xe_device { */ u32 edram_size_mb; - /* To shut up runtime pm macros.. 
*/ - struct xe_runtime_pm {} runtime_pm; - struct intel_uncore { spinlock_t lock; } uncore; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 650e45f6a7c7..97dfb7945b7a 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -49,6 +49,7 @@ struct xe_eu_stall_data_stream { wait_queue_head_t poll_wq; size_t data_record_size; size_t per_xecore_buf_size; + unsigned int fw_ref; struct xe_gt *gt; struct xe_bo *bo; @@ -660,13 +661,12 @@ static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream) struct per_xecore_buf *xecore_buf; struct xe_gt *gt = stream->gt; u16 group, instance; - unsigned int fw_ref; int xecore; /* Take runtime pm ref and forcewake to disable RC6 */ xe_pm_runtime_get(gt_to_xe(gt)); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) { + stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); + if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FW_RENDER)) { xe_gt_err(gt, "Failed to get RENDER forcewake\n"); xe_pm_runtime_put(gt_to_xe(gt)); return -ETIMEDOUT; @@ -832,7 +832,7 @@ static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream) xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); - xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER); + xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(gt_to_xe(gt)); return 0; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 521467d976f7..4d81210e41f5 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -21,6 +21,7 @@ #include "xe_sched_job.h" #include "xe_sync.h" #include "xe_svm.h" +#include "xe_trace.h" #include "xe_vm.h" /** @@ -154,6 +155,12 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_exec_queue; } + if (atomic_read(&q->job_cnt) >= XE_MAX_JOB_COUNT_PER_EXEC_QUEUE) { + trace_xe_exec_queue_reach_max_job_count(q, XE_MAX_JOB_COUNT_PER_EXEC_QUEUE); + err = -EAGAIN; + goto err_exec_queue; + } + if (args->num_syncs) { syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); if (!syncs) { @@ -166,7 +173,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], - &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | + &syncs_user[num_syncs], NULL, 0, + SYNC_PARSE_FLAG_EXEC | (xe_vm_in_lr_mode(vm) ? 
SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) @@ -294,10 +302,6 @@ retry: goto err_put_job; if (!xe_vm_in_lr_mode(vm)) { - err = xe_sched_job_last_fence_add_dep(job, vm); - if (err) - goto err_put_job; - err = xe_svm_notifier_lock_interruptible(vm); if (err) goto err_put_job; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 90cbc95f8e2e..8724f8de67e2 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -10,6 +10,7 @@ #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> +#include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> #include "xe_dep_scheduler.h" @@ -368,6 +369,16 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } xe_vm_put(migrate_vm); + if (!IS_ERR(q)) { + int err = drm_syncobj_create(&q->ufence_syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (err) { + xe_exec_queue_put(q); + return ERR_PTR(err); + } + } + return q; } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); @@ -376,11 +387,20 @@ void xe_exec_queue_destroy(struct kref *ref) { struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); struct xe_exec_queue *eq, *next; + int i; + + xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0); + + if (q->ufence_syncobj) + drm_syncobj_put(q->ufence_syncobj); if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); xe_exec_queue_last_fence_put_unlocked(q); + for_each_tlb_inval(i) + xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i); + if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { list_for_each_entry_safe(eq, next, &q->multi_gt_list, multi_gt_link) @@ -998,7 +1018,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - if (q->flags & EXEC_QUEUE_FLAG_VM) { + if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) { + xe_migrate_job_lock_assert(q); + } else if (q->flags & EXEC_QUEUE_FLAG_VM) { lockdep_assert_held(&vm->lock); } else { xe_vm_assert_held(vm); @@ -1097,32 +1119,104 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, struct dma_fence *fence) { xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, !dma_fence_is_container(fence)); xe_exec_queue_last_fence_put(q, vm); q->last_fence = dma_fence_get(fence); } /** - * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue + * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence * @q: The exec queue - * @vm: The VM the engine does a bind or exec for + * @vm: The VM the engine does a bind for + * @type: Either primary or media GT + */ +void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + + xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type); +} + +/** + * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB + * invalidation fence unlocked + * @q: The exec queue + * @type: Either primary or media GT + * + * Only safe to be called from xe_exec_queue_destroy(). 
+ */ +void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q, + unsigned int type) +{ + xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + + dma_fence_put(q->tlb_inval[type].last_fence); + q->tlb_inval[type].last_fence = NULL; +} + +/** + * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation + * @q: The exec queue + * @vm: The VM the engine does a bind for + * @type: Either primary or media GT + * + * Get last fence, takes a ref * - * Returns: - * -ETIME if there exists an unsignalled last fence dependency, zero otherwise. + * Returns: last fence if not signaled, dma fence stub if signaled */ -int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm) +struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type) { struct dma_fence *fence; - int err = 0; - fence = xe_exec_queue_last_fence_get(q, vm); - if (fence) { - err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ? - 0 : -ETIME; - dma_fence_put(fence); - } + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM | + EXEC_QUEUE_FLAG_MIGRATE)); - return err; + if (q->tlb_inval[type].last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &q->tlb_inval[type].last_fence->flags)) + xe_exec_queue_tlb_inval_last_fence_put(q, vm, type); + + fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub(); + dma_fence_get(fence); + return fence; +} + +/** + * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation + * @q: The exec queue + * @vm: The VM the engine does a bind for + * @fence: The fence + * @type: Either primary or media GT + * + * Set the last fence for the tlb invalidation type on the queue. Increases + * reference count for fence, when closing queue + * xe_exec_queue_tlb_inval_last_fence_put should be called. 
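As a rough illustration of how these helpers are meant to be chained (a sketch only: issue_tlb_inval() below is a stand-in, and the real call sites live elsewhere in this series), a bind path holding the lock named in the kernel-doc above, vm->lock in write mode for a VM-bind queue, might do:

	/* Stand-in, not a function from this series. */
	struct dma_fence *issue_tlb_inval(struct xe_exec_queue *q, unsigned int type,
					  struct dma_fence *prev);

	static void record_tlb_inval(struct xe_exec_queue *q, struct xe_vm *vm)
	{
		unsigned int type;

		for_each_tlb_inval(type) {
			struct dma_fence *prev, *fence;

			/* Returns a referenced fence, or the signalled stub fence. */
			prev = xe_exec_queue_tlb_inval_last_fence_get(q, vm, type);

			/* Issue the new invalidation ordered after 'prev' (stand-in). */
			fence = issue_tlb_inval(q, type, prev);
			dma_fence_put(prev);

			/* set() takes its own reference; drop ours. */
			xe_exec_queue_tlb_inval_last_fence_set(q, vm, fence, type);
			dma_fence_put(fence);
		}
	}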
+ */ +void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q, + struct xe_vm *vm, + struct dma_fence *fence, + unsigned int type) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM | + EXEC_QUEUE_FLAG_MIGRATE)); + xe_assert(vm->xe, !dma_fence_is_container(fence)); + + xe_exec_queue_tlb_inval_last_fence_put(q, vm, type); + q->tlb_inval[type].last_fence = dma_fence_get(fence); } /** diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index a4dfbe858bda..fda4d4f9bda8 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -14,6 +14,10 @@ struct drm_file; struct xe_device; struct xe_file; +#define for_each_tlb_inval(__i) \ + for (__i = XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT; \ + __i <= XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT; ++__i) + struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hw_engine, u32 flags, @@ -84,8 +88,23 @@ struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue * struct xe_vm *vm); void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, struct dma_fence *fence); -int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, - struct xe_vm *vm); + +void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type); + +void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q, + unsigned int type); + +struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type); + +void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q, + struct xe_vm *vm, + struct dma_fence *fence, + unsigned int type); + void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q); int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 282505fa1377..771ffe35cd0c 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -15,6 +15,7 @@ #include "xe_hw_fence_types.h" #include "xe_lrc_types.h" +struct drm_syncobj; struct xe_execlist_exec_queue; struct xe_gt; struct xe_guc_exec_queue; @@ -145,6 +146,11 @@ struct xe_exec_queue { * dependency scheduler */ struct xe_dep_scheduler *dep_scheduler; + /** + * @last_fence: last fence for tlb invalidation, protected by + * vm->lock in write mode + */ + struct dma_fence *last_fence; } tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT]; /** @pxp: PXP info tracking */ @@ -155,6 +161,12 @@ struct xe_exec_queue { struct list_head link; } pxp; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; @@ -162,6 +174,11 @@ struct xe_exec_queue { const struct xe_ring_ops *ring_ops; /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ struct drm_sched_entity *entity; + +#define XE_MAX_JOB_COUNT_PER_EXEC_QUEUE 1000 + /** @job_cnt: number of drm jobs in this exec queue */ + atomic_t job_cnt; + /** * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed * Protected by @vm's resv. 
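The new job_cnt counter and XE_MAX_JOB_COUNT_PER_EXEC_QUEUE limit suggest an admission bound on outstanding jobs per queue. The enforcement point and the error code below are assumptions for illustration only; the counter and the limit are the only parts taken from the hunk above.

	/* hypothetical admission check at job creation */
	if (atomic_read(&q->job_cnt) >= XE_MAX_JOB_COUNT_PER_EXEC_QUEUE)
		return -EBUSY;
	atomic_inc(&q->job_cnt);

	/* ... and on job retirement ... */
	atomic_dec(&q->job_cnt);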
Unused if @vm == NULL. diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h index 12d6e2367455..14b7b86e801b 100644 --- a/drivers/gpu/drm/xe/xe_force_wake_types.h +++ b/drivers/gpu/drm/xe/xe_force_wake_types.h @@ -52,7 +52,22 @@ enum xe_force_wake_domains { }; /** - * struct xe_force_wake_domain - Xe force wake domains + * struct xe_force_wake_domain - Xe force wake power domain + * + * Represents an individual device-internal power domain. The driver must + * ensure the power domain is awake before accessing registers or other + * hardware functionality that is part of the power domain. Since different + * driver threads may access hardware units simultaneously, a reference count + * is used to ensure that the domain remains awake as long as any software + * is using the part of the hardware covered by the power domain. + * + * Hardware provides a register interface to allow the driver to request + * wake/sleep of power domains, although in most cases the actual action of + * powering the hardware up/down is handled by firmware (and may be subject to + * requirements and constraints outside of the driver's visibility) so the + * driver needs to wait for an acknowledgment that a wake request has been + * acted upon before accessing the parts of the hardware that reside within the + * power domain. */ struct xe_force_wake_domain { /** @id: domain force wake id */ @@ -70,7 +85,14 @@ struct xe_force_wake_domain { }; /** - * struct xe_force_wake - Xe force wake + * struct xe_force_wake - Xe force wake collection + * + * Represents a collection of related power domains (struct + * xe_force_wake_domain) associated with a subunit of the device. + * + * Currently only used for GT power domains (where the term "forcewake" is used + * in the hardware documentation), although the interface could be extended to + * power wells in other parts of the hardware in the future. */ struct xe_force_wake { /** @gt: back pointers to GT */ diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 20d226d90c50..ef481b334af4 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -151,6 +151,14 @@ static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte) ggtt_update_access_counter(ggtt); } +static u64 xe_ggtt_get_pte(struct xe_ggtt *ggtt, u64 addr) +{ + xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); + xe_tile_assert(ggtt->tile, addr < ggtt->size); + + return readq(&ggtt->gsm[addr >> XE_PTE_SHIFT]); +} + static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) { u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; @@ -233,16 +241,19 @@ void xe_ggtt_might_lock(struct xe_ggtt *ggtt) static const struct xe_ggtt_pt_ops xelp_pt_ops = { .pte_encode_flags = xelp_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte, + .ggtt_get_pte = xe_ggtt_get_pte, }; static const struct xe_ggtt_pt_ops xelpg_pt_ops = { .pte_encode_flags = xelpg_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte, + .ggtt_get_pte = xe_ggtt_get_pte, }; static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .pte_encode_flags = xelpg_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte_and_flush, + .ggtt_get_pte = xe_ggtt_get_pte, }; static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u32 reserved) @@ -698,6 +709,20 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) } /** + * xe_ggtt_node_pt_size() - Get the size of page table entries needed to map a GGTT node. 
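To make the reference-counted wake/sleep flow described in the expanded forcewake comments concrete, here is a hedged usage sketch. The put call and the fw_ref variable match calls visible elsewhere in this series; xe_force_wake_get() and xe_force_wake_ref_has_domain() are assumed from the driver's existing forcewake interface, and XE_FORCEWAKE_ALL stands in for whichever domain the register access actually needs.

	unsigned int fw_ref;

	/* request the domain(s); the ack wait happens inside the get */
	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
		xe_force_wake_put(gt_to_fw(gt), fw_ref);
		return -ETIMEDOUT;
	}

	/* ... registers behind the domain may be accessed here ... */

	/* drop this thread's reference; hardware may sleep once it hits zero */
	xe_force_wake_put(gt_to_fw(gt), fw_ref);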
+ * @node: the &xe_ggtt_node + * + * Return: GGTT node page table entries size in bytes. + */ +size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node) +{ + if (!node) + return 0; + + return node->base.size / XE_PAGE_SIZE * sizeof(u64); +} + +/** * xe_ggtt_map_bo - Map the BO into GGTT * @ggtt: the &xe_ggtt where node will be mapped * @node: the &xe_ggtt_node where this BO is mapped @@ -930,6 +955,85 @@ void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); mutex_unlock(&node->ggtt->lock); } + +/** + * xe_ggtt_node_save() - Save a &xe_ggtt_node to a buffer. + * @node: the &xe_ggtt_node to be saved + * @dst: destination buffer + * @size: destination buffer size in bytes + * @vfid: VF identifier + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid) +{ + struct xe_ggtt *ggtt; + u64 start, end; + u64 *buf = dst; + u64 pte; + + if (!node) + return -ENOENT; + + guard(mutex)(&node->ggtt->lock); + + if (xe_ggtt_node_pt_size(node) != size) + return -EINVAL; + + ggtt = node->ggtt; + start = node->base.start; + end = start + node->base.size - 1; + + while (start < end) { + pte = ggtt->pt_ops->ggtt_get_pte(ggtt, start); + if (vfid != u64_get_bits(pte, GGTT_PTE_VFID)) + return -EPERM; + + *buf++ = u64_replace_bits(pte, 0, GGTT_PTE_VFID); + start += XE_PAGE_SIZE; + } + + return 0; +} + +/** + * xe_ggtt_node_load() - Load a &xe_ggtt_node from a buffer. + * @node: the &xe_ggtt_node to be loaded + * @src: source buffer + * @size: source buffer size in bytes + * @vfid: VF identifier + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid) +{ + u64 vfid_pte = xe_encode_vfid_pte(vfid); + const u64 *buf = src; + struct xe_ggtt *ggtt; + u64 start, end; + + if (!node) + return -ENOENT; + + guard(mutex)(&node->ggtt->lock); + + if (xe_ggtt_node_pt_size(node) != size) + return -EINVAL; + + ggtt = node->ggtt; + start = node->base.start; + end = start + node->base.size - 1; + + while (start < end) { + vfid_pte = u64_replace_bits(*buf++, vfid, GGTT_PTE_VFID); + ggtt->pt_ops->ggtt_set_pte(ggtt, start, vfid_pte); + start += XE_PAGE_SIZE; + } + xe_ggtt_invalidate(ggtt); + + return 0; +} + #endif /** diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 75fc7a1efea7..93fea4b6079c 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -29,6 +29,7 @@ int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags); void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); +size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, struct xe_bo *bo, u16 pat_index); void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); @@ -43,6 +44,8 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer #ifdef CONFIG_PCI_IOV void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); +int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid); +int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid); #endif #ifndef CONFIG_LOCKDEP diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index c5e999d58ff2..dacd796f8184 100644 
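Putting the new GGTT helpers together, a hedged sketch of a save/load round trip for a single node: the buffer holds one u64 PTE per 4 KiB GGTT page, xe_ggtt_node_save() rejects PTEs stamped with a different VFID and strips the VFID field, and xe_ggtt_node_load() re-stamps the VFID while writing the PTEs back, then invalidates the GGTT. node and vfid are placeholders; only kvmalloc()/kvfree() are assumed beyond the functions shown above.

	size_t sz = xe_ggtt_node_pt_size(node);	/* one u64 per 4 KiB page */
	u64 *ptes;
	int err;

	if (!sz)
		return 0;	/* nothing mapped for this node */

	ptes = kvmalloc(sz, GFP_KERNEL);
	if (!ptes)
		return -ENOMEM;

	err = xe_ggtt_node_save(node, ptes, sz, vfid);	/* -EPERM on foreign VFID */
	if (!err)
		err = xe_ggtt_node_load(node, ptes, sz, vfid);

	kvfree(ptes);
	return err;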
--- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -78,6 +78,8 @@ struct xe_ggtt_pt_ops { u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); /** @ggtt_set_pte: Directly write into GGTT's PTE */ void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); + /** @ggtt_get_pte: Directly read from GGTT's PTE */ + u64 (*ggtt_get_pte)(struct xe_ggtt *ggtt, u64 addr); }; #endif diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 89808b33d0a8..6d479948bf21 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -32,7 +32,6 @@ #include "xe_gt_freq.h" #include "xe_gt_idle.h" #include "xe_gt_mcr.h" -#include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_vf.h" @@ -49,6 +48,7 @@ #include "xe_map.h" #include "xe_migrate.h" #include "xe_mmio.h" +#include "xe_pagefault.h" #include "xe_pat.h" #include "xe_pm.h" #include "xe_mocs.h" @@ -607,6 +607,13 @@ static void xe_gt_fini(void *arg) struct xe_gt *gt = arg; int i; + if (disable_work_sync(>->reset.worker)) + /* + * If gt_reset_worker was halted from executing, take care of + * releasing the rpm reference here. + */ + xe_pm_runtime_put(gt_to_xe(gt)); + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(>->fence_irq[i]); @@ -637,10 +644,6 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = xe_gt_pagefault_init(gt); - if (err) - return err; - err = xe_gt_idle_init(>->gtidle); if (err) return err; @@ -813,21 +816,18 @@ static int do_gt_restart(struct xe_gt *gt) return 0; } -static int gt_reset(struct xe_gt *gt) +static void gt_reset_worker(struct work_struct *w) { + struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); unsigned int fw_ref; int err; - if (xe_device_wedged(gt_to_xe(gt))) { - err = -ECANCELED; + if (xe_device_wedged(gt_to_xe(gt))) goto err_pm_put; - } /* We only support GT resets with GuC submission */ - if (!xe_device_uc_enabled(gt_to_xe(gt))) { - err = -ENODEV; + if (!xe_device_uc_enabled(gt_to_xe(gt))) goto err_pm_put; - } xe_gt_info(gt, "reset started\n"); @@ -849,7 +849,7 @@ static int gt_reset(struct xe_gt *gt) xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); - xe_gt_pagefault_reset(gt); + xe_pagefault_reset(gt_to_xe(gt), gt); xe_uc_stop(>->uc); @@ -864,30 +864,24 @@ static int gt_reset(struct xe_gt *gt) goto err_out; xe_force_wake_put(gt_to_fw(gt), fw_ref); + + /* Pair with get while enqueueing the work in xe_gt_reset_async() */ xe_pm_runtime_put(gt_to_xe(gt)); xe_gt_info(gt, "reset done\n"); - return 0; + return; err_out: xe_force_wake_put(gt_to_fw(gt), fw_ref); XE_WARN_ON(xe_uc_start(>->uc)); + err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); - xe_device_declare_wedged(gt_to_xe(gt)); + err_pm_put: xe_pm_runtime_put(gt_to_xe(gt)); - - return err; -} - -static void gt_reset_worker(struct work_struct *w) -{ - struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); - - gt_reset(gt); } void xe_gt_reset_async(struct xe_gt *gt) @@ -899,6 +893,8 @@ void xe_gt_reset_async(struct xe_gt *gt) return; xe_gt_info(gt, "reset queued\n"); + + /* Pair with put in gt_reset_worker() if work is enqueued */ xe_pm_runtime_get_noresume(gt_to_xe(gt)); if (!queue_work(gt->ordered_wq, >->reset.worker)) xe_pm_runtime_put(gt_to_xe(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index e88f113226bc..849ea6c86e8e 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -29,24 +29,26 @@ * PCODE is the 
ultimate decision maker of the actual running frequency, based * on thermal and other running conditions. * - * Xe's Freq provides a sysfs API for frequency management: + * Xe's Freq provides a sysfs API for frequency management under + * ``<device>/tile#/gt#/freq0/`` directory. * - * device/tile#/gt#/freq0/<item>_freq *read-only* files: + * **Read-only** attributes: * - * - act_freq: The actual resolved frequency decided by PCODE. - * - cur_freq: The current one requested by GuC PC to the PCODE. - * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. - * - rpa_freq: The Render Performance (RP) A level, which is the achievable one. - * Calculated by PCODE at runtime based on multiple running conditions - * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. - * Calculated by PCODE at runtime based on multiple running conditions - * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. + * - ``act_freq``: The actual resolved frequency decided by PCODE. + * - ``cur_freq``: The current one requested by GuC PC to the PCODE. + * - ``rpn_freq``: The Render Performance (RP) N level, which is the minimal one. + * - ``rpa_freq``: The Render Performance (RP) A level, which is the achievable one. + * Calculated by PCODE at runtime based on multiple running conditions + * - ``rpe_freq``: The Render Performance (RP) E level, which is the efficient one. + * Calculated by PCODE at runtime based on multiple running conditions + * - ``rp0_freq``: The Render Performance (RP) 0 level, which is the maximum one. * - * device/tile#/gt#/freq0/<item>_freq *read-write* files: + * **Read-write** attributes: * - * - min_freq: Min frequency request. - * - max_freq: Max frequency request. - * If max <= min, then freq_min becomes a fixed frequency request. + * - ``min_freq``: Min frequency request. + * - ``max_freq``: Max frequency request. + * If max <= min, then freq_min becomes a fixed frequency + * request. 
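To make the attribute semantics concrete, a small self-contained userspace sketch that reads two of the read-only values and issues a max-frequency request. The card0/tile0/gt0 path components are placeholders for whatever device and GT are being driven.

#include <stdio.h>

#define FREQ_DIR "/sys/class/drm/card0/device/tile0/gt0/freq0/"

static int read_freq(const char *name, unsigned int *val)
{
	char path[128];
	FILE *f;
	int ok;

	snprintf(path, sizeof(path), FREQ_DIR "%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	ok = fscanf(f, "%u", val) == 1;
	fclose(f);
	return ok ? 0 : -1;
}

int main(void)
{
	unsigned int act, rpe;
	FILE *f;

	if (read_freq("act_freq", &act) || read_freq("rpe_freq", &rpe))
		return 1;
	printf("actual %u, efficient %u\n", act, rpe);

	/* request the efficient frequency as the new maximum; if this ends up
	 * <= min_freq, min_freq becomes a fixed frequency request */
	f = fopen(FREQ_DIR "max_freq", "w");
	if (!f)
		return 1;
	fprintf(f, "%u\n", rpe);
	fclose(f);
	return 0;
}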
*/ static struct xe_guc_pc * diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c deleted file mode 100644 index a054d6010ae0..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ /dev/null @@ -1,679 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_gt_pagefault.h" - -#include <linux/bitfield.h> -#include <linux/circ_buf.h> - -#include <drm/drm_exec.h> -#include <drm/drm_managed.h> - -#include "abi/guc_actions_abi.h" -#include "xe_bo.h" -#include "xe_gt.h" -#include "xe_gt_printk.h" -#include "xe_gt_stats.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" -#include "xe_migrate.h" -#include "xe_svm.h" -#include "xe_trace_bo.h" -#include "xe_vm.h" -#include "xe_vram_types.h" - -struct pagefault { - u64 page_addr; - u32 asid; - u16 pdata; - u8 vfid; - u8 access_type; - u8 fault_type; - u8 fault_level; - u8 engine_class; - u8 engine_instance; - u8 fault_unsuccessful; - bool trva_fault; -}; - -enum access_type { - ACCESS_TYPE_READ = 0, - ACCESS_TYPE_WRITE = 1, - ACCESS_TYPE_ATOMIC = 2, - ACCESS_TYPE_RESERVED = 3, -}; - -enum fault_type { - NOT_PRESENT = 0, - WRITE_ACCESS_VIOLATION = 1, - ATOMIC_ACCESS_VIOLATION = 2, -}; - -struct acc { - u64 va_range_base; - u32 asid; - u32 sub_granularity; - u8 granularity; - u8 vfid; - u8 access_type; - u8 engine_class; - u8 engine_instance; -}; - -static bool access_is_atomic(enum access_type access_type) -{ - return access_type == ACCESS_TYPE_ATOMIC; -} - -static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) -{ - return xe_vm_has_valid_gpu_mapping(tile, vma->tile_present, - vma->tile_invalidated); -} - -static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, - bool need_vram_move, struct xe_vram_region *vram) -{ - struct xe_bo *bo = xe_vma_bo(vma); - struct xe_vm *vm = xe_vma_vm(vma); - int err; - - err = xe_vm_lock_vma(exec, vma); - if (err) - return err; - - if (!bo) - return 0; - - return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) : - xe_bo_validate(bo, vm, true, exec); -} - -static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, - bool atomic) -{ - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_validation_ctx ctx; - struct drm_exec exec; - struct dma_fence *fence; - int err, needs_vram; - - lockdep_assert_held_write(&vm->lock); - - needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); - if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma))) - return needs_vram < 0 ? 
needs_vram : -EACCES; - - xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); - xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024); - - trace_xe_vma_pagefault(vma); - - /* Check if VMA is valid, opportunistic check only */ - if (vma_is_valid(tile, vma) && !atomic) - return 0; - -retry_userptr: - if (xe_vma_is_userptr(vma) && - xe_vma_userptr_check_repin(to_userptr_vma(vma))) { - struct xe_userptr_vma *uvma = to_userptr_vma(vma); - - err = xe_vma_userptr_pin_pages(uvma); - if (err) - return err; - } - - /* Lock VM and BOs dma-resv */ - xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); - drm_exec_until_all_locked(&exec) { - err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram); - drm_exec_retry_on_contention(&exec); - xe_validation_retry_on_oom(&ctx, &err); - if (err) - goto unlock_dma_resv; - - /* Bind VMA only to the GT that has faulted */ - trace_xe_vma_pf_bind(vma); - xe_vm_set_validation_exec(vm, &exec); - fence = xe_vma_rebind(vm, vma, BIT(tile->id)); - xe_vm_set_validation_exec(vm, NULL); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - xe_validation_retry_on_oom(&ctx, &err); - goto unlock_dma_resv; - } - } - - dma_fence_wait(fence, false); - dma_fence_put(fence); - -unlock_dma_resv: - xe_validation_ctx_fini(&ctx); - if (err == -EAGAIN) - goto retry_userptr; - - return err; -} - -static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) -{ - struct xe_vm *vm; - - down_read(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, asid); - if (vm && xe_vm_in_fault_mode(vm)) - xe_vm_get(vm); - else - vm = ERR_PTR(-EINVAL); - up_read(&xe->usm.lock); - - return vm; -} - -static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) -{ - struct xe_device *xe = gt_to_xe(gt); - struct xe_vm *vm; - struct xe_vma *vma = NULL; - int err; - bool atomic; - - /* SW isn't expected to handle TRTT faults */ - if (pf->trva_fault) - return -EFAULT; - - vm = asid_to_vm(xe, pf->asid); - if (IS_ERR(vm)) - return PTR_ERR(vm); - - /* - * TODO: Change to read lock? Using write lock for simplicity. 
- */ - down_write(&vm->lock); - - if (xe_vm_is_closed(vm)) { - err = -ENOENT; - goto unlock_vm; - } - - vma = xe_vm_find_vma_by_addr(vm, pf->page_addr); - if (!vma) { - err = -EINVAL; - goto unlock_vm; - } - - atomic = access_is_atomic(pf->access_type); - - if (xe_vma_is_cpu_addr_mirror(vma)) - err = xe_svm_handle_pagefault(vm, vma, gt, - pf->page_addr, atomic); - else - err = handle_vma_pagefault(gt, vma, atomic); - -unlock_vm: - if (!err) - vm->usm.last_fault_vma = vma; - up_write(&vm->lock); - xe_vm_put(vm); - - return err; -} - -static int send_pagefault_reply(struct xe_guc *guc, - struct xe_guc_pagefault_reply *reply) -{ - u32 action[] = { - XE_GUC_ACTION_PAGE_FAULT_RES_DESC, - reply->dw0, - reply->dw1, - }; - - return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); -} - -static void print_pagefault(struct xe_gt *gt, struct pagefault *pf) -{ - xe_gt_dbg(gt, "\n\tASID: %d\n" - "\tVFID: %d\n" - "\tPDATA: 0x%04x\n" - "\tFaulted Address: 0x%08x%08x\n" - "\tFaultType: %d\n" - "\tAccessType: %d\n" - "\tFaultLevel: %d\n" - "\tEngineClass: %d %s\n" - "\tEngineInstance: %d\n", - pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), - lower_32_bits(pf->page_addr), - pf->fault_type, pf->access_type, pf->fault_level, - pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), - pf->engine_instance); -} - -#define PF_MSG_LEN_DW 4 - -static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) -{ - const struct xe_guc_pagefault_desc *desc; - bool ret = false; - - spin_lock_irq(&pf_queue->lock); - if (pf_queue->tail != pf_queue->head) { - desc = (const struct xe_guc_pagefault_desc *) - (pf_queue->data + pf_queue->tail); - - pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); - pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0); - pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0); - pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0); - pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) << - PFD_PDATA_HI_SHIFT; - pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0); - pf->asid = FIELD_GET(PFD_ASID, desc->dw1); - pf->vfid = FIELD_GET(PFD_VFID, desc->dw2); - pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2); - pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2); - pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) << - PFD_VIRTUAL_ADDR_HI_SHIFT; - pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << - PFD_VIRTUAL_ADDR_LO_SHIFT; - - pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % - pf_queue->num_dw; - ret = true; - } - spin_unlock_irq(&pf_queue->lock); - - return ret; -} - -static bool pf_queue_full(struct pf_queue *pf_queue) -{ - lockdep_assert_held(&pf_queue->lock); - - return CIRC_SPACE(pf_queue->head, pf_queue->tail, - pf_queue->num_dw) <= - PF_MSG_LEN_DW; -} - -int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct pf_queue *pf_queue; - unsigned long flags; - u32 asid; - bool full; - - if (unlikely(len != PF_MSG_LEN_DW)) - return -EPROTO; - - asid = FIELD_GET(PFD_ASID, msg[1]); - pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); - - /* - * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 - */ - xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); - - spin_lock_irqsave(&pf_queue->lock, flags); - full = pf_queue_full(pf_queue); - if (!full) { - memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); - pf_queue->head = (pf_queue->head + len) % - pf_queue->num_dw; - queue_work(gt->usm.pf_wq, &pf_queue->worker); - } 
else { - xe_gt_warn(gt, "PageFault Queue full, shouldn't be possible\n"); - } - spin_unlock_irqrestore(&pf_queue->lock, flags); - - return full ? -ENOSPC : 0; -} - -#define USM_QUEUE_MAX_RUNTIME_MS 20 - -static void pf_queue_work_func(struct work_struct *w) -{ - struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); - struct xe_gt *gt = pf_queue->gt; - struct xe_guc_pagefault_reply reply = {}; - struct pagefault pf = {}; - unsigned long threshold; - int ret; - - threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); - - while (get_pagefault(pf_queue, &pf)) { - ret = handle_pagefault(gt, &pf); - if (unlikely(ret)) { - print_pagefault(gt, &pf); - pf.fault_unsuccessful = 1; - xe_gt_dbg(gt, "Fault response: Unsuccessful %pe\n", ERR_PTR(ret)); - } - - reply.dw0 = FIELD_PREP(PFR_VALID, 1) | - FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | - FIELD_PREP(PFR_REPLY, PFR_ACCESS) | - FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | - FIELD_PREP(PFR_ASID, pf.asid); - - reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | - FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | - FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | - FIELD_PREP(PFR_PDATA, pf.pdata); - - send_pagefault_reply(>->uc.guc, &reply); - - if (time_after(jiffies, threshold) && - pf_queue->tail != pf_queue->head) { - queue_work(gt->usm.pf_wq, w); - break; - } - } -} - -static void acc_queue_work_func(struct work_struct *w); - -static void pagefault_fini(void *arg) -{ - struct xe_gt *gt = arg; - struct xe_device *xe = gt_to_xe(gt); - - if (!xe->info.has_usm) - return; - - destroy_workqueue(gt->usm.acc_wq); - destroy_workqueue(gt->usm.pf_wq); -} - -static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) -{ - struct xe_device *xe = gt_to_xe(gt); - xe_dss_mask_t all_dss; - int num_dss, num_eus; - - bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, - XE_MAX_DSS_FUSE_BITS); - - num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); - num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, - XE_MAX_EU_FUSE_BITS) * num_dss; - - /* - * user can issue separate page faults per EU and per CS - * - * XXX: Multiplier required as compute UMD are getting PF queue errors - * without it. Follow on why this multiplier is required. 
- */ -#define PF_MULTIPLIER 8 - pf_queue->num_dw = - (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; - pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); -#undef PF_MULTIPLIER - - pf_queue->gt = gt; - pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, - sizeof(u32), GFP_KERNEL); - if (!pf_queue->data) - return -ENOMEM; - - spin_lock_init(&pf_queue->lock); - INIT_WORK(&pf_queue->worker, pf_queue_work_func); - - return 0; -} - -int xe_gt_pagefault_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - int i, ret = 0; - - if (!xe->info.has_usm) - return 0; - - for (i = 0; i < NUM_PF_QUEUE; ++i) { - ret = xe_alloc_pf_queue(gt, >->usm.pf_queue[i]); - if (ret) - return ret; - } - for (i = 0; i < NUM_ACC_QUEUE; ++i) { - gt->usm.acc_queue[i].gt = gt; - spin_lock_init(>->usm.acc_queue[i].lock); - INIT_WORK(>->usm.acc_queue[i].worker, acc_queue_work_func); - } - - gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue", - WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE); - if (!gt->usm.pf_wq) - return -ENOMEM; - - gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", - WQ_UNBOUND | WQ_HIGHPRI, - NUM_ACC_QUEUE); - if (!gt->usm.acc_wq) { - destroy_workqueue(gt->usm.pf_wq); - return -ENOMEM; - } - - return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); -} - -void xe_gt_pagefault_reset(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - int i; - - if (!xe->info.has_usm) - return; - - for (i = 0; i < NUM_PF_QUEUE; ++i) { - spin_lock_irq(>->usm.pf_queue[i].lock); - gt->usm.pf_queue[i].head = 0; - gt->usm.pf_queue[i].tail = 0; - spin_unlock_irq(>->usm.pf_queue[i].lock); - } - - for (i = 0; i < NUM_ACC_QUEUE; ++i) { - spin_lock(>->usm.acc_queue[i].lock); - gt->usm.acc_queue[i].head = 0; - gt->usm.acc_queue[i].tail = 0; - spin_unlock(>->usm.acc_queue[i].lock); - } -} - -static int granularity_in_byte(int val) -{ - switch (val) { - case 0: - return SZ_128K; - case 1: - return SZ_2M; - case 2: - return SZ_16M; - case 3: - return SZ_64M; - default: - return 0; - } -} - -static int sub_granularity_in_byte(int val) -{ - return (granularity_in_byte(val) / 32); -} - -static void print_acc(struct xe_gt *gt, struct acc *acc) -{ - xe_gt_warn(gt, "Access counter request:\n" - "\tType: %s\n" - "\tASID: %d\n" - "\tVFID: %d\n" - "\tEngine: %d:%d\n" - "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" - "\tSub_Granularity Vector: 0x%08x\n" - "\tVA Range base: 0x%016llx\n", - acc->access_type ? 
"AC_NTFY_VAL" : "AC_TRIG_VAL", - acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, - granularity_in_byte(acc->granularity) / SZ_1K, - sub_granularity_in_byte(acc->granularity) / SZ_1K, - acc->sub_granularity, acc->va_range_base); -} - -static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) -{ - u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * - sub_granularity_in_byte(acc->granularity); - - return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K); -} - -static int handle_acc(struct xe_gt *gt, struct acc *acc) -{ - struct xe_device *xe = gt_to_xe(gt); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_validation_ctx ctx; - struct drm_exec exec; - struct xe_vm *vm; - struct xe_vma *vma; - int ret = 0; - - /* We only support ACC_TRIGGER at the moment */ - if (acc->access_type != ACC_TRIGGER) - return -EINVAL; - - vm = asid_to_vm(xe, acc->asid); - if (IS_ERR(vm)) - return PTR_ERR(vm); - - down_read(&vm->lock); - - /* Lookup VMA */ - vma = get_acc_vma(vm, acc); - if (!vma) { - ret = -EINVAL; - goto unlock_vm; - } - - trace_xe_vma_acc(vma); - - /* Userptr or null can't be migrated, nothing to do */ - if (xe_vma_has_no_bo(vma)) - goto unlock_vm; - - /* Lock VM and BOs dma-resv */ - xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); - drm_exec_until_all_locked(&exec) { - ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram); - drm_exec_retry_on_contention(&exec); - xe_validation_retry_on_oom(&ctx, &ret); - } - - xe_validation_ctx_fini(&ctx); -unlock_vm: - up_read(&vm->lock); - xe_vm_put(vm); - - return ret; -} - -#define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) - -#define ACC_MSG_LEN_DW 4 - -static bool get_acc(struct acc_queue *acc_queue, struct acc *acc) -{ - const struct xe_guc_acc_desc *desc; - bool ret = false; - - spin_lock(&acc_queue->lock); - if (acc_queue->tail != acc_queue->head) { - desc = (const struct xe_guc_acc_desc *) - (acc_queue->data + acc_queue->tail); - - acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); - acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | - FIELD_GET(ACC_SUBG_LO, desc->dw0); - acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1); - acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1); - acc->asid = FIELD_GET(ACC_ASID, desc->dw1); - acc->vfid = FIELD_GET(ACC_VFID, desc->dw2); - acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); - acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, - desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); - - acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) % - ACC_QUEUE_NUM_DW; - ret = true; - } - spin_unlock(&acc_queue->lock); - - return ret; -} - -static void acc_queue_work_func(struct work_struct *w) -{ - struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); - struct xe_gt *gt = acc_queue->gt; - struct acc acc = {}; - unsigned long threshold; - int ret; - - threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); - - while (get_acc(acc_queue, &acc)) { - ret = handle_acc(gt, &acc); - if (unlikely(ret)) { - print_acc(gt, &acc); - xe_gt_warn(gt, "ACC: Unsuccessful %pe\n", ERR_PTR(ret)); - } - - if (time_after(jiffies, threshold) && - acc_queue->tail != acc_queue->head) { - queue_work(gt->usm.acc_wq, w); - break; - } - } -} - -static bool acc_queue_full(struct acc_queue *acc_queue) -{ - lockdep_assert_held(&acc_queue->lock); - - return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <= - ACC_MSG_LEN_DW; -} - -int 
xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct acc_queue *acc_queue; - u32 asid; - bool full; - - /* - * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0 - */ - BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW); - - if (unlikely(len != ACC_MSG_LEN_DW)) - return -EPROTO; - - asid = FIELD_GET(ACC_ASID, msg[1]); - acc_queue = >->usm.acc_queue[asid % NUM_ACC_QUEUE]; - - spin_lock(&acc_queue->lock); - full = acc_queue_full(acc_queue); - if (!full) { - memcpy(acc_queue->data + acc_queue->head, msg, - len * sizeof(u32)); - acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW; - queue_work(gt->usm.acc_wq, &acc_queue->worker); - } else { - xe_gt_warn(gt, "ACC Queue full, dropping ACC\n"); - } - spin_unlock(&acc_queue->lock); - - return full ? -ENOSPC : 0; -} diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h deleted file mode 100644 index 839c065a5e4c..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2022 Intel Corporation - */ - -#ifndef _XE_GT_PAGEFAULT_H_ -#define _XE_GT_PAGEFAULT_H_ - -#include <linux/types.h> - -struct xe_gt; -struct xe_guc; - -int xe_gt_pagefault_init(struct xe_gt *gt); -void xe_gt_pagefault_reset(struct xe_gt *gt); -int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len); -int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len); - -#endif /* _XE_GT_PAGEFAULT_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index c0c0215c0703..62f6cc45a764 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -9,6 +9,7 @@ #include "abi/guc_actions_sriov_abi.h" #include "abi/guc_klvs_abi.h" +#include "regs/xe_gtt_defs.h" #include "regs/xe_guc_regs.h" #include "xe_bo.h" @@ -697,6 +698,22 @@ static u64 pf_estimate_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs) return fair; } +static u64 pf_profile_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs) +{ + bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt)); + u64 shareable = ALIGN_DOWN(GUC_GGTT_TOP, SZ_512M); + u64 alignment = pf_get_ggtt_alignment(gt); + + if (admin_only_pf && num_vfs == 1) + return ALIGN_DOWN(shareable, alignment); + + /* need to hardcode due to ~512M of GGTT being reserved */ + if (num_vfs > 56) + return SZ_64M - SZ_8M; + + return rounddown_pow_of_two(shareable / num_vfs); +} + /** * xe_gt_sriov_pf_config_set_fair_ggtt - Provision many VFs with fair GGTT. * @gt: the &xe_gt (can't be media) @@ -710,6 +727,7 @@ static u64 pf_estimate_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs) int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs) { + u64 profile = pf_profile_fair_ggtt(gt, num_vfs); u64 fair; xe_gt_assert(gt, vfid); @@ -723,9 +741,71 @@ int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, if (!fair) return -ENOSPC; + fair = min(fair, profile); + if (fair < profile) + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %llu vs %llu)\n", + "GGTT", fair, profile); + return xe_gt_sriov_pf_config_bulk_set_ggtt(gt, vfid, num_vfs, fair); } +/** + * xe_gt_sriov_pf_config_ggtt_save() - Save a VF provisioned GGTT data into a buffer. 
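Based on the kernel-doc above (a NULL buffer queries the required size), a hedged sketch of how a migration flow might snapshot a VF's GGTT contents on the source and replay them on the target. gt and vfid are placeholders, the transport of the buffer between the two sides is elided, and only kvzalloc()/kvfree() are assumed beyond the functions in this series.

	ssize_t sz;
	void *buf;
	int err;

	sz = xe_gt_sriov_pf_config_ggtt_save(gt, vfid, NULL, 0);	/* query */
	if (sz <= 0)
		return sz;	/* error, or nothing provisioned to save */

	buf = kvzalloc(sz, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	err = xe_gt_sriov_pf_config_ggtt_save(gt, vfid, buf, sz);

	/* ... transfer buf to the target, then on the target side ... */
	if (!err)
		err = xe_gt_sriov_pf_config_ggtt_restore(gt, vfid, buf, sz);

	kvfree(buf);
	return err;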
+ * @gt: the &xe_gt + * @vfid: VF identifier (can't be 0) + * @buf: the GGTT data destination buffer (or NULL to query the buf size) + * @size: the size of the buffer (or 0 to query the buf size) + * + * This function can only be called on PF. + * + * Return: size of the buffer needed to save GGTT data if querying, + * 0 on successful save or a negative error code on failure. + */ +ssize_t xe_gt_sriov_pf_config_ggtt_save(struct xe_gt *gt, unsigned int vfid, + void *buf, size_t size) +{ + struct xe_ggtt_node *node; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, !(!buf ^ !size)); + + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + node = pf_pick_vf_config(gt, vfid)->ggtt_region; + + if (!buf) + return xe_ggtt_node_pt_size(node); + + return xe_ggtt_node_save(node, buf, size, vfid); +} + +/** + * xe_gt_sriov_pf_config_ggtt_restore() - Restore a VF provisioned GGTT data from a buffer. + * @gt: the &xe_gt + * @vfid: VF identifier (can't be 0) + * @buf: the GGTT data source buffer + * @size: the size of the buffer + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_ggtt_restore(struct xe_gt *gt, unsigned int vfid, + const void *buf, size_t size) +{ + struct xe_ggtt_node *node; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid); + + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + node = pf_pick_vf_config(gt, vfid)->ggtt_region; + + return xe_ggtt_node_load(node, buf, size, vfid); +} + static u32 pf_get_min_spare_ctxs(struct xe_gt *gt) { /* XXX: preliminary */ @@ -924,7 +1004,8 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns const char *what, const char *(*unit)(u32), unsigned int last, int err) { - xe_gt_assert(gt, first); + char name[8]; + xe_gt_assert(gt, num_vfs); xe_gt_assert(gt, first <= last); @@ -932,8 +1013,9 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns return pf_config_set_u32_done(gt, first, value, get(gt, first), what, unit, err); if (unlikely(err)) { - xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n", - first, first + num_vfs - 1, what); + xe_gt_sriov_notice(gt, "Failed to bulk provision %s..VF%u with %s\n", + xe_sriov_function_name(first, name, sizeof(name)), + first + num_vfs - 1, what); if (last > first) pf_config_bulk_set_u32_done(gt, first, last - first, value, get, what, unit, last, 0); @@ -942,8 +1024,9 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns /* pick actual value from first VF - bulk provisioning shall be equal across all VFs */ value = get(gt, first); - xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %u%s %s\n", - first, first + num_vfs - 1, value, unit(value), what); + xe_gt_sriov_info(gt, "%s..VF%u provisioned with %u%s %s\n", + xe_sriov_function_name(first, name, sizeof(name)), + first + num_vfs - 1, value, unit(value), what); return 0; } @@ -982,6 +1065,16 @@ int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid, "GuC context IDs", no_unit, n, err); } +static u32 pf_profile_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs) +{ + bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt)); + + if (admin_only_pf && num_vfs == 1) + return ALIGN_DOWN(GUC_ID_MAX, SZ_1K); + + return rounddown_pow_of_two(GUC_ID_MAX / num_vfs); +} + static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs) { struct xe_guc_id_mgr *idm = 
>->uc.guc.submission_state.idm; @@ -1014,6 +1107,7 @@ static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs) int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs) { + u32 profile = pf_profile_fair_ctxs(gt, num_vfs); u32 fair; xe_gt_assert(gt, vfid); @@ -1026,6 +1120,11 @@ int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, if (!fair) return -ENOSPC; + fair = min(fair, profile); + if (fair < profile) + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %u vs %u)\n", + "GuC context IDs", fair, profile); + return xe_gt_sriov_pf_config_bulk_set_ctxs(gt, vfid, num_vfs, fair); } @@ -1230,6 +1329,17 @@ int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid, "GuC doorbell IDs", no_unit, n, err); } +static u32 pf_profile_fair_dbs(struct xe_gt *gt, unsigned int num_vfs) +{ + bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt)); + + /* XXX: preliminary */ + if (admin_only_pf && num_vfs == 1) + return GUC_NUM_DOORBELLS - SZ_16; + + return rounddown_pow_of_two(GUC_NUM_DOORBELLS / (num_vfs + 1)); +} + static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs) { struct xe_guc_db_mgr *dbm = >->uc.guc.dbm; @@ -1262,6 +1372,7 @@ static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs) int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs) { + u32 profile = pf_profile_fair_dbs(gt, num_vfs); u32 fair; xe_gt_assert(gt, vfid); @@ -1274,6 +1385,11 @@ int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, if (!fair) return -ENOSPC; + fair = min(fair, profile); + if (fair < profile) + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %u vs %u)\n", + "GuC doorbell IDs", fair, profile); + return xe_gt_sriov_pf_config_bulk_set_dbs(gt, vfid, num_vfs, fair); } @@ -1599,6 +1715,32 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, "LMEM", n, err); } +static struct xe_bo *pf_get_vf_config_lmem_obj(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->lmem_obj; +} + +/** + * xe_gt_sriov_pf_config_get_lmem_obj() - Take a reference to the struct &xe_bo backing VF LMEM. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * + * This function can only be called on PF. + * The caller is responsible for calling xe_bo_put() on the returned object. + * + * Return: pointer to struct &xe_bo backing VF LMEM (if any). + */ +struct xe_bo *xe_gt_sriov_pf_config_get_lmem_obj(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, vfid); + + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + return xe_bo_get(pf_get_vf_config_lmem_obj(gt, vfid)); +} + static u64 pf_query_free_lmem(struct xe_gt *gt) { struct xe_tile *tile = gt->tile; @@ -1724,7 +1866,7 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid, return 0; } -static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) +static u32 pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); @@ -1732,47 +1874,107 @@ static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) } /** - * xe_gt_sriov_pf_config_set_exec_quantum - Configure execution quantum for the VF. + * xe_gt_sriov_pf_config_set_exec_quantum_locked() - Configure PF/VF execution quantum. 
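The _locked variants exist so that a caller which already holds the PF master mutex, for example to apply several scheduling knobs consistently, can avoid re-taking it per call. A hedged sketch follows; vfid and eq_ms are placeholders, and VFID(0) addresses the PF as in the bulk helpers elsewhere in this series.

	int err;

	guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));

	/* PF first (vfid 0), then the VF, under one mutex hold */
	err = xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, VFID(0), eq_ms);
	if (err)
		return err;

	return xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, eq_ms);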
* @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) * - * This function can only be called on PF. + * This function can only be called on PF with the master mutex hold. + * It will log the provisioned value or an error in case of the failure. * * Return: 0 on success or a negative error code on failure. */ -int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, - u32 exec_quantum) +int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum) { int err; - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_exec_quantum(gt, vfid, exec_quantum); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); return pf_config_set_u32_done(gt, vfid, exec_quantum, - xe_gt_sriov_pf_config_get_exec_quantum(gt, vfid), + pf_get_exec_quantum(gt, vfid), "execution quantum", exec_quantum_unit, err); } /** - * xe_gt_sriov_pf_config_get_exec_quantum - Get VF's execution quantum. + * xe_gt_sriov_pf_config_set_exec_quantum() - Configure PF/VF execution quantum. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier + * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) + * + * This function can only be called on PF. + * It will log the provisioned value or an error in case of the failure. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum) +{ + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + return xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, exec_quantum); +} + +/** + * xe_gt_sriov_pf_config_get_exec_quantum_locked() - Get PF/VF execution quantum. + * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * + * This function can only be called on PF with the master mutex hold. + * + * Return: execution quantum in milliseconds (or 0 if infinity). + */ +u32 xe_gt_sriov_pf_config_get_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_get_exec_quantum(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_get_exec_quantum() - Get PF/VF execution quantum. + * @gt: the &xe_gt + * @vfid: the PF or VF identifier * * This function can only be called on PF. * - * Return: VF's (or PF's) execution quantum in milliseconds. + * Return: execution quantum in milliseconds (or 0 if infinity). */ u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) { - u32 exec_quantum; + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - exec_quantum = pf_get_exec_quantum(gt, vfid); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return pf_get_exec_quantum(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked() - Configure EQ for PF and VFs. + * @gt: the &xe_gt to configure + * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) + * + * This function can only be called on PF with the master mutex hold. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum) +{ + unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int n; + int err = 0; - return exec_quantum; + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 0; n <= totalvfs; n++) { + err = pf_provision_exec_quantum(gt, VFID(n), exec_quantum); + if (err) + break; + } + + return pf_config_bulk_set_u32_done(gt, 0, 1 + totalvfs, exec_quantum, + pf_get_exec_quantum, "execution quantum", + exec_quantum_unit, n, err); } static const char *preempt_timeout_unit(u32 preempt_timeout) @@ -1795,7 +1997,7 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid, return 0; } -static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) +static u32 pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); @@ -1803,47 +2005,106 @@ static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) } /** - * xe_gt_sriov_pf_config_set_preempt_timeout - Configure preemption timeout for the VF. + * xe_gt_sriov_pf_config_set_preempt_timeout_locked() - Configure PF/VF preemption timeout. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) * - * This function can only be called on PF. + * This function can only be called on PF with the master mutex hold. + * It will log the provisioned value or an error in case of the failure. * * Return: 0 on success or a negative error code on failure. */ -int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, - u32 preempt_timeout) +int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout) { int err; - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_preempt_timeout(gt, vfid, preempt_timeout); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); return pf_config_set_u32_done(gt, vfid, preempt_timeout, - xe_gt_sriov_pf_config_get_preempt_timeout(gt, vfid), + pf_get_preempt_timeout(gt, vfid), "preemption timeout", preempt_timeout_unit, err); } /** - * xe_gt_sriov_pf_config_get_preempt_timeout - Get VF's preemption timeout. + * xe_gt_sriov_pf_config_set_preempt_timeout() - Configure PF/VF preemption timeout. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier + * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout) +{ + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + return xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, preempt_timeout); +} + +/** + * xe_gt_sriov_pf_config_get_preempt_timeout_locked() - Get PF/VF preemption timeout. + * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * + * This function can only be called on PF with the master mutex hold. + * + * Return: preemption timeout in microseconds (or 0 if infinity). 
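Similarly, the _locked getters allow a consistent snapshot of several values without the mutex being dropped between calls, which the plain getters would do. A sketch assuming scoped_guard() from <linux/cleanup.h>, with vfid as a placeholder:

	u32 eq_ms, pt_us;

	scoped_guard(mutex, xe_gt_sriov_pf_master_mutex(gt)) {
		eq_ms = xe_gt_sriov_pf_config_get_exec_quantum_locked(gt, vfid);
		pt_us = xe_gt_sriov_pf_config_get_preempt_timeout_locked(gt, vfid);
	}

	/* eq_ms and pt_us now form a coherent pair taken under one lock hold */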
+ */ +u32 xe_gt_sriov_pf_config_get_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_get_preempt_timeout(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_get_preempt_timeout() - Get PF/VF preemption timeout. + * @gt: the &xe_gt + * @vfid: the PF or VF identifier * * This function can only be called on PF. * - * Return: VF's (or PF's) preemption timeout in microseconds. + * Return: preemption timeout in microseconds (or 0 if infinity). */ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) { - u32 preempt_timeout; + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - preempt_timeout = pf_get_preempt_timeout(gt, vfid); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return pf_get_preempt_timeout(gt, vfid); +} - return preempt_timeout; +/** + * xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked() - Configure PT for PF and VFs. + * @gt: the &xe_gt to configure + * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) + * + * This function can only be called on PF with the master mutex hold. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout) +{ + unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int n; + int err = 0; + + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 0; n <= totalvfs; n++) { + err = pf_provision_preempt_timeout(gt, VFID(n), preempt_timeout); + if (err) + break; + } + + return pf_config_bulk_set_u32_done(gt, 0, 1 + totalvfs, preempt_timeout, + pf_get_preempt_timeout, "preemption timeout", + preempt_timeout_unit, n, err); } static const char *sched_priority_unit(u32 priority) @@ -2671,3 +2932,7 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin return 0; } + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_gt_sriov_pf_config_kunit.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 513e6512a575..4975730423d7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -36,14 +36,25 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, u64 size); +struct xe_bo *xe_gt_sriov_pf_config_get_lmem_obj(struct xe_gt *gt, unsigned int vfid); u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum); +u32 xe_gt_sriov_pf_config_get_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum); +int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum); + u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout); +u32 xe_gt_sriov_pf_config_get_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid, + 
u32 preempt_timeout); +int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout); + u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority); @@ -61,6 +72,11 @@ ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *bu int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, const void *buf, size_t size); +ssize_t xe_gt_sriov_pf_config_ggtt_save(struct xe_gt *gt, unsigned int vfid, + void *buf, size_t size); +int xe_gt_sriov_pf_config_ggtt_restore(struct xe_gt *gt, unsigned int vfid, + const void *buf, size_t size); + bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_init(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 9de05db1f090..bf48b05797de 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -18,7 +18,10 @@ #include "xe_gt_sriov_printk.h" #include "xe_guc_ct.h" #include "xe_sriov.h" +#include "xe_sriov_packet.h" +#include "xe_sriov_packet_types.h" #include "xe_sriov_pf_control.h" +#include "xe_sriov_pf_migration.h" #include "xe_sriov_pf_service.h" #include "xe_tile.h" @@ -181,9 +184,20 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit) CASE2STR(PAUSE_SEND_PAUSE); CASE2STR(PAUSE_WAIT_GUC); CASE2STR(PAUSE_GUC_DONE); - CASE2STR(PAUSE_SAVE_GUC); CASE2STR(PAUSE_FAILED); CASE2STR(PAUSED); + CASE2STR(SAVE_WIP); + CASE2STR(SAVE_PROCESS_DATA); + CASE2STR(SAVE_WAIT_DATA); + CASE2STR(SAVE_DATA_DONE); + CASE2STR(SAVE_FAILED); + CASE2STR(SAVED); + CASE2STR(RESTORE_WIP); + CASE2STR(RESTORE_PROCESS_DATA); + CASE2STR(RESTORE_WAIT_DATA); + CASE2STR(RESTORE_DATA_DONE); + CASE2STR(RESTORE_FAILED); + CASE2STR(RESTORED); CASE2STR(RESUME_WIP); CASE2STR(RESUME_SEND_RESUME); CASE2STR(RESUME_FAILED); @@ -208,6 +222,8 @@ static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit) case XE_GT_SRIOV_STATE_FLR_WIP: case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: return 5 * HZ; + case XE_GT_SRIOV_STATE_RESTORE_WIP: + return 20 * HZ; default: return HZ; } @@ -225,7 +241,7 @@ static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); - return &cs->state; + return cs->state; } static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid, @@ -329,6 +345,8 @@ static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid) pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED); } #define pf_enter_vf_state_machine_bug(gt, vfid) ({ \ @@ -359,6 +377,8 @@ static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid) static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid); static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid); +static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid); +static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid); static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid); static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid); @@ -380,6 +400,8 @@ static void 
pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid) pf_exit_vf_flr_wip(gt, vfid); pf_exit_vf_stop_wip(gt, vfid); + pf_exit_vf_save_wip(gt, vfid); + pf_exit_vf_restore_wip(gt, vfid); pf_exit_vf_pause_wip(gt, vfid); pf_exit_vf_resume_wip(gt, vfid); @@ -399,6 +421,8 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid) pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); pf_exit_vf_mismatch(gt, vfid); pf_exit_vf_wip(gt, vfid); } @@ -429,8 +453,7 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid) * : PAUSE_GUC_DONE o-----restart * : | : * : | o---<--busy : - * : v / / : - * : PAUSE_SAVE_GUC : + * : / : * : / : * : / : * :....o..............o...............o...........: @@ -450,7 +473,6 @@ static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid) pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE); - pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC); } } @@ -481,41 +503,12 @@ static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid) pf_enter_vf_pause_failed(gt, vfid); } -static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid) -{ - if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC)) - pf_enter_vf_state_machine_bug(gt, vfid); -} - -static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid) -{ - int err; - - if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC)) - return false; - - err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid); - if (err) { - /* retry if busy */ - if (err == -EBUSY) { - pf_enter_vf_pause_save_guc(gt, vfid); - return true; - } - /* give up on error */ - if (err == -EIO) - pf_enter_vf_mismatch(gt, vfid); - } - - pf_enter_vf_pause_completed(gt, vfid); - return true; -} - static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid) { if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE)) return false; - pf_enter_vf_pause_save_guc(gt, vfid); + pf_enter_vf_pause_completed(gt, vfid); return true; } @@ -675,6 +668,8 @@ static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid) { pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); pf_exit_vf_mismatch(gt, vfid); pf_exit_vf_wip(gt, vfid); } @@ -753,6 +748,16 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) return -EPERM; } + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { + xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid); + return -EBUSY; + } + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { + xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid); + return -EBUSY; + } + if (!pf_enter_vf_resume_wip(gt, vfid)) { xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid); return -EALREADY; @@ -777,6 +782,562 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) } /** + * DOC: The VF SAVE state machine + * + * SAVE extends the PAUSED state. 
+ * + * The VF SAVE state machine looks like:: + * + * ....PAUSED.................................................... + * : : + * : (O)<---------o : + * : | \ : + * : save (SAVED) (SAVE_FAILED) : + * : | ^ ^ : + * : | | | : + * : ....V...............o...........o......SAVE_WIP......... : + * : : | | | : : + * : : | empty | : : + * : : | | | : : + * : : | | | : : + * : : | DATA_DONE | : : + * : : | ^ | : : + * : : | | error : : + * : : | no_data / : : + * : : | / / : : + * : : | / / : : + * : : | / / : : + * : : o---------->PROCESS_DATA<----consume : : + * : : \ \ : : + * : : \ \ : : + * : : \ \ : : + * : : ring_full----->WAIT_DATA : : + * : : : : + * : :......................................................: : + * :............................................................: + * + * For the full state machine view, see `The VF state machine`_. + */ + +static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { + xe_gt_sriov_pf_migration_ring_free(gt, vfid); + + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); + } +} + +static void pf_enter_vf_saved(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED)) + pf_enter_vf_state_machine_bug(gt, vfid); + + xe_gt_sriov_dbg(gt, "VF%u saved!\n", vfid); + + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_mismatch(gt, vfid); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_save_failed(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED)) + pf_enter_vf_state_machine_bug(gt, vfid); + + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); + + pf_exit_vf_wip(gt, vfid); +} + +static int pf_handle_vf_save_data(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, + XE_SRIOV_PACKET_TYPE_GUC)) { + ret = xe_gt_sriov_pf_migration_guc_save(gt, vfid); + if (ret) + return ret; + + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, + XE_SRIOV_PACKET_TYPE_GUC); + + return -EAGAIN; + } + + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, + XE_SRIOV_PACKET_TYPE_GGTT)) { + ret = xe_gt_sriov_pf_migration_ggtt_save(gt, vfid); + if (ret) + return ret; + + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, + XE_SRIOV_PACKET_TYPE_GGTT); + + return -EAGAIN; + } + + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, + XE_SRIOV_PACKET_TYPE_MMIO)) { + ret = xe_gt_sriov_pf_migration_mmio_save(gt, vfid); + if (ret) + return ret; + + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, + XE_SRIOV_PACKET_TYPE_MMIO); + + return -EAGAIN; + } + + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, + XE_SRIOV_PACKET_TYPE_VRAM)) { + ret = xe_gt_sriov_pf_migration_vram_save(gt, vfid); + if (ret == -EAGAIN) + return -EAGAIN; + else if (ret) + return ret; + + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, + XE_SRIOV_PACKET_TYPE_VRAM); + + return -EAGAIN; + } + + return 0; +} + +static bool pf_handle_vf_save(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA)) + return false; + + if (xe_gt_sriov_pf_migration_ring_full(gt, vfid)) { + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA); + return true; + } + + ret = pf_handle_vf_save_data(gt, vfid); + if (ret == 
-EAGAIN) + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); + else if (ret) + pf_enter_vf_save_failed(gt, vfid); + else + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); + + return true; +} + +static void pf_exit_vf_save_wait_data(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) + return; + + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); + pf_queue_vf(gt, vfid); +} + +static bool pf_enter_vf_save_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { + xe_gt_sriov_pf_migration_save_init(gt, vfid); + pf_enter_vf_wip(gt, vfid); + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); + pf_queue_vf(gt, vfid); + return true; + } + + return false; +} + +/** + * xe_gt_sriov_pf_control_check_save_data_done() - Check if all save migration data was produced. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: true if all migration data was produced, false otherwise. + */ +bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid) +{ + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); +} + +/** + * xe_gt_sriov_pf_control_check_save_failed() - Check if save processing has failed. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: true if save processing failed, false otherwise. + */ +bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid) +{ + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED); +} + +/** + * xe_gt_sriov_pf_control_process_save_data() - Queue VF save migration data processing. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED)) + return -EIO; + + pf_exit_vf_save_wait_data(gt, vfid); + + return 0; +} + +/** + * xe_gt_sriov_pf_control_trigger_save_vf() - Start an SR-IOV VF migration data save sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { + xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); + return -EPERM; + } + + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { + xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); + return -EPERM; + } + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { + xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid); + return -EBUSY; + } + + if (!pf_enter_vf_save_wip(gt, vfid)) { + xe_gt_sriov_dbg(gt, "VF%u save already in progress!\n", vfid); + return -EALREADY; + } + + return 0; +} + +/** + * xe_gt_sriov_pf_control_finish_save_vf() - Complete a VF migration data save sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
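Taken together, the trigger/consume/finish helpers introduced here give the PF a pull-style interface to the save stream: trigger the save, drain packets as the worker produces them, then finish once SAVE_DATA_DONE is reached. A hedged sketch of such a consumer is shown below; it is illustrative only, emit_to_stream() is a placeholder for whatever sink forwards the packets to userspace, and a real caller would sleep on xe_sriov_pf_migration_waitqueue() rather than spin on -EAGAIN::

    int save_vf_stream(struct xe_gt *gt, unsigned int vfid)
    {
            struct xe_sriov_packet *pkt;
            int err;

            err = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid);
            if (err)
                    return err;

            for (;;) {
                    pkt = xe_gt_sriov_pf_migration_save_consume(gt, vfid);
                    if (!pkt)
                            break;                  /* SAVE_DATA_DONE reached and ring drained */
                    if (IS_ERR(pkt)) {
                            if (PTR_ERR(pkt) == -EAGAIN)
                                    continue;       /* ring empty but more data coming; wait here */
                            return PTR_ERR(pkt);    /* e.g. -EIO after SAVE_FAILED */
                    }

                    emit_to_stream(pkt);            /* placeholder sink, not a real API */
                    xe_sriov_packet_free(pkt);
            }

            return xe_gt_sriov_pf_control_finish_save_vf(gt, vfid);
    }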
+ */ +int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE)) { + xe_gt_sriov_err(gt, "VF%u save is still in progress!\n", vfid); + return -EIO; + } + + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); + pf_enter_vf_saved(gt, vfid); + + return 0; +} + +/** + * DOC: The VF RESTORE state machine + * + * RESTORE extends the PAUSED state. + * + * The VF RESTORE state machine looks like:: + * + * ....PAUSED.................................................... + * : : + * : (O)<---------o : + * : | \ : + * : restore (RESTORED) (RESTORE_FAILED) : + * : | ^ ^ : + * : | | | : + * : ....V...............o...........o......RESTORE_WIP...... : + * : : | | | : : + * : : | empty | : : + * : : | | | : : + * : : | | | : : + * : : | DATA_DONE | : : + * : : | ^ | : : + * : : | | error : : + * : : | trailer / : : + * : : | / / : : + * : : | / / : : + * : : | / / : : + * : : o---------->PROCESS_DATA<----produce : : + * : : \ \ : : + * : : \ \ : : + * : : \ \ : : + * : : ring_empty---->WAIT_DATA : : + * : : : : + * : :......................................................: : + * :............................................................: + * + * For the full state machine view, see `The VF state machine`_. + */ + +static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { + xe_gt_sriov_pf_migration_ring_free(gt, vfid); + + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE); + } +} + +static void pf_enter_vf_restored(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) + pf_enter_vf_state_machine_bug(gt, vfid); + + xe_gt_sriov_dbg(gt, "VF%u restored!\n", vfid); + + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_mismatch(gt, vfid); + pf_exit_vf_wip(gt, vfid); +} + +static void pf_enter_vf_restore_failed(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) + pf_enter_vf_state_machine_bug(gt, vfid); + + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); + + pf_exit_vf_wip(gt, vfid); +} + +static int pf_handle_vf_restore_data(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_sriov_packet *data = xe_gt_sriov_pf_migration_restore_consume(gt, vfid); + int ret = 0; + + switch (data->hdr.type) { + case XE_SRIOV_PACKET_TYPE_GGTT: + ret = xe_gt_sriov_pf_migration_ggtt_restore(gt, vfid, data); + break; + case XE_SRIOV_PACKET_TYPE_MMIO: + ret = xe_gt_sriov_pf_migration_mmio_restore(gt, vfid, data); + break; + case XE_SRIOV_PACKET_TYPE_GUC: + ret = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data); + break; + case XE_SRIOV_PACKET_TYPE_VRAM: + ret = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data); + break; + default: + xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n", + vfid, data->hdr.type); + break; + } + + xe_sriov_packet_free(data); + + return ret; +} + +static bool pf_handle_vf_restore(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA)) + return false; + + if (xe_gt_sriov_pf_migration_ring_empty(gt, vfid)) { + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) + 
pf_enter_vf_restored(gt, vfid); + else + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA); + + return true; + } + + ret = pf_handle_vf_restore_data(gt, vfid); + if (ret) + pf_enter_vf_restore_failed(gt, vfid); + else + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); + + return true; +} + +static void pf_exit_vf_restore_wait_data(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) + return; + + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); + pf_queue_vf(gt, vfid); +} + +static bool pf_enter_vf_restore_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { + pf_enter_vf_wip(gt, vfid); + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); + pf_queue_vf(gt, vfid); + return true; + } + + return false; +} + +/** + * xe_gt_sriov_pf_control_check_restore_failed() - Check if restore processing has failed. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: true if restore processing failed, false otherwise. + */ +bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid) +{ + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED); +} + +/** + * xe_gt_sriov_pf_control_restore_data_done() - Indicate the end of VF migration data stream. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) { + pf_enter_vf_state_machine_bug(gt, vfid); + return -EIO; + } + + return xe_gt_sriov_pf_control_process_restore_data(gt, vfid); +} + +/** + * xe_gt_sriov_pf_control_process_restore_data() - Queue VF restore migration data processing. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) { + xe_gt_sriov_pf_migration_ring_free(gt, vfid); + return -EIO; + } + + pf_exit_vf_restore_wait_data(gt, vfid); + + return 0; +} + +/** + * xe_gt_sriov_pf_control_trigger restore_vf() - Start an SR-IOV VF migration data restore sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
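On the restore side the roles flip: the caller produces packets into the per-VF ring and the control worker consumes them. A hedged end-to-end sketch using only the helpers added by this patch is given below; next_packet_from_stream() is a placeholder for the code that parses the previously saved stream and routes each packet to the right tile/GT, and packet cleanup on the error paths is elided::

    int restore_vf_stream(struct xe_gt *gt, unsigned int vfid)
    {
            struct xe_sriov_packet *pkt;
            int err;

            err = xe_gt_sriov_pf_control_trigger_restore_vf(gt, vfid);
            if (err)
                    return err;

            /* feed packets; restore_produce() sleeps while the ring is full */
            while ((pkt = next_packet_from_stream()) != NULL) {
                    err = xe_gt_sriov_pf_migration_restore_produce(gt, vfid, pkt);
                    if (err)
                            return err;     /* e.g. -EIO once RESTORE_FAILED is set */
            }

            /* no more data: mark the stream complete, then wait for the worker */
            err = xe_gt_sriov_pf_control_restore_data_done(gt, vfid);
            if (err)
                    return err;

            return xe_gt_sriov_pf_control_finish_restore_vf(gt, vfid);
    }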
+ */ +int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { + xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); + return -EPERM; + } + + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { + xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); + return -EPERM; + } + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { + xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid); + return -EBUSY; + } + + if (!pf_enter_vf_restore_wip(gt, vfid)) { + xe_gt_sriov_dbg(gt, "VF%u restore already in progress!\n", vfid); + return -EALREADY; + } + + return 0; +} + +static int pf_wait_vf_restore_done(struct xe_gt *gt, unsigned int vfid) +{ + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESTORE_WIP); + int err; + + err = pf_wait_vf_wip_done(gt, vfid, timeout); + if (err) { + xe_gt_sriov_notice(gt, "VF%u RESTORE didn't finish in %u ms (%pe)\n", + vfid, jiffies_to_msecs(timeout), ERR_PTR(err)); + return err; + } + + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) + return -EIO; + + return 0; +} + +/** + * xe_gt_sriov_pf_control_finish_restore_vf() - Complete a VF migration data restore sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + ret = pf_wait_vf_restore_done(gt, vfid); + if (ret) + return ret; + + if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) { + pf_enter_vf_mismatch(gt, vfid); + return -EIO; + } + + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + + return 0; +} + +/** * DOC: The VF STOP state machine * * The VF STOP state machine looks like:: @@ -817,6 +1378,8 @@ static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid) pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); pf_exit_vf_mismatch(gt, vfid); pf_exit_vf_wip(gt, vfid); } @@ -1460,7 +2023,22 @@ static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid) if (pf_exit_vf_pause_guc_done(gt, vfid)) return true; - if (pf_exit_vf_pause_save_guc(gt, vfid)) + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) { + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, + control_bit_to_string(XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)); + return false; + } + + if (pf_handle_vf_save(gt, vfid)) + return true; + + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) { + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, + control_bit_to_string(XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)); + return false; + } + + if (pf_handle_vf_restore(gt, vfid)) return true; if (pf_exit_vf_resume_send_resume(gt, vfid)) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h index 8a72ef3778d4..c36c8767f3ad 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h @@ -16,6 +16,16 @@ void xe_gt_sriov_pf_control_restart(struct xe_gt *gt); int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); +bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid); 
+bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid); +bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h index c80b7e77f1ad..6027ba05a7f2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h @@ -28,9 +28,20 @@ * @XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE: indicates that the PF is about to send a PAUSE command. * @XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: indicates that the PF awaits for a response from the GuC. * @XE_GT_SRIOV_STATE_PAUSE_GUC_DONE: indicates that the PF has received a response from the GuC. - * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state. * @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed. * @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused. + * @XE_GT_SRIOV_STATE_SAVE_WIP: indicates that VF save operation is in progress. + * @XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA: indicates that VF migration data is being produced. + * @XE_GT_SRIOV_STATE_SAVE_WAIT_DATA: indicates that PF awaits for space in migration data ring. + * @XE_GT_SRIOV_STATE_SAVE_DATA_DONE: indicates that all migration data was produced by Xe. + * @XE_GT_SRIOV_STATE_SAVE_FAILED: indicates that VF save operation has failed. + * @XE_GT_SRIOV_STATE_SAVED: indicates that VF data is saved. + * @XE_GT_SRIOV_STATE_RESTORE_WIP: indicates that VF restore operation is in progress. + * @XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA: indicates that VF migration data is being consumed. + * @XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA: indicates that PF awaits for data in migration data ring. + * @XE_GT_SRIOV_STATE_RESTORE_DATA_DONE: indicates that all migration data was produced by the user. + * @XE_GT_SRIOV_STATE_RESTORE_FAILED: indicates that VF restore operation has failed. + * @XE_GT_SRIOV_STATE_RESTORED: indicates that VF data is restored. * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates the a VF resume operation is in progress. * @XE_GT_SRIOV_STATE_RESUME_SEND_RESUME: indicates that the PF is about to send RESUME command. * @XE_GT_SRIOV_STATE_RESUME_FAILED: indicates that a VF resume operation has failed. 
@@ -59,10 +70,23 @@ enum xe_gt_sriov_control_bits { XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE, - XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC, XE_GT_SRIOV_STATE_PAUSE_FAILED, XE_GT_SRIOV_STATE_PAUSED, + XE_GT_SRIOV_STATE_SAVE_WIP, + XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA, + XE_GT_SRIOV_STATE_SAVE_WAIT_DATA, + XE_GT_SRIOV_STATE_SAVE_DATA_DONE, + XE_GT_SRIOV_STATE_SAVE_FAILED, + XE_GT_SRIOV_STATE_SAVED, + + XE_GT_SRIOV_STATE_RESTORE_WIP, + XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA, + XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA, + XE_GT_SRIOV_STATE_RESTORE_DATA_DONE, + XE_GT_SRIOV_STATE_RESTORE_FAILED, + XE_GT_SRIOV_STATE_RESTORED, + XE_GT_SRIOV_STATE_RESUME_WIP, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME, XE_GT_SRIOV_STATE_RESUME_FAILED, @@ -73,9 +97,11 @@ enum xe_gt_sriov_control_bits { XE_GT_SRIOV_STATE_STOP_FAILED, XE_GT_SRIOV_STATE_STOPPED, - XE_GT_SRIOV_STATE_MISMATCH = BITS_PER_LONG - 1, + XE_GT_SRIOV_STATE_MISMATCH, /* always keep as last */ }; +#define XE_GT_SRIOV_NUM_STATES (XE_GT_SRIOV_STATE_MISMATCH + 1) + /** * struct xe_gt_sriov_control_state - GT-level per-VF control state. * @@ -83,7 +109,7 @@ enum xe_gt_sriov_control_bits { */ struct xe_gt_sriov_control_state { /** @state: VF state bits */ - unsigned long state; + DECLARE_BITMAP(state, XE_GT_SRIOV_NUM_STATES); /** @done: completion of async operations */ struct completion done; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 838beb7f6327..5278ea4fd655 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -327,9 +327,6 @@ static const struct { { "stop", xe_gt_sriov_pf_control_stop_vf }, { "pause", xe_gt_sriov_pf_control_pause_vf }, { "resume", xe_gt_sriov_pf_control_resume_vf }, -#ifdef CONFIG_DRM_XE_DEBUG_SRIOV - { "restore!", xe_gt_sriov_pf_migration_restore_guc_state }, -#endif }; static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) @@ -399,47 +396,6 @@ static const struct file_operations control_ops = { * : ├── vf1 * : ├── tile0 * : ├── gt0 - * : ├── guc_state - */ - -static ssize_t guc_state_read(struct file *file, char __user *buf, - size_t count, loff_t *pos) -{ - struct dentry *dent = file_dentry(file); - struct dentry *parent = dent->d_parent; - struct xe_gt *gt = extract_gt(parent); - unsigned int vfid = extract_vfid(parent); - - return xe_gt_sriov_pf_migration_read_guc_state(gt, vfid, buf, count, pos); -} - -static ssize_t guc_state_write(struct file *file, const char __user *buf, - size_t count, loff_t *pos) -{ - struct dentry *dent = file_dentry(file); - struct dentry *parent = dent->d_parent; - struct xe_gt *gt = extract_gt(parent); - unsigned int vfid = extract_vfid(parent); - - if (*pos) - return -EINVAL; - - return xe_gt_sriov_pf_migration_write_guc_state(gt, vfid, buf, count); -} - -static const struct file_operations guc_state_ops = { - .owner = THIS_MODULE, - .read = guc_state_read, - .write = guc_state_write, - .llseek = default_llseek, -}; - -/* - * /sys/kernel/debug/dri/BDF/ - * ├── sriov - * : ├── vf1 - * : ├── tile0 - * : ├── gt0 * : ├── config_blob */ @@ -568,9 +524,6 @@ static void pf_populate_gt(struct xe_gt *gt, struct dentry *dent, unsigned int v /* for testing/debugging purposes only! */ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { - debugfs_create_file("guc_state", - IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 
0600 : 0400, - dent, NULL, &guc_state_ops); debugfs_create_file("config_blob", IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400, dent, NULL, &config_blob_ops); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c index 44cc612b0a75..35a12d48dcc1 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c @@ -5,14 +5,149 @@ #include <drm/drm_managed.h> +#include "regs/xe_guc_regs.h" + #include "abi/guc_actions_sriov_abi.h" #include "xe_bo.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_gt_sriov_pf.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_printk.h" -#include "xe_guc.h" +#include "xe_guc_buf.h" #include "xe_guc_ct.h" +#include "xe_migrate.h" +#include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_sriov_packet.h" +#include "xe_sriov_packet_types.h" +#include "xe_sriov_pf_migration.h" + +#define XE_GT_SRIOV_PF_MIGRATION_RING_SIZE 5 + +static struct xe_gt_sriov_migration_data *pf_pick_gt_migration(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + return >->sriov.pf.vfs[vfid].migration; +} + +static void pf_dump_mig_data(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data, + const char *what) +{ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { + struct drm_printer p = xe_gt_dbg_printer(gt); + + drm_printf(&p, "VF%u %s (%llu bytes)\n", vfid, what, data->hdr.size); + drm_print_hex_dump(&p, "mig_hdr: ", (void *)&data->hdr, sizeof(data->hdr)); + drm_print_hex_dump(&p, "mig_data: ", data->vaddr, min(SZ_64, data->hdr.size)); + } +} + +static ssize_t pf_migration_ggtt_size(struct xe_gt *gt, unsigned int vfid) +{ + if (!xe_gt_is_main_type(gt)) + return 0; + + return xe_gt_sriov_pf_config_ggtt_save(gt, vfid, NULL, 0); +} + +static int pf_save_vf_ggtt_mig_data(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_sriov_packet *data; + size_t size; + int ret; + + size = pf_migration_ggtt_size(gt, vfid); + xe_gt_assert(gt, size); + + data = xe_sriov_packet_alloc(gt_to_xe(gt)); + if (!data) + return -ENOMEM; + + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, + XE_SRIOV_PACKET_TYPE_GGTT, 0, size); + if (ret) + goto fail; + + ret = xe_gt_sriov_pf_config_ggtt_save(gt, vfid, data->vaddr, size); + if (ret) + goto fail; + + pf_dump_mig_data(gt, vfid, data, "GGTT data save"); + + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); + if (ret) + goto fail; + + return 0; + +fail: + xe_sriov_packet_free(data); + xe_gt_sriov_err(gt, "Failed to save VF%u GGTT data (%pe)\n", vfid, ERR_PTR(ret)); + return ret; +} + +static int pf_restore_vf_ggtt_mig_data(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + int ret; + + pf_dump_mig_data(gt, vfid, data, "GGTT data restore"); + + ret = xe_gt_sriov_pf_config_ggtt_restore(gt, vfid, data->vaddr, data->hdr.size); + if (ret) { + xe_gt_sriov_err(gt, "Failed to restore VF%u GGTT data (%pe)\n", + vfid, ERR_PTR(ret)); + return ret; + } + + return 0; +} + +/** + * xe_gt_sriov_pf_migration_ggtt_save() - Save VF GGTT migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
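pf_migration_ggtt_size() above leans on what appears to be a size-query convention in xe_gt_sriov_pf_config_ggtt_save(): called with a NULL buffer and zero size it returns the number of bytes required, while with a real buffer it appears to return 0 on success (that is how pf_save_vf_ggtt_mig_data() checks it). A hedged sketch of the resulting two-step pattern, with a hypothetical caller that owns its own buffer::

    static ssize_t ggtt_snapshot(struct xe_gt *gt, unsigned int vfid, void **out)
    {
            ssize_t need = xe_gt_sriov_pf_config_ggtt_save(gt, vfid, NULL, 0);
            void *buf;
            ssize_t ret;

            if (need <= 0)
                    return need;            /* 0: nothing to save, <0: error */

            buf = kvzalloc(need, GFP_KERNEL);
            if (!buf)
                    return -ENOMEM;

            ret = xe_gt_sriov_pf_config_ggtt_save(gt, vfid, buf, need);
            if (ret) {
                    kvfree(buf);
                    return ret;
            }

            *out = buf;                     /* caller kvfree()s when done */
            return need;
    }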
+ */ +int xe_gt_sriov_pf_migration_ggtt_save(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + return pf_save_vf_ggtt_mig_data(gt, vfid); +} + +/** + * xe_gt_sriov_pf_migration_ggtt_restore() - Restore VF GGTT migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * @data: the &xe_sriov_packet containing migration data + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_ggtt_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + return pf_restore_vf_ggtt_mig_data(gt, vfid, data); +} /* Return: number of dwords saved/restored/required or a negative error code on failure */ static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode, @@ -33,7 +168,7 @@ static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode, } /* Return: size of the state in dwords or a negative error code on failure */ -static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid) +static int pf_send_guc_query_vf_mig_data_size(struct xe_gt *gt, unsigned int vfid) { int ret; @@ -42,353 +177,839 @@ static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid) } /* Return: number of state dwords saved or a negative error code on failure */ -static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid, - void *buff, size_t size) +static int pf_send_guc_save_vf_mig_data(struct xe_gt *gt, unsigned int vfid, + void *dst, size_t size) { const int ndwords = size / sizeof(u32); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); struct xe_guc *guc = >->uc.guc; - struct xe_bo *bo; + CLASS(xe_guc_buf, buf)(&guc->buf, ndwords); int ret; xe_gt_assert(gt, size % sizeof(u32) == 0); xe_gt_assert(gt, size == ndwords * sizeof(u32)); - bo = xe_bo_create_pin_map_novm(xe, tile, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE, false); - if (IS_ERR(bo)) - return PTR_ERR(bo); + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + /* FW expects this buffer to be zero-initialized */ + memset(xe_guc_buf_cpu_ptr(buf), 0, size); ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE, - xe_bo_ggtt_addr(bo), ndwords); + xe_guc_buf_flush(buf), ndwords); if (!ret) ret = -ENODATA; else if (ret > ndwords) ret = -EPROTO; else if (ret > 0) - xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32)); + memcpy(dst, xe_guc_buf_sync_read(buf), ret * sizeof(u32)); - xe_bo_unpin_map_no_vm(bo); return ret; } /* Return: number of state dwords restored or a negative error code on failure */ -static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid, - const void *buff, size_t size) +static int pf_send_guc_restore_vf_mig_data(struct xe_gt *gt, unsigned int vfid, + const void *src, size_t size) { const int ndwords = size / sizeof(u32); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); struct xe_guc *guc = >->uc.guc; - struct xe_bo *bo; + CLASS(xe_guc_buf_from_data, buf)(&guc->buf, src, size); int ret; xe_gt_assert(gt, size % sizeof(u32) == 0); xe_gt_assert(gt, size == ndwords * sizeof(u32)); 
- bo = xe_bo_create_pin_map_novm(xe, tile, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE, false); - if (IS_ERR(bo)) - return PTR_ERR(bo); - - xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size); + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE, - xe_bo_ggtt_addr(bo), ndwords); + xe_guc_buf_flush(buf), ndwords); if (!ret) ret = -ENODATA; else if (ret > ndwords) ret = -EPROTO; - xe_bo_unpin_map_no_vm(bo); return ret; } static bool pf_migration_supported(struct xe_gt *gt) { - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); - return gt->sriov.pf.migration.supported; + return xe_sriov_pf_migration_supported(gt_to_xe(gt)); } -static struct mutex *pf_migration_mutex(struct xe_gt *gt) +static int pf_save_vf_guc_mig_data(struct xe_gt *gt, unsigned int vfid) { - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); - return >->sriov.pf.migration.snapshot_lock; + struct xe_sriov_packet *data; + size_t size; + int ret; + + ret = pf_send_guc_query_vf_mig_data_size(gt, vfid); + if (ret < 0) + goto fail; + + size = ret * sizeof(u32); + + data = xe_sriov_packet_alloc(gt_to_xe(gt)); + if (!data) { + ret = -ENOMEM; + goto fail; + } + + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, + XE_SRIOV_PACKET_TYPE_GUC, 0, size); + if (ret) + goto fail_free; + + ret = pf_send_guc_save_vf_mig_data(gt, vfid, data->vaddr, size); + if (ret < 0) + goto fail_free; + size = ret * sizeof(u32); + xe_gt_assert(gt, size); + xe_gt_assert(gt, size <= data->hdr.size); + data->hdr.size = size; + data->remaining = size; + + pf_dump_mig_data(gt, vfid, data, "GuC data save"); + + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); + if (ret) + goto fail_free; + + return 0; + +fail_free: + xe_sriov_packet_free(data); +fail: + xe_gt_sriov_err(gt, "Failed to save VF%u GuC data (%pe)\n", + vfid, ERR_PTR(ret)); + return ret; } -static struct xe_gt_sriov_state_snapshot *pf_pick_vf_snapshot(struct xe_gt *gt, - unsigned int vfid) +static ssize_t pf_migration_guc_size(struct xe_gt *gt, unsigned int vfid) +{ + ssize_t size; + + if (!pf_migration_supported(gt)) + return -ENOPKG; + + size = pf_send_guc_query_vf_mig_data_size(gt, vfid); + if (size >= 0) + size *= sizeof(u32); + + return size; +} + +/** + * xe_gt_sriov_pf_migration_guc_save() - Save VF GuC migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
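The pf_send_guc_save/restore_vf_mig_data() rewrites above trade a manually pinned and mapped BO for a CLASS(xe_guc_buf, ...) declaration, i.e. the scope-based cleanup helpers from <linux/cleanup.h> (presumably set up with DEFINE_CLASS() by the xe_guc_buf code): the buffer is released automatically when the variable leaves scope, which is why the explicit xe_bo_unpin_map_no_vm() calls disappear. A generic hedged sketch of the same idiom, deliberately unrelated to the GuC buffer type::

    #include <linux/cleanup.h>
    #include <linux/slab.h>

    /* Free helper bound to a scope-managed pointer; it runs on every return path. */
    DEFINE_FREE(scratch_free, void *, kvfree(_T))

    static int use_scratch(size_t size)
    {
            void *scratch __free(scratch_free) = kvzalloc(size, GFP_KERNEL);

            if (!scratch)
                    return -ENOMEM;

            /* ... use scratch; no explicit kvfree() on any path ... */
            return 0;
    }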
+ */ +int xe_gt_sriov_pf_migration_guc_save(struct xe_gt *gt, unsigned int vfid) { xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - lockdep_assert_held(pf_migration_mutex(gt)); - return >->sriov.pf.vfs[vfid].snapshot; + if (!pf_migration_supported(gt)) + return -ENOPKG; + + return pf_save_vf_guc_mig_data(gt, vfid); } -static unsigned int pf_snapshot_index(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) +static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) { - return container_of(snapshot, struct xe_gt_sriov_metadata, snapshot) - gt->sriov.pf.vfs; + int ret; + + xe_gt_assert(gt, data->hdr.size); + + pf_dump_mig_data(gt, vfid, data, "GuC data restore"); + + ret = pf_send_guc_restore_vf_mig_data(gt, vfid, data->vaddr, data->hdr.size); + if (ret < 0) + goto fail; + + return 0; + +fail: + xe_gt_sriov_err(gt, "Failed to restore VF%u GuC data (%pe)\n", + vfid, ERR_PTR(ret)); + return ret; } -static void pf_free_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) +/** + * xe_gt_sriov_pf_migration_guc_restore() - Restore VF GuC migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @data: the &xe_sriov_packet containing migration data + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_guc_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) { - struct xe_device *xe = gt_to_xe(gt); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + if (!pf_migration_supported(gt)) + return -ENOPKG; - drmm_kfree(&xe->drm, snapshot->guc.buff); - snapshot->guc.buff = NULL; - snapshot->guc.size = 0; + return pf_restore_vf_guc_state(gt, vfid, data); } -static int pf_alloc_guc_state(struct xe_gt *gt, - struct xe_gt_sriov_state_snapshot *snapshot, - size_t size) +static ssize_t pf_migration_mmio_size(struct xe_gt *gt, unsigned int vfid) { - struct xe_device *xe = gt_to_xe(gt); - void *p; - - pf_free_guc_state(gt, snapshot); + if (xe_gt_is_media_type(gt)) + return MED_VF_SW_FLAG_COUNT * sizeof(u32); + else + return VF_SW_FLAG_COUNT * sizeof(u32); +} - if (!size) - return -ENODATA; +static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size) +{ + struct xe_mmio mmio; + u32 *regs = buf; + int n; - if (size % sizeof(u32)) + if (size != pf_migration_mmio_size(gt, vfid)) return -EINVAL; - if (size > SZ_2M) - return -EFBIG; + xe_mmio_init_vf_view(&mmio, >->mmio, vfid); - p = drmm_kzalloc(&xe->drm, size, GFP_KERNEL); - if (!p) - return -ENOMEM; + if (xe_gt_is_media_type(gt)) + for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) + regs[n] = xe_mmio_read32(>->mmio, MED_VF_SW_FLAG(n)); + else + for (n = 0; n < VF_SW_FLAG_COUNT; n++) + regs[n] = xe_mmio_read32(>->mmio, VF_SW_FLAG(n)); - snapshot->guc.buff = p; - snapshot->guc.size = size; return 0; } -static void pf_dump_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) +static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, + const void *buf, size_t size) { - if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - unsigned int vfid __maybe_unused = pf_snapshot_index(gt, snapshot); + const u32 *regs = buf; + struct xe_mmio mmio; + int n; - xe_gt_sriov_dbg_verbose(gt, "VF%u GuC state is %zu dwords:\n", - vfid, 
snapshot->guc.size / sizeof(u32)); - print_hex_dump_bytes("state: ", DUMP_PREFIX_OFFSET, - snapshot->guc.buff, min(SZ_64, snapshot->guc.size)); - } + if (size != pf_migration_mmio_size(gt, vfid)) + return -EINVAL; + + xe_mmio_init_vf_view(&mmio, >->mmio, vfid); + + if (xe_gt_is_media_type(gt)) + for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) + xe_mmio_write32(>->mmio, MED_VF_SW_FLAG(n), regs[n]); + else + for (n = 0; n < VF_SW_FLAG_COUNT; n++) + xe_mmio_write32(>->mmio, VF_SW_FLAG(n), regs[n]); + + return 0; } -static int pf_save_vf_guc_state(struct xe_gt *gt, unsigned int vfid) +static int pf_save_vf_mmio_mig_data(struct xe_gt *gt, unsigned int vfid) { - struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid); + struct xe_sriov_packet *data; size_t size; int ret; - ret = pf_send_guc_query_vf_state_size(gt, vfid); - if (ret < 0) + size = pf_migration_mmio_size(gt, vfid); + xe_gt_assert(gt, size); + + data = xe_sriov_packet_alloc(gt_to_xe(gt)); + if (!data) + return -ENOMEM; + + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, + XE_SRIOV_PACKET_TYPE_MMIO, 0, size); + if (ret) goto fail; - size = ret * sizeof(u32); - xe_gt_sriov_dbg_verbose(gt, "VF%u state size is %d dwords (%zu bytes)\n", vfid, ret, size); - ret = pf_alloc_guc_state(gt, snapshot, size); - if (ret < 0) + ret = pf_migration_mmio_save(gt, vfid, data->vaddr, size); + if (ret) goto fail; - ret = pf_send_guc_save_vf_state(gt, vfid, snapshot->guc.buff, size); - if (ret < 0) + pf_dump_mig_data(gt, vfid, data, "MMIO data save"); + + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); + if (ret) goto fail; - size = ret * sizeof(u32); - xe_gt_assert(gt, size); - xe_gt_assert(gt, size <= snapshot->guc.size); - snapshot->guc.size = size; - pf_dump_guc_state(gt, snapshot); return 0; fail: - xe_gt_sriov_dbg(gt, "Unable to save VF%u state (%pe)\n", vfid, ERR_PTR(ret)); - pf_free_guc_state(gt, snapshot); + xe_sriov_packet_free(data); + xe_gt_sriov_err(gt, "Failed to save VF%u MMIO data (%pe)\n", vfid, ERR_PTR(ret)); return ret; } +static int pf_restore_vf_mmio_mig_data(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + int ret; + + pf_dump_mig_data(gt, vfid, data, "MMIO data restore"); + + ret = pf_migration_mmio_restore(gt, vfid, data->vaddr, data->hdr.size); + if (ret) { + xe_gt_sriov_err(gt, "Failed to restore VF%u MMIO data (%pe)\n", + vfid, ERR_PTR(ret)); + + return ret; + } + + return 0; +} + /** - * xe_gt_sriov_pf_migration_save_guc_state() - Take a GuC VF state snapshot. + * xe_gt_sriov_pf_migration_mmio_save() - Save VF MMIO migration data. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the VF identifier (can't be 0) * * This function is for PF only. * * Return: 0 on success or a negative error code on failure. */ -int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid) +int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid) { - int err; + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + return pf_save_vf_mmio_mig_data(gt, vfid); +} + +/** + * xe_gt_sriov_pf_migration_mmio_restore() - Restore VF MMIO migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * @data: the &xe_sriov_packet containing migration data + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); xe_gt_assert(gt, vfid != PFID); xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - if (!pf_migration_supported(gt)) - return -ENOPKG; + return pf_restore_vf_mmio_mig_data(gt, vfid, data); +} - mutex_lock(pf_migration_mutex(gt)); - err = pf_save_vf_guc_state(gt, vfid); - mutex_unlock(pf_migration_mutex(gt)); +static ssize_t pf_migration_vram_size(struct xe_gt *gt, unsigned int vfid) +{ + if (!xe_gt_is_main_type(gt)) + return 0; - return err; + return xe_gt_sriov_pf_config_get_lmem(gt, vfid); } -static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid) +static struct dma_fence *__pf_save_restore_vram(struct xe_gt *gt, unsigned int vfid, + struct xe_bo *vram, u64 vram_offset, + struct xe_bo *sysmem, u64 sysmem_offset, + size_t size, bool save) { - struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid); + struct dma_fence *ret = NULL; + struct drm_exec exec; + int err; + + drm_exec_init(&exec, 0, 0); + drm_exec_until_all_locked(&exec) { + err = drm_exec_lock_obj(&exec, &vram->ttm.base); + drm_exec_retry_on_contention(&exec); + if (err) { + ret = ERR_PTR(err); + goto err; + } + + err = drm_exec_lock_obj(&exec, &sysmem->ttm.base); + drm_exec_retry_on_contention(&exec); + if (err) { + ret = ERR_PTR(err); + goto err; + } + } + + ret = xe_migrate_vram_copy_chunk(vram, vram_offset, sysmem, sysmem_offset, size, + save ? XE_MIGRATE_COPY_TO_SRAM : XE_MIGRATE_COPY_TO_VRAM); + +err: + drm_exec_fini(&exec); + + return ret; +} + +#define PF_VRAM_SAVE_RESTORE_TIMEOUT (5 * HZ) +static int pf_save_vram_chunk(struct xe_gt *gt, unsigned int vfid, + struct xe_bo *src_vram, u64 src_vram_offset, + size_t size) +{ + struct xe_sriov_packet *data; + struct dma_fence *fence; int ret; - if (!snapshot->guc.size) - return -ENODATA; + data = xe_sriov_packet_alloc(gt_to_xe(gt)); + if (!data) + return -ENOMEM; - xe_gt_sriov_dbg_verbose(gt, "restoring %zu dwords of VF%u GuC state\n", - snapshot->guc.size / sizeof(u32), vfid); - ret = pf_send_guc_restore_vf_state(gt, vfid, snapshot->guc.buff, snapshot->guc.size); - if (ret < 0) + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, + XE_SRIOV_PACKET_TYPE_VRAM, src_vram_offset, + size); + if (ret) + goto fail; + + fence = __pf_save_restore_vram(gt, vfid, + src_vram, src_vram_offset, + data->bo, 0, size, true); + + ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT); + dma_fence_put(fence); + if (!ret) { + ret = -ETIME; + goto fail; + } + + pf_dump_mig_data(gt, vfid, data, "VRAM data save"); + + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); + if (ret) goto fail; - xe_gt_sriov_dbg_verbose(gt, "restored %d dwords of VF%u GuC state\n", ret, vfid); return 0; fail: - xe_gt_sriov_dbg(gt, "Failed to restore VF%u GuC state (%pe)\n", vfid, ERR_PTR(ret)); + xe_sriov_packet_free(data); + return ret; +} + +#define VF_VRAM_STATE_CHUNK_MAX_SIZE SZ_512M +static int pf_save_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); + loff_t *offset = &migration->save.vram_offset; + struct xe_bo *vram; + size_t vram_size, chunk_size; + int ret; + + vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid); + if (!vram) + return -ENXIO; + + vram_size = xe_bo_size(vram); + + xe_gt_assert(gt, *offset < vram_size); + + chunk_size = min(vram_size - *offset, 
VF_VRAM_STATE_CHUNK_MAX_SIZE); + + ret = pf_save_vram_chunk(gt, vfid, vram, *offset, chunk_size); + if (ret) + goto fail; + + *offset += chunk_size; + + xe_bo_put(vram); + + if (*offset < vram_size) + return -EAGAIN; + + return 0; + +fail: + xe_bo_put(vram); + xe_gt_sriov_err(gt, "Failed to save VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret)); + return ret; +} + +static int pf_restore_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + u64 end = data->hdr.offset + data->hdr.size; + struct dma_fence *fence; + struct xe_bo *vram; + size_t size; + int ret = 0; + + vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid); + if (!vram) + return -ENXIO; + + size = xe_bo_size(vram); + + if (end > size || end < data->hdr.size) { + ret = -EINVAL; + goto err; + } + + pf_dump_mig_data(gt, vfid, data, "VRAM data restore"); + + fence = __pf_save_restore_vram(gt, vfid, vram, data->hdr.offset, + data->bo, 0, data->hdr.size, false); + ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT); + dma_fence_put(fence); + if (!ret) { + ret = -ETIME; + goto err; + } + + return 0; +err: + xe_bo_put(vram); + xe_gt_sriov_err(gt, "Failed to restore VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret)); return ret; } /** - * xe_gt_sriov_pf_migration_restore_guc_state() - Restore a GuC VF state. + * xe_gt_sriov_pf_migration_vram_save() - Save VF VRAM migration data. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the VF identifier (can't be 0) * * This function is for PF only. * * Return: 0 on success or a negative error code on failure. */ -int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid) +int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid) { - int ret; - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); xe_gt_assert(gt, vfid != PFID); xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - if (!pf_migration_supported(gt)) - return -ENOPKG; + return pf_save_vf_vram_mig_data(gt, vfid); +} - mutex_lock(pf_migration_mutex(gt)); - ret = pf_restore_vf_guc_state(gt, vfid); - mutex_unlock(pf_migration_mutex(gt)); +/** + * xe_gt_sriov_pf_migration_vram_restore() - Restore VF VRAM migration data. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * @data: the &xe_sriov_packet containing migration data + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - return ret; + return pf_restore_vf_vram_mig_data(gt, vfid, data); } -#ifdef CONFIG_DEBUG_FS /** - * xe_gt_sriov_pf_migration_read_guc_state() - Read a GuC VF state. + * xe_gt_sriov_pf_migration_size() - Total size of migration data from all components within a GT. * @gt: the &xe_gt - * @vfid: the VF identifier - * @buf: the user space buffer to read to - * @count: the maximum number of bytes to read - * @pos: the current position in the buffer + * @vfid: the VF identifier (can't be 0) * * This function is for PF only. * - * This function reads up to @count bytes from the saved VF GuC state buffer - * at offset @pos into the user space address starting at @buf. - * - * Return: the number of bytes read or a negative error code on failure. + * Return: total migration data size in bytes or a negative error code on failure. 
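Two details of the VRAM path above are easy to misread. First, VRAM is streamed in chunks of at most VF_VRAM_STATE_CHUNK_MAX_SIZE (512M): pf_save_vf_vram_mig_data() advances save.vram_offset and returns -EAGAIN until the whole LMEM object has been packetized, so a large VF produces several VRAM packets. Second, the copies are synchronized with dma_fence_wait_timeout(), whose return convention is easy to get wrong; the hedged sketch below spells it out (the negative-return branch is added here for illustration, and fence reference handling is left to the caller as in the code above)::

    static long wait_copy(struct dma_fence *fence)
    {
            long rem = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT);

            if (rem < 0)
                    return rem;             /* the wait itself failed */
            if (rem == 0)
                    return -ETIME;          /* fence did not signal within the timeout */
            return 0;                       /* signalled, with 'rem' jiffies of budget left */
    }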
*/ -ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid, - char __user *buf, size_t count, loff_t *pos) +ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid) { - struct xe_gt_sriov_state_snapshot *snapshot; - ssize_t ret; + ssize_t total = 0; + ssize_t size; xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); xe_gt_assert(gt, vfid != PFID); xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); - if (!pf_migration_supported(gt)) - return -ENOPKG; + size = pf_migration_guc_size(gt, vfid); + if (size < 0) + return size; + if (size > 0) + size += sizeof(struct xe_sriov_packet_hdr); + total += size; + + size = pf_migration_ggtt_size(gt, vfid); + if (size < 0) + return size; + if (size > 0) + size += sizeof(struct xe_sriov_packet_hdr); + total += size; + + size = pf_migration_mmio_size(gt, vfid); + if (size < 0) + return size; + if (size > 0) + size += sizeof(struct xe_sriov_packet_hdr); + total += size; + + size = pf_migration_vram_size(gt, vfid); + if (size < 0) + return size; + if (size > 0) + size += sizeof(struct xe_sriov_packet_hdr); + total += size; + + return total; +} - mutex_lock(pf_migration_mutex(gt)); - snapshot = pf_pick_vf_snapshot(gt, vfid); - if (snapshot->guc.size) - ret = simple_read_from_buffer(buf, count, pos, snapshot->guc.buff, - snapshot->guc.size); - else - ret = -ENODATA; - mutex_unlock(pf_migration_mutex(gt)); +/** + * xe_gt_sriov_pf_migration_ring_empty() - Check if a migration ring is empty. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * Return: true if the ring is empty, otherwise false. + */ +bool xe_gt_sriov_pf_migration_ring_empty(struct xe_gt *gt, unsigned int vfid) +{ + return ptr_ring_empty(&pf_pick_gt_migration(gt, vfid)->ring); +} - return ret; +/** + * xe_gt_sriov_pf_migration_ring_full() - Check if a migration ring is full. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * Return: true if the ring is full, otherwise false. + */ +bool xe_gt_sriov_pf_migration_ring_full(struct xe_gt *gt, unsigned int vfid) +{ + return ptr_ring_full(&pf_pick_gt_migration(gt, vfid)->ring); +} + +/** + * xe_gt_sriov_pf_migration_ring_free() - Consume and free all data in migration ring + * @gt: the &xe_gt + * @vfid: the VF identifier + */ +void xe_gt_sriov_pf_migration_ring_free(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); + struct xe_sriov_packet *data; + + if (ptr_ring_empty(&migration->ring)) + return; + + xe_gt_sriov_notice(gt, "VF%u unprocessed migration data left in the ring!\n", vfid); + + while ((data = ptr_ring_consume(&migration->ring))) + xe_sriov_packet_free(data); +} + +static void pf_migration_save_data_todo(struct xe_gt *gt, unsigned int vfid, + enum xe_sriov_packet_type type) +{ + set_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining); +} + +/** + * xe_gt_sriov_pf_migration_save_init() - Initialize per-GT migration related data. 
+ * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + */ +void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); + + migration->save.data_remaining = 0; + migration->save.vram_offset = 0; + + xe_gt_assert(gt, pf_migration_guc_size(gt, vfid) > 0); + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GUC); + + if (pf_migration_ggtt_size(gt, vfid) > 0) + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GGTT); + + xe_gt_assert(gt, pf_migration_mmio_size(gt, vfid) > 0); + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_MMIO); + + if (pf_migration_vram_size(gt, vfid) > 0) + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_VRAM); +} + +/** + * xe_gt_sriov_pf_migration_save_data_pending() - Check if migration data type needs to be saved. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * @type: the &xe_sriov_packet_type of data to be checked + * + * Return: true if the data needs saving, otherwise false. + */ +bool xe_gt_sriov_pf_migration_save_data_pending(struct xe_gt *gt, unsigned int vfid, + enum xe_sriov_packet_type type) +{ + return test_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining); } /** - * xe_gt_sriov_pf_migration_write_guc_state() - Write a GuC VF state. + * xe_gt_sriov_pf_migration_save_data_complete() - Complete migration data type save. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0) + * @type: the &xe_sriov_packet_type to be marked as completed. + */ +void xe_gt_sriov_pf_migration_save_data_complete(struct xe_gt *gt, unsigned int vfid, + enum xe_sriov_packet_type type) +{ + clear_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining); +} + +/** + * xe_gt_sriov_pf_migration_save_produce() - Add VF save data packet to migration ring. * @gt: the &xe_gt * @vfid: the VF identifier - * @buf: the user space buffer with GuC VF state - * @size: the size of GuC VF state (in bytes) + * @data: the &xe_sriov_packet * - * This function is for PF only. + * Called by the save migration data producer (PF SR-IOV Control worker) when + * processing migration data. + * Wakes up the save migration data consumer (userspace), that is potentially + * waiting for data when the ring was empty. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_save_produce(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + int ret; + + ret = ptr_ring_produce(&pf_pick_gt_migration(gt, vfid)->ring, data); + if (ret) + return ret; + + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); + + return 0; +} + +/** + * xe_gt_sriov_pf_migration_restore_consume() - Get VF restore data packet from migration ring. + * @gt: the &xe_gt + * @vfid: the VF identifier * - * This function reads @size bytes of the VF GuC state stored at user space - * address @buf and writes it into a internal VF state buffer. + * Called by the restore migration data consumer (PF SR-IOV Control worker) when + * processing migration data. + * Wakes up the restore migration data producer (userspace), that is + * potentially waiting to add more data when the ring is full. * - * Return: the number of bytes used or a negative error code on failure. + * Return: Pointer to &xe_sriov_packet on success, + * NULL if ring is empty. 
*/ -ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid, - const char __user *buf, size_t size) +struct xe_sriov_packet * +xe_gt_sriov_pf_migration_restore_consume(struct xe_gt *gt, unsigned int vfid) { - struct xe_gt_sriov_state_snapshot *snapshot; - loff_t pos = 0; - ssize_t ret; + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); + struct wait_queue_head *wq = xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid); + struct xe_sriov_packet *data; - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); - xe_gt_assert(gt, vfid != PFID); - xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + data = ptr_ring_consume(&migration->ring); + if (data) + wake_up_all(wq); - if (!pf_migration_supported(gt)) - return -ENOPKG; + return data; +} - mutex_lock(pf_migration_mutex(gt)); - snapshot = pf_pick_vf_snapshot(gt, vfid); - ret = pf_alloc_guc_state(gt, snapshot, size); - if (!ret) { - ret = simple_write_to_buffer(snapshot->guc.buff, size, &pos, buf, size); - if (ret < 0) - pf_free_guc_state(gt, snapshot); - else - pf_dump_guc_state(gt, snapshot); +static bool pf_restore_data_ready(struct xe_gt *gt, unsigned int vfid) +{ + if (xe_gt_sriov_pf_control_check_restore_failed(gt, vfid) || + !ptr_ring_full(&pf_pick_gt_migration(gt, vfid)->ring)) + return true; + + return false; +} + +/** + * xe_gt_sriov_pf_migration_restore_produce() - Add VF restore data packet to migration ring. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @data: the &xe_sriov_packet + * + * Called by the restore migration data producer (userspace) when processing + * migration data. + * If the ring is full, waits until there is space. + * Queues the restore migration data consumer (PF SR-IOV Control worker), that + * is potentially waiting for data when the ring was empty. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_restore_produce(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data) +{ + int ret; + + xe_gt_assert(gt, data->hdr.tile_id == gt->tile->id); + xe_gt_assert(gt, data->hdr.gt_id == gt->info.id); + + for (;;) { + if (xe_gt_sriov_pf_control_check_restore_failed(gt, vfid)) + return -EIO; + + ret = ptr_ring_produce(&pf_pick_gt_migration(gt, vfid)->ring, data); + if (!ret) + break; + + ret = wait_event_interruptible(*xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid), + pf_restore_data_ready(gt, vfid)); + if (ret) + return ret; } - mutex_unlock(pf_migration_mutex(gt)); - return ret; + return xe_gt_sriov_pf_control_process_restore_data(gt, vfid); } -#endif /* CONFIG_DEBUG_FS */ -static bool pf_check_migration_support(struct xe_gt *gt) +/** + * xe_gt_sriov_pf_migration_save_consume() - Get VF save data packet from migration ring. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * Called by the save migration data consumer (userspace) when + * processing migration data. + * Queues the save migration data producer (PF SR-IOV Control worker), that is + * potentially waiting to add more data when the ring is full. + * + * Return: Pointer to &xe_sriov_packet on success, + * NULL if ring is empty and there's no more data available, + * ERR_PTR(-EAGAIN) if the ring is empty, but data is still produced. 
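The produce/consume helpers above amount to a bounded producer/consumer channel built from a plain ptr_ring (sized XE_GT_SRIOV_PF_MIGRATION_RING_SIZE) plus the per-VF migration waitqueue: whoever removes an element wakes the side that may be blocked on a full or empty ring. Stripped of the migration specifics, the pattern looks roughly like the hedged sketch below (single producer and single consumer assumed; struct and function names are made up for illustration)::

    #include <linux/ptr_ring.h>
    #include <linux/wait.h>

    struct chan {
            struct ptr_ring ring;
            wait_queue_head_t wq;
    };

    static int chan_init(struct chan *c)
    {
            init_waitqueue_head(&c->wq);
            return ptr_ring_init(&c->ring, 5, GFP_KERNEL);  /* 5: as in the migration ring */
    }

    static int chan_put(struct chan *c, void *item)         /* producer side */
    {
            int ret = wait_event_interruptible(c->wq, !ptr_ring_full(&c->ring));

            if (ret)
                    return ret;
            ret = ptr_ring_produce(&c->ring, item);
            if (!ret)
                    wake_up_all(&c->wq);    /* wake a consumer waiting on an empty ring */
            return ret;
    }

    static void *chan_get(struct chan *c)                   /* consumer side */
    {
            void *item = ptr_ring_consume(&c->ring);

            if (item)
                    wake_up_all(&c->wq);    /* there is room for the producer again */
            return item;
    }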
+ */ +struct xe_sriov_packet * +xe_gt_sriov_pf_migration_save_consume(struct xe_gt *gt, unsigned int vfid) { - /* GuC 70.25 with save/restore v2 is required */ - xe_gt_assert(gt, GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 25, 0)); + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); + struct xe_sriov_packet *data; + int ret; + + data = ptr_ring_consume(&migration->ring); + if (data) { + ret = xe_gt_sriov_pf_control_process_save_data(gt, vfid); + if (ret) { + xe_sriov_packet_free(data); + return ERR_PTR(ret); + } + + return data; + } + + if (xe_gt_sriov_pf_control_check_save_data_done(gt, vfid)) + return NULL; + + if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid)) + return ERR_PTR(-EIO); - /* XXX: for now this is for feature enabling only */ - return IS_ENABLED(CONFIG_DRM_XE_DEBUG); + return ERR_PTR(-EAGAIN); +} + +static void destroy_pf_packet(void *ptr) +{ + struct xe_sriov_packet *data = ptr; + + xe_sriov_packet_free(data); +} + +static void action_ring_cleanup(void *arg) +{ + struct ptr_ring *r = arg; + + ptr_ring_cleanup(r, destroy_pf_packet); } /** @@ -402,18 +1023,27 @@ static bool pf_check_migration_support(struct xe_gt *gt) int xe_gt_sriov_pf_migration_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + unsigned int n, totalvfs; int err; xe_gt_assert(gt, IS_SRIOV_PF(xe)); - gt->sriov.pf.migration.supported = pf_check_migration_support(gt); - if (!pf_migration_supported(gt)) return 0; - err = drmm_mutex_init(&xe->drm, >->sriov.pf.migration.snapshot_lock); - if (err) - return err; + totalvfs = xe_sriov_pf_get_totalvfs(xe); + for (n = 1; n <= totalvfs; n++) { + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, n); + + err = ptr_ring_init(&migration->ring, + XE_GT_SRIOV_PF_MIGRATION_RING_SIZE, GFP_KERNEL); + if (err) + return err; + + err = devm_add_action_or_reset(xe->drm.dev, action_ring_cleanup, &migration->ring); + if (err) + return err; + } return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h index 09faeae00ddb..181207a637b9 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h @@ -9,16 +9,46 @@ #include <linux/types.h> struct xe_gt; +struct xe_sriov_packet; +enum xe_sriov_packet_type; + +/* TODO: get this information by querying GuC in the future */ +#define XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE SZ_8M int xe_gt_sriov_pf_migration_init(struct xe_gt *gt); -int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid); -int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid); - -#ifdef CONFIG_DEBUG_FS -ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid, - char __user *buf, size_t count, loff_t *pos); -ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid, - const char __user *buf, size_t count); -#endif +int xe_gt_sriov_pf_migration_guc_save(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_migration_guc_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); +int xe_gt_sriov_pf_migration_ggtt_save(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_migration_ggtt_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); +int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); +int 
xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); + +ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid); + +bool xe_gt_sriov_pf_migration_ring_empty(struct xe_gt *gt, unsigned int vfid); +bool xe_gt_sriov_pf_migration_ring_full(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_migration_ring_free(struct xe_gt *gt, unsigned int vfid); + +void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid); +bool xe_gt_sriov_pf_migration_save_data_pending(struct xe_gt *gt, unsigned int vfid, + enum xe_sriov_packet_type type); +void xe_gt_sriov_pf_migration_save_data_complete(struct xe_gt *gt, unsigned int vfid, + enum xe_sriov_packet_type type); + +int xe_gt_sriov_pf_migration_save_produce(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); +struct xe_sriov_packet * +xe_gt_sriov_pf_migration_restore_consume(struct xe_gt *gt, unsigned int vfid); + +int xe_gt_sriov_pf_migration_restore_produce(struct xe_gt *gt, unsigned int vfid, + struct xe_sriov_packet *data); +struct xe_sriov_packet * +xe_gt_sriov_pf_migration_save_consume(struct xe_gt *gt, unsigned int vfid); #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h index 1f3110b6d44f..f50c64241e9c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h @@ -6,35 +6,23 @@ #ifndef _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_ #define _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_ -#include <linux/mutex.h> -#include <linux/types.h> +#include <linux/ptr_ring.h> /** - * struct xe_gt_sriov_state_snapshot - GT-level per-VF state snapshot data. + * struct xe_gt_sriov_migration_data - GT-level per-VF migration data. * * Used by the PF driver to maintain per-VF migration data. */ -struct xe_gt_sriov_state_snapshot { - /** @guc: GuC VF state snapshot */ +struct xe_gt_sriov_migration_data { + /** @ring: queue containing VF save / restore migration data */ + struct ptr_ring ring; + /** @save: structure for currently processed save migration data */ struct { - /** @guc.buff: buffer with the VF state */ - u32 *buff; - /** @guc.size: size of the buffer (must be dwords aligned) */ - u32 size; - } guc; -}; - -/** - * struct xe_gt_sriov_pf_migration - GT-level data. - * - * Used by the PF driver to maintain non-VF specific per-GT data. 
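As background for the switch from a single GuC-state buffer to a per-VF queue, a small, self-contained reminder of the <linux/ptr_ring.h> contract the new code relies on (generic example, not taken from the driver): entries are non-NULL pointers, produce fails when the ring is full, consume returns NULL when it is empty, and cleanup runs a destructor on anything still queued.

#include <linux/ptr_ring.h>
#include <linux/slab.h>

/* Generic illustration of the ptr_ring API -- not driver code. */
static void example_destroy(void *ptr)
{
	kfree(ptr);				/* called for entries left in the ring */
}

static int example_ptr_ring_usage(void)
{
	struct ptr_ring ring;
	void *item;
	int err;

	err = ptr_ring_init(&ring, 8, GFP_KERNEL);
	if (err)
		return err;

	item = kzalloc(64, GFP_KERNEL);		/* NULL entries are not allowed */
	if (!item) {
		err = -ENOMEM;
		goto out;
	}

	if (ptr_ring_produce(&ring, item))	/* non-zero means the ring is full */
		kfree(item);

	kfree(ptr_ring_consume(&ring));		/* returns NULL when empty */
out:
	ptr_ring_cleanup(&ring, example_destroy);
	return err;
}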
- */ -struct xe_gt_sriov_pf_migration { - /** @supported: indicates whether the feature is supported */ - bool supported; - - /** @snapshot_lock: protects all VFs snapshots */ - struct mutex snapshot_lock; + /** @save.data_remaining: bitmap of migration types that need to be saved */ + unsigned long data_remaining; + /** @save.vram_offset: last saved offset within VRAM, used for chunked VRAM save */ + loff_t vram_offset; + } save; }; #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 76dd9233ef9f..2eb21610e5a0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -99,11 +99,30 @@ static const struct xe_reg ver_3000_runtime_regs[] = { HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; +static const struct xe_reg ver_35_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + XEHP_FUSE4, /* _MMIO(0x9114) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_L3BANK_ENABLE, /* _MMIO(0x9130) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ + XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ + XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ + SERVICE_COPY_ENABLE, /* _MMIO(0x9170) */ +}; + static const struct xe_reg *pick_runtime_regs(struct xe_device *xe, unsigned int *count) { const struct xe_reg *regs; - if (GRAPHICS_VERx100(xe) >= 3000) { + if (GRAPHICS_VER(xe) >= 35) { + *count = ARRAY_SIZE(ver_35_runtime_regs); + regs = ver_35_runtime_regs; + } else if (GRAPHICS_VERx100(xe) >= 3000) { *count = ARRAY_SIZE(ver_3000_runtime_regs); regs = ver_3000_runtime_regs; } else if (GRAPHICS_VERx100(xe) >= 2000) { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index a64a6835ad65..667b8310478d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -31,8 +31,8 @@ struct xe_gt_sriov_metadata { /** @version: negotiated VF/PF ABI version */ struct xe_gt_sriov_pf_service_version version; - /** @snapshot: snapshot of the VF state data */ - struct xe_gt_sriov_state_snapshot snapshot; + /** @migration: per-VF migration data. */ + struct xe_gt_sriov_migration_data migration; }; /** @@ -58,7 +58,6 @@ struct xe_gt_sriov_pf { struct xe_gt_sriov_pf_service service; struct xe_gt_sriov_pf_control control; struct xe_gt_sriov_pf_policy policy; - struct xe_gt_sriov_pf_migration migration; struct xe_gt_sriov_spare_config spare; struct xe_gt_sriov_metadata *vfs; }; diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index aa962c783cdf..82c5fbcdfbe3 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -8,221 +8,222 @@ #include <regs/xe_gt_regs.h> #include "xe_device.h" #include "xe_gt.h" -#include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_gt_throttle.h" #include "xe_mmio.h" +#include "xe_platform_types.h" #include "xe_pm.h" /** * DOC: Xe GT Throttle * - * Provides sysfs entries and other helpers for frequency throttle reasons in GT + * The GT frequency may be throttled by hardware/firmware for various reasons + * that are provided through attributes under the ``freq0/throttle/`` directory. + * Their availability depend on the platform and some may not be visible if that + * reason is not available. 
* - * device/gt#/freq0/throttle/status - Overall status - * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1 - * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2 - * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc. - * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal - * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot - * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL - * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT - * device/gt#/freq0/throttle/reason_vr_tdc - Frequency throttle due to VR TDC + * The ``reasons`` attribute can be used by sysadmin to monitor all possible + * reasons for throttling and report them. It's preferred over monitoring + * ``status`` and then reading the reason from individual attributes since that + * is racy. If there's no throttling happening, "none" is returned. + * + * The following attributes are available on Crescent Island platform: + * + * - ``status``: Overall throttle status (0: no throttling, 1: throttling) + * - ``reasons``: Array of reasons causing throttling separated by space + * - ``reason_pl1``: package PL1 + * - ``reason_pl2``: package PL2 + * - ``reason_pl4``: package PL4 + * - ``reason_prochot``: prochot + * - ``reason_soc_thermal``: SoC thermal + * - ``reason_mem_thermal``: Memory thermal + * - ``reason_vr_thermal``: VR thermal + * - ``reason_iccmax``: ICCMAX + * - ``reason_ratl``: RATL thermal algorithm + * - ``reason_soc_avg_thermal``: SoC average temp + * - ``reason_fastvmode``: VR is hitting FastVMode + * - ``reason_psys_pl1``: PSYS PL1 + * - ``reason_psys_pl2``: PSYS PL2 + * - ``reason_p0_freq``: P0 frequency + * - ``reason_psys_crit``: PSYS critical + * + * Other platforms support the following reasons: + * + * - ``status``: Overall throttle status (0: no throttling, 1: throttling) + * - ``reasons``: Array of reasons causing throttling separated by space + * - ``reason_pl1``: package PL1 + * - ``reason_pl2``: package PL2 + * - ``reason_pl4``: package PL4, Iccmax etc. 
+ * - ``reason_thermal``: thermal + * - ``reason_prochot``: prochot + * - ``reason_ratl``: RATL thermal algorithm + * - ``reason_vr_thermalert``: VR THERMALERT + * - ``reason_vr_tdc``: VR TDC */ -static struct xe_gt * -dev_to_gt(struct device *dev) -{ - return kobj_to_gt(dev->kobj.parent); -} - -u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt) -{ - u32 reg; - - xe_pm_runtime_get(gt_to_xe(gt)); - if (xe_gt_is_media_type(gt)) - reg = xe_mmio_read32(&gt->mmio, MTL_MEDIA_PERF_LIMIT_REASONS); - else - reg = xe_mmio_read32(&gt->mmio, GT0_PERF_LIMIT_REASONS); - xe_pm_runtime_put(gt_to_xe(gt)); - - return reg; -} - -static u32 read_status(struct xe_gt *gt) -{ - u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK; - - xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status); - return status; -} +struct throttle_attribute { + struct kobj_attribute attr; + u32 mask; +}; -static u32 read_reason_pl1(struct xe_gt *gt) +static struct xe_gt *dev_to_gt(struct device *dev) { - u32 pl1 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_1_MASK; - - return pl1; + return kobj_to_gt(dev->kobj.parent); } -static u32 read_reason_pl2(struct xe_gt *gt) +static struct xe_gt *throttle_to_gt(struct kobject *kobj) { - u32 pl2 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_2_MASK; - - return pl2; + return dev_to_gt(kobj_to_dev(kobj)); } -static u32 read_reason_pl4(struct xe_gt *gt) +static struct throttle_attribute *kobj_attribute_to_throttle(struct kobj_attribute *attr) { - u32 pl4 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_4_MASK; - - return pl4; + return container_of(attr, struct throttle_attribute, attr); } -static u32 read_reason_thermal(struct xe_gt *gt) -{ - u32 thermal = xe_gt_throttle_get_limit_reasons(gt) & THERMAL_LIMIT_MASK; - - return thermal; -} - -static u32 read_reason_prochot(struct xe_gt *gt) +u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt) { - u32 prochot = xe_gt_throttle_get_limit_reasons(gt) & PROCHOT_MASK; - - return prochot; -} + struct xe_device *xe = gt_to_xe(gt); + struct xe_reg reg; + u32 val, mask; -static u32 read_reason_ratl(struct xe_gt *gt) -{ - u32 ratl = xe_gt_throttle_get_limit_reasons(gt) & RATL_MASK; + if (xe_gt_is_media_type(gt)) + reg = MTL_MEDIA_PERF_LIMIT_REASONS; + else + reg = GT0_PERF_LIMIT_REASONS; - return ratl; -} + if (xe->info.platform == XE_CRESCENTISLAND) + mask = CRI_PERF_LIMIT_REASONS_MASK; + else + mask = GT0_PERF_LIMIT_REASONS_MASK; -static u32 read_reason_vr_thermalert(struct xe_gt *gt) -{ - u32 thermalert = xe_gt_throttle_get_limit_reasons(gt) & VR_THERMALERT_MASK; + xe_pm_runtime_get(xe); + val = xe_mmio_read32(&gt->mmio, reg) & mask; + xe_pm_runtime_put(xe); - return thermalert; + return val; } -static u32 read_reason_vr_tdc(struct xe_gt *gt) +static bool is_throttled_by(struct xe_gt *gt, u32 mask) { - u32 tdc = xe_gt_throttle_get_limit_reasons(gt) & VR_TDC_MASK; - - return tdc; + return xe_gt_throttle_get_limit_reasons(gt) & mask; } -static ssize_t status_show(struct kobject *kobj, +static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, char *buff) { - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool status = !!read_status(gt); + struct throttle_attribute *ta = kobj_attribute_to_throttle(attr); + struct xe_gt *gt = throttle_to_gt(kobj); - return sysfs_emit(buff, "%u\n", status); + return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ta->mask)); } -static struct kobj_attribute attr_status = __ATTR_RO(status); -static ssize_t reason_pl1_show(struct kobject 
*kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool pl1 = !!read_reason_pl1(gt); +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe); - return sysfs_emit(buff, "%u\n", pl1); -} -static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1); - -static ssize_t reason_pl2_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) +static ssize_t reasons_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool pl2 = !!read_reason_pl2(gt); + struct xe_gt *gt = throttle_to_gt(kobj); + struct xe_device *xe = gt_to_xe(gt); + const struct attribute_group *group; + struct attribute **pother; + ssize_t ret = 0; + u32 reasons; - return sysfs_emit(buff, "%u\n", pl2); -} -static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2); + reasons = xe_gt_throttle_get_limit_reasons(gt); + if (!reasons) + goto ret_none; -static ssize_t reason_pl4_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool pl4 = !!read_reason_pl4(gt); + group = get_platform_throttle_group(xe); + for (pother = group->attrs; *pother; pother++) { + struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr); + struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr); - return sysfs_emit(buff, "%u\n", pl4); -} -static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4); + if (other_ta->mask != U32_MAX && reasons & other_ta->mask) + ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name); + } -static ssize_t reason_thermal_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool thermal = !!read_reason_thermal(gt); + if (drm_WARN_ONCE(&xe->drm, !ret, "Unknown reason: %#x\n", reasons)) + goto ret_none; - return sysfs_emit(buff, "%u\n", thermal); -} -static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal); + /* Drop extra space from last iteration above */ + ret--; + ret += sysfs_emit_at(buff, ret, "\n"); -static ssize_t reason_prochot_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool prochot = !!read_reason_prochot(gt); + return ret; - return sysfs_emit(buff, "%u\n", prochot); +ret_none: + return sysfs_emit(buff, "none\n"); } -static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot); -static ssize_t reason_ratl_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool ratl = !!read_reason_ratl(gt); - - return sysfs_emit(buff, "%u\n", ratl); -} -static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl); - -static ssize_t reason_vr_thermalert_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool thermalert = !!read_reason_vr_thermalert(gt); - - return sysfs_emit(buff, "%u\n", thermalert); -} -static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert); - -static ssize_t reason_vr_tdc_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ 
- struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool tdc = !!read_reason_vr_tdc(gt); - - return sysfs_emit(buff, "%u\n", tdc); -} -static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc); +#define THROTTLE_ATTR_RO(name, _mask) \ + struct throttle_attribute attr_##name = { \ + .attr = __ATTR(name, 0444, reason_show, NULL), \ + .mask = _mask, \ + } + +#define THROTTLE_ATTR_RO_FUNC(name, _mask, _show) \ + struct throttle_attribute attr_##name = { \ + .attr = __ATTR(name, 0444, _show, NULL), \ + .mask = _mask, \ + } + +static THROTTLE_ATTR_RO_FUNC(reasons, 0, reasons_show); +static THROTTLE_ATTR_RO(status, U32_MAX); +static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK); +static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK); +static THROTTLE_ATTR_RO(reason_pl4, POWER_LIMIT_4_MASK); +static THROTTLE_ATTR_RO(reason_thermal, THERMAL_LIMIT_MASK); +static THROTTLE_ATTR_RO(reason_prochot, PROCHOT_MASK); +static THROTTLE_ATTR_RO(reason_ratl, RATL_MASK); +static THROTTLE_ATTR_RO(reason_vr_thermalert, VR_THERMALERT_MASK); +static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK); static struct attribute *throttle_attrs[] = { - &attr_status.attr, - &attr_reason_pl1.attr, - &attr_reason_pl2.attr, - &attr_reason_pl4.attr, - &attr_reason_thermal.attr, - &attr_reason_prochot.attr, - &attr_reason_ratl.attr, - &attr_reason_vr_thermalert.attr, - &attr_reason_vr_tdc.attr, + &attr_reasons.attr.attr, + &attr_status.attr.attr, + &attr_reason_pl1.attr.attr, + &attr_reason_pl2.attr.attr, + &attr_reason_pl4.attr.attr, + &attr_reason_thermal.attr.attr, + &attr_reason_prochot.attr.attr, + &attr_reason_ratl.attr.attr, + &attr_reason_vr_thermalert.attr.attr, + &attr_reason_vr_tdc.attr.attr, + NULL +}; + +static THROTTLE_ATTR_RO(reason_vr_thermal, VR_THERMAL_MASK); +static THROTTLE_ATTR_RO(reason_soc_thermal, SOC_THERMAL_LIMIT_MASK); +static THROTTLE_ATTR_RO(reason_mem_thermal, MEM_THERMAL_MASK); +static THROTTLE_ATTR_RO(reason_iccmax, ICCMAX_MASK); +static THROTTLE_ATTR_RO(reason_soc_avg_thermal, SOC_AVG_THERMAL_MASK); +static THROTTLE_ATTR_RO(reason_fastvmode, FASTVMODE_MASK); +static THROTTLE_ATTR_RO(reason_psys_pl1, PSYS_PL1_MASK); +static THROTTLE_ATTR_RO(reason_psys_pl2, PSYS_PL2_MASK); +static THROTTLE_ATTR_RO(reason_p0_freq, P0_FREQ_MASK); +static THROTTLE_ATTR_RO(reason_psys_crit, PSYS_CRIT_MASK); + +static struct attribute *cri_throttle_attrs[] = { + /* Common */ + &attr_reasons.attr.attr, + &attr_status.attr.attr, + &attr_reason_pl1.attr.attr, + &attr_reason_pl2.attr.attr, + &attr_reason_pl4.attr.attr, + &attr_reason_prochot.attr.attr, + &attr_reason_ratl.attr.attr, + /* CRI */ + &attr_reason_vr_thermal.attr.attr, + &attr_reason_soc_thermal.attr.attr, + &attr_reason_mem_thermal.attr.attr, + &attr_reason_iccmax.attr.attr, + &attr_reason_soc_avg_thermal.attr.attr, + &attr_reason_fastvmode.attr.attr, + &attr_reason_psys_pl1.attr.attr, + &attr_reason_psys_pl2.attr.attr, + &attr_reason_p0_freq.attr.attr, + &attr_reason_psys_crit.attr.attr, NULL }; @@ -231,19 +232,37 @@ static const struct attribute_group throttle_group_attrs = { .attrs = throttle_attrs, }; +static const struct attribute_group cri_throttle_group_attrs = { + .name = "throttle", + .attrs = cri_throttle_attrs, +}; + +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe) +{ + switch (xe->info.platform) { + case XE_CRESCENTISLAND: + return &cri_throttle_group_attrs; + default: + return &throttle_group_attrs; + } +} + static void gt_throttle_sysfs_fini(void *arg) { struct 
xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + const struct attribute_group *group = get_platform_throttle_group(xe); - sysfs_remove_group(gt->freq, &throttle_group_attrs); + sysfs_remove_group(gt->freq, group); } int xe_gt_throttle_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + const struct attribute_group *group = get_platform_throttle_group(xe); int err; - err = sysfs_create_group(gt->freq, &throttle_group_attrs); + err = sysfs_create_group(gt->freq, group); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 0b525643a048..0a728180b6fe 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -220,71 +220,6 @@ struct xe_gt { * operations (e.g. migrations, fixing page tables) */ u16 reserved_bcs_instance; - /** @usm.pf_wq: page fault work queue, unbound, high priority */ - struct workqueue_struct *pf_wq; - /** @usm.acc_wq: access counter work queue, unbound, high priority */ - struct workqueue_struct *acc_wq; - /** - * @usm.pf_queue: Page fault queue used to sync faults so faults can - * be processed not under the GuC CT lock. The queue is sized so - * it can sync all possible faults (1 per physical engine). - * Multiple queues exist for page faults from different VMs to be - * processed in parallel. - */ - struct pf_queue { - /** @usm.pf_queue.gt: back pointer to GT */ - struct xe_gt *gt; - /** @usm.pf_queue.data: data in the page fault queue */ - u32 *data; - /** - * @usm.pf_queue.num_dw: number of DWORDS in the page - * fault queue. Dynamically calculated based on the number - * of compute resources available. - */ - u32 num_dw; - /** - * @usm.pf_queue.tail: tail pointer in DWs for page fault queue, - * moved by worker which processes faults (consumer). - */ - u16 tail; - /** - * @usm.pf_queue.head: head pointer in DWs for page fault queue, - * moved by G2H handler (producer). - */ - u16 head; - /** @usm.pf_queue.lock: protects page fault queue */ - spinlock_t lock; - /** @usm.pf_queue.worker: to process page faults */ - struct work_struct worker; -#define NUM_PF_QUEUE 4 - } pf_queue[NUM_PF_QUEUE]; - /** - * @usm.acc_queue: Same as page fault queue, cannot process access - * counters under CT lock. - */ - struct acc_queue { - /** @usm.acc_queue.gt: back pointer to GT */ - struct xe_gt *gt; -#define ACC_QUEUE_NUM_DW 128 - /** @usm.acc_queue.data: data in the page fault queue */ - u32 data[ACC_QUEUE_NUM_DW]; - /** - * @usm.acc_queue.tail: tail pointer in DWs for access counter queue, - * moved by worker which processes counters - * (consumer). - */ - u16 tail; - /** - * @usm.acc_queue.head: head pointer in DWs for access counter queue, - * moved by G2H handler (producer). - */ - u16 head; - /** @usm.acc_queue.lock: protects page fault queue */ - spinlock_t lock; - /** @usm.acc_queue.worker: to process access counters */ - struct work_struct worker; -#define NUM_ACC_QUEUE 4 - } acc_queue[NUM_ACC_QUEUE]; } usm; /** @ordered_wq: used to serialize GT resets and TDRs */ diff --git a/drivers/gpu/drm/xe/xe_guard.h b/drivers/gpu/drm/xe/xe_guard.h new file mode 100644 index 000000000000..333f8e13b5a1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guard.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_GUARD_H_ +#define _XE_GUARD_H_ + +#include <linux/spinlock.h> + +/** + * struct xe_guard - Simple logic to protect a feature. 
+ * + * Implements simple semaphore-like logic that can be used to lock down the + * feature unless it is already in use. Allows enabling of otherwise + * incompatible features, where we can't follow the strict owner semantics + * required by the &rw_semaphore. + * + * NOTE! It shouldn't be used to protect data, use &rw_semaphore instead. + */ +struct xe_guard { + /** + * @counter: implements simple exclusive/lockdown logic: + * if == 0 then guard/feature is idle/not in use, + * if < 0 then feature is active and can't be locked-down, + * if > 0 then feature is locked-down and can't be activated. + */ + int counter; + + /** @name: the name of the guard (useful for debug) */ + const char *name; + + /** @owner: the info about the last owner of the guard (for debug) */ + void *owner; + + /** @lock: protects guard's data */ + spinlock_t lock; +}; + +/** + * xe_guard_init() - Initialize the guard. + * @guard: the &xe_guard to init + * @name: name of the guard + */ +static inline void xe_guard_init(struct xe_guard *guard, const char *name) +{ + spin_lock_init(&guard->lock); + guard->counter = 0; + guard->name = name; +} + +/** + * xe_guard_arm() - Arm the guard for the exclusive/lockdown mode. + * @guard: the &xe_guard to arm + * @lockdown: arm for lockdown(true) or exclusive(false) mode + * @who: optional owner info (for debug only) + * + * Multiple lockdown requests are allowed. + * Only a single exclusive access can be granted. + * Will fail if the guard is already in exclusive mode. + * On success, must call xe_guard_disarm() to release. + * + * Return: 0 on success or a negative error code on failure. + */ +static inline int xe_guard_arm(struct xe_guard *guard, bool lockdown, void *who) +{ + guard(spinlock)(&guard->lock); + + if (lockdown) { + if (guard->counter < 0) + return -EBUSY; + guard->counter++; + } else { + if (guard->counter > 0) + return -EPERM; + if (guard->counter < 0) + return -EUSERS; + guard->counter--; + } + + guard->owner = who; + return 0; +} + +/** + * xe_guard_disarm() - Disarm the guard from exclusive/lockdown mode. + * @guard: the &xe_guard to disarm + * @lockdown: disarm from lockdown(true) or exclusive(false) mode + * + * Return: true if successfully disarmed or false in case of mismatch. + */ +static inline bool xe_guard_disarm(struct xe_guard *guard, bool lockdown) +{ + guard(spinlock)(&guard->lock); + + if (lockdown) { + if (guard->counter <= 0) + return false; + guard->counter--; + } else { + if (guard->counter != -1) + return false; + guard->counter++; + } + return true; +} + +/** + * xe_guard_mode_str() - Convert guard mode into a string. + * @lockdown: flag used to select lockdown or exclusive mode + * + * Return: "lockdown" or "exclusive" string. + */ +static inline const char *xe_guard_mode_str(bool lockdown) +{ + return lockdown ? 
"lockdown" : "exclusive"; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index ecc3e091b89e..a686b04879d6 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -24,6 +24,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_gt_throttle.h" +#include "xe_gt_sriov_pf_migration.h" #include "xe_guc_ads.h" #include "xe_guc_buf.h" #include "xe_guc_capture.h" @@ -40,6 +41,7 @@ #include "xe_mmio.h" #include "xe_platform_types.h" #include "xe_sriov.h" +#include "xe_sriov_pf_migration.h" #include "xe_uc.h" #include "xe_uc_fw.h" #include "xe_wa.h" @@ -821,6 +823,14 @@ static int vf_guc_init_post_hwconfig(struct xe_guc *guc) return 0; } +static u32 guc_additional_cache_size(struct xe_device *xe) +{ + if (IS_SRIOV_PF(xe) && xe_sriov_pf_migration_supported(xe)) + return XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE; + else + return 0; /* Fallback to default size */ +} + /** * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load * @guc: The GuC object @@ -860,7 +870,8 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) if (ret) return ret; - ret = xe_guc_buf_cache_init(&guc->buf); + ret = xe_guc_buf_cache_init_with_size(&guc->buf, + guc_additional_cache_size(guc_to_xe(guc))); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c index 502ca3a4ee60..3ce442500130 100644 --- a/drivers/gpu/drm/xe/xe_guc_buf.c +++ b/drivers/gpu/drm/xe/xe_guc_buf.c @@ -13,6 +13,8 @@ #include "xe_guc_buf.h" #include "xe_sa.h" +#define XE_GUC_BUF_CACHE_DEFAULT_SIZE SZ_8K + static struct xe_guc *cache_to_guc(struct xe_guc_buf_cache *cache) { return container_of(cache, struct xe_guc, buf); @@ -23,21 +25,12 @@ static struct xe_gt *cache_to_gt(struct xe_guc_buf_cache *cache) return guc_to_gt(cache_to_guc(cache)); } -/** - * xe_guc_buf_cache_init() - Initialize the GuC Buffer Cache. - * @cache: the &xe_guc_buf_cache to initialize - * - * The Buffer Cache allows to obtain a reusable buffer that can be used to pass - * indirect H2G data to GuC without a need to create a ad-hoc allocation. - * - * Return: 0 on success or a negative error code on failure. - */ -int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) +static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size) { struct xe_gt *gt = cache_to_gt(cache); struct xe_sa_manager *sam; - sam = __xe_sa_bo_manager_init(gt_to_tile(gt), SZ_8K, 0, sizeof(u32)); + sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32)); if (IS_ERR(sam)) return PTR_ERR(sam); cache->sam = sam; @@ -49,6 +42,35 @@ int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) } /** + * xe_guc_buf_cache_init() - Initialize the GuC Buffer Cache. + * @cache: the &xe_guc_buf_cache to initialize + * + * The Buffer Cache allows to obtain a reusable buffer that can be used to pass + * data to GuC or read data from GuC without a need to create a ad-hoc allocation. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) +{ + return guc_buf_cache_init(cache, XE_GUC_BUF_CACHE_DEFAULT_SIZE); +} + +/** + * xe_guc_buf_cache_init_with_size() - Initialize the GuC Buffer Cache. + * @cache: the &xe_guc_buf_cache to initialize + * @size: size in bytes + * + * Like xe_guc_buf_cache_init(), except it allows the caller to make the cache + * buffer larger, allowing to accommodate larger objects. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_guc_buf_cache_init_with_size(struct xe_guc_buf_cache *cache, u32 size) +{ + return guc_buf_cache_init(cache, max(XE_GUC_BUF_CACHE_DEFAULT_SIZE, size)); +} + +/** * xe_guc_buf_cache_dwords() - Number of dwords the GuC Buffer Cache supports. * @cache: the &xe_guc_buf_cache to query * @@ -116,6 +138,19 @@ void xe_guc_buf_release(const struct xe_guc_buf buf) } /** + * xe_guc_buf_sync_read() - Copy the data from the GPU memory to the sub-allocation. + * @buf: the &xe_guc_buf to sync + * + * Return: a CPU pointer of the sub-allocation. + */ +void *xe_guc_buf_sync_read(const struct xe_guc_buf buf) +{ + xe_sa_bo_sync_read(buf.sa); + + return xe_sa_bo_cpu_addr(buf.sa); +} + +/** * xe_guc_buf_flush() - Copy the data from the sub-allocation to the GPU memory. * @buf: the &xe_guc_buf to flush * diff --git a/drivers/gpu/drm/xe/xe_guc_buf.h b/drivers/gpu/drm/xe/xe_guc_buf.h index 0d67604d96bd..e3cca553fb00 100644 --- a/drivers/gpu/drm/xe/xe_guc_buf.h +++ b/drivers/gpu/drm/xe/xe_guc_buf.h @@ -12,6 +12,7 @@ #include "xe_guc_buf_types.h" int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache); +int xe_guc_buf_cache_init_with_size(struct xe_guc_buf_cache *cache, u32 size); u32 xe_guc_buf_cache_dwords(struct xe_guc_buf_cache *cache); struct xe_guc_buf xe_guc_buf_reserve(struct xe_guc_buf_cache *cache, u32 dwords); struct xe_guc_buf xe_guc_buf_from_data(struct xe_guc_buf_cache *cache, @@ -30,6 +31,7 @@ static inline bool xe_guc_buf_is_valid(const struct xe_guc_buf buf) } void *xe_guc_buf_cpu_ptr(const struct xe_guc_buf buf); +void *xe_guc_buf_sync_read(const struct xe_guc_buf buf); u64 xe_guc_buf_flush(const struct xe_guc_buf buf); u64 xe_guc_buf_gpu_addr(const struct xe_guc_buf buf); u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *ptr, u32 size); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index e68953ef3a00..2697d711adb2 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -21,12 +21,12 @@ #include "xe_devcoredump.h" #include "xe_device.h" #include "xe_gt.h" -#include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_monitor.h" #include "xe_guc.h" #include "xe_guc_log.h" +#include "xe_guc_pagefault.h" #include "xe_guc_relay.h" #include "xe_guc_submit.h" #include "xe_guc_tlb_inval.h" @@ -199,6 +199,9 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + cancel_work_sync(&ct->dead.worker); +#endif ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); @@ -1545,10 +1548,6 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case XE_GUC_ACTION_TLB_INVALIDATION_DONE: ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; - case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY: - ret = xe_guc_access_counter_notify_handler(guc, payload, - adj_len); - break; case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF: ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len); break; diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c new file mode 100644 index 000000000000..719a18187a31 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "abi/guc_actions_abi.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_pagefault.h" +#include "xe_pagefault.h" + +static void 
guc_ack_fault(struct xe_pagefault *pf, int err) +{ + u32 vfid = FIELD_GET(PFD_VFID, pf->producer.msg[2]); + u32 engine_instance = FIELD_GET(PFD_ENG_INSTANCE, pf->producer.msg[0]); + u32 engine_class = FIELD_GET(PFD_ENG_CLASS, pf->producer.msg[0]); + u32 pdata = FIELD_GET(PFD_PDATA_LO, pf->producer.msg[0]) | + (FIELD_GET(PFD_PDATA_HI, pf->producer.msg[1]) << + PFD_PDATA_HI_SHIFT); + u32 action[] = { + XE_GUC_ACTION_PAGE_FAULT_RES_DESC, + + FIELD_PREP(PFR_VALID, 1) | + FIELD_PREP(PFR_SUCCESS, !!err) | + FIELD_PREP(PFR_REPLY, PFR_ACCESS) | + FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | + FIELD_PREP(PFR_ASID, pf->consumer.asid), + + FIELD_PREP(PFR_VFID, vfid) | + FIELD_PREP(PFR_ENG_INSTANCE, engine_instance) | + FIELD_PREP(PFR_ENG_CLASS, engine_class) | + FIELD_PREP(PFR_PDATA, pdata), + }; + struct xe_guc *guc = pf->producer.private; + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); +} + +static const struct xe_pagefault_ops guc_pagefault_ops = { + .ack_fault = guc_ack_fault, +}; + +/** + * xe_guc_pagefault_handler() - G2H page fault handler + * @guc: GuC object + * @msg: G2H message + * @len: Length of G2H message + * + * Parse GuC to host (G2H) message into a struct xe_pagefault and forward onto + * the Xe page fault layer. + * + * Return: 0 on success, errno on failure + */ +int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_pagefault pf; + int i; + +#define GUC_PF_MSG_LEN_DW \ + (sizeof(struct xe_guc_pagefault_desc) / sizeof(u32)) + + BUILD_BUG_ON(GUC_PF_MSG_LEN_DW > XE_PAGEFAULT_PRODUCER_MSG_LEN_DW); + + if (len != GUC_PF_MSG_LEN_DW) + return -EPROTO; + + pf.gt = guc_to_gt(guc); + + /* + * XXX: These values happen to match the enum in xe_pagefault_types.h. + * If that changes, we’ll need to remap them here. + */ + pf.consumer.page_addr = ((u64)FIELD_GET(PFD_VIRTUAL_ADDR_HI, msg[3]) + << PFD_VIRTUAL_ADDR_HI_SHIFT) | + (FIELD_GET(PFD_VIRTUAL_ADDR_LO, msg[2]) << + PFD_VIRTUAL_ADDR_LO_SHIFT); + pf.consumer.asid = FIELD_GET(PFD_ASID, msg[1]); + pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]); + pf.consumer.fault_type = FIELD_GET(PFD_FAULT_TYPE, msg[2]); + if (FIELD_GET(XE2_PFD_TRVA_FAULT, msg[0])) + pf.consumer.fault_level = XE_PAGEFAULT_LEVEL_NACK; + else + pf.consumer.fault_level = FIELD_GET(PFD_FAULT_LEVEL, msg[0]); + pf.consumer.engine_class = FIELD_GET(PFD_ENG_CLASS, msg[0]); + pf.consumer.engine_instance = FIELD_GET(PFD_ENG_INSTANCE, msg[0]); + + pf.producer.private = guc; + pf.producer.ops = &guc_pagefault_ops; + for (i = 0; i < GUC_PF_MSG_LEN_DW; ++i) + pf.producer.msg[i] = msg[i]; + +#undef GUC_PF_MSG_LEN_DW + + return xe_pagefault_handler(guc_to_xe(guc), &pf); +} diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.h b/drivers/gpu/drm/xe/xe_guc_pagefault.h new file mode 100644 index 000000000000..3bd599e7207c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_GUC_PAGEFAULT_H_ +#define _XE_GUC_PAGEFAULT_H_ + +#include <linux/types.h> + +struct xe_guc; + +int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index ff22235857f8..951a49fb1d3e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -331,7 +331,7 @@ static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) * Our goal is to have the admin choices respected. 
*/ pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, - freq < pc->rpe_freq); + freq < xe_guc_pc_get_rpe_freq(pc)); return pc_action_set_param(pc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, @@ -363,7 +363,7 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) freq); } -static void mtl_update_rpa_value(struct xe_guc_pc *pc) +static u32 mtl_get_rpa_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); u32 reg; @@ -373,10 +373,10 @@ static void mtl_update_rpa_value(struct xe_guc_pc *pc) else reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPA_FREQUENCY); - pc->rpa_freq = decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg)); + return decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg)); } -static void mtl_update_rpe_value(struct xe_guc_pc *pc) +static u32 mtl_get_rpe_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); u32 reg; @@ -386,68 +386,56 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc) else reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPE_FREQUENCY); - pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); + return decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); } -static void tgl_update_rpa_value(struct xe_guc_pc *pc) +static u32 pvc_get_rpa_freq(struct xe_guc_pc *pc) { - struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); - u32 reg; - /* * For PVC we still need to use fused RP0 as the approximation for RPa * For other platforms than PVC we get the resolved RPa directly from * PCODE at a different register */ - if (xe->info.platform == XE_PVC) { - reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); - pc->rpa_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; - } else { - reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); - pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; - } + + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); + return REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } -static void tgl_update_rpe_value(struct xe_guc_pc *pc) +static u32 tgl_get_rpa_freq(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); + return REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + +static u32 pvc_get_rpe_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); u32 reg; /* * For PVC we still need to use fused RP1 as the approximation for RPe - * For other platforms than PVC we get the resolved RPe directly from - * PCODE at a different register */ - if (xe->info.platform == XE_PVC) { - reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); - pc->rpe_freq = REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER; - } else { - reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); - pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; - } + reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); + return REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } -static void pc_update_rp_values(struct xe_guc_pc *pc) +static u32 tgl_get_rpe_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); - - if (GRAPHICS_VERx100(xe) >= 1270) { - mtl_update_rpa_value(pc); - mtl_update_rpe_value(pc); - } else { - tgl_update_rpa_value(pc); - tgl_update_rpe_value(pc); - } + u32 reg; /* - * RPe is decided at runtime by PCODE. In the rare case where that's - * smaller than the fused min, we will trust the PCODE and use that - * as our minimum one. 
+ * For other platforms than PVC, we get the resolved RPe directly from + * PCODE at a different register */ - pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq); + reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); + return REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } /** @@ -548,9 +536,15 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) */ u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc) { - pc_update_rp_values(pc); + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); - return pc->rpa_freq; + if (GRAPHICS_VERx100(xe) == 1260) + return pvc_get_rpa_freq(pc); + else if (GRAPHICS_VERx100(xe) >= 1270) + return mtl_get_rpa_freq(pc); + else + return tgl_get_rpa_freq(pc); } /** @@ -561,9 +555,17 @@ u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc) */ u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc) { - pc_update_rp_values(pc); + struct xe_device *xe = pc_to_xe(pc); + u32 freq; + + if (GRAPHICS_VERx100(xe) == 1260) + freq = pvc_get_rpe_freq(pc); + else if (GRAPHICS_VERx100(xe) >= 1270) + freq = mtl_get_rpe_freq(pc); + else + freq = tgl_get_rpe_freq(pc); - return pc->rpe_freq; + return freq; } /** @@ -1022,7 +1024,7 @@ static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) /* * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. */ - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + ret = pc_set_min_freq(pc, min(xe_guc_pc_get_rpe_freq(pc), pc_max_freq_cap(pc))); if (!ret) ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); @@ -1133,8 +1135,6 @@ static int pc_init_freqs(struct xe_guc_pc *pc) if (ret) goto out; - pc_update_rp_values(pc); - pc_init_pcode_freq(pc); /* @@ -1340,7 +1340,7 @@ static void xe_guc_pc_fini_hw(void *arg) XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ - pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq)); + pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), xe_guc_pc_get_rpe_freq(pc))); xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 5e4ea53fbee6..711bbcdcb0d3 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -19,10 +19,6 @@ struct xe_guc_pc { atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; - /** @rpa_freq: HW RPa frequency - The Achievable one */ - u32 rpa_freq; - /** @rpe_freq: HW RPe frequency - The Efficient one */ - u32 rpe_freq; /** @rpn_freq: HW RPN frequency - The Minimum one */ u32 rpn_freq; /** @user_requested_min: Stash the minimum requested freq by user */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 56a5804726e9..2184af413b91 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -29,6 +29,7 @@ #include "xe_lrc.h" #include "xe_map.h" #include "xe_mocs.h" +#include "xe_printk.h" #include "xe_pt.h" #include "xe_res_cursor.h" #include "xe_sa.h" @@ -1210,6 +1211,128 @@ struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate) return migrate->q; } +/** + * xe_migrate_vram_copy_chunk() - Copy a chunk of a VRAM buffer object. + * @vram_bo: The VRAM buffer object. + * @vram_offset: The VRAM offset. + * @sysmem_bo: The sysmem buffer object. + * @sysmem_offset: The sysmem offset. + * @size: The size of VRAM chunk to copy. + * @dir: The direction of the copy operation. + * + * Copies a portion of a buffer object between VRAM and system memory. 
+ * On Xe2 platforms that support flat CCS, VRAM data is decompressed when + * copying to system memory. + * + * Return: Pointer to a dma_fence representing the last copy batch, or + * an error pointer on failure. If there is a failure, any copy operation + * started by the function call has been synced. + */ +struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset, + struct xe_bo *sysmem_bo, u64 sysmem_offset, + u64 size, enum xe_migrate_copy_dir dir) +{ + struct xe_device *xe = xe_bo_device(vram_bo); + struct xe_tile *tile = vram_bo->tile; + struct xe_gt *gt = tile->primary_gt; + struct xe_migrate *m = tile->migrate; + struct dma_fence *fence = NULL; + struct ttm_resource *vram = vram_bo->ttm.resource; + struct ttm_resource *sysmem = sysmem_bo->ttm.resource; + struct xe_res_cursor vram_it, sysmem_it; + u64 vram_L0_ofs, sysmem_L0_ofs; + u32 vram_L0_pt, sysmem_L0_pt; + u64 vram_L0, sysmem_L0; + bool to_sysmem = (dir == XE_MIGRATE_COPY_TO_SRAM); + bool use_comp_pat = to_sysmem && + GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe); + int pass = 0; + int err; + + xe_assert(xe, IS_ALIGNED(vram_offset | sysmem_offset | size, PAGE_SIZE)); + xe_assert(xe, xe_bo_is_vram(vram_bo)); + xe_assert(xe, !xe_bo_is_vram(sysmem_bo)); + xe_assert(xe, !range_overflows(vram_offset, size, (u64)vram_bo->ttm.base.size)); + xe_assert(xe, !range_overflows(sysmem_offset, size, (u64)sysmem_bo->ttm.base.size)); + + xe_res_first(vram, vram_offset, size, &vram_it); + xe_res_first_sg(xe_bo_sg(sysmem_bo), sysmem_offset, size, &sysmem_it); + + while (size) { + u32 pte_flags = PTE_UPDATE_FLAG_IS_VRAM; + u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ + struct xe_sched_job *job; + struct xe_bb *bb; + u32 update_idx; + bool usm = xe->info.has_usm; + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + sysmem_L0 = xe_migrate_res_sizes(m, &sysmem_it); + vram_L0 = min(xe_migrate_res_sizes(m, &vram_it), sysmem_L0); + + xe_dbg(xe, "Pass %u, size: %llu\n", pass++, vram_L0); + + pte_flags |= use_comp_pat ? 
PTE_UPDATE_FLAG_IS_COMP_PTE : 0; + batch_size += pte_update_size(m, pte_flags, vram, &vram_it, &vram_L0, + &vram_L0_ofs, &vram_L0_pt, 0, 0, avail_pts); + + batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0, &sysmem_L0_ofs, + &sysmem_L0_pt, 0, avail_pts, avail_pts); + batch_size += EMIT_COPY_DW; + + bb = xe_bb_new(gt, batch_size, usm); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + return ERR_PTR(err); + } + + if (xe_migrate_allow_identity(vram_L0, &vram_it)) + xe_res_next(&vram_it, vram_L0); + else + emit_pte(m, bb, vram_L0_pt, true, use_comp_pat, &vram_it, vram_L0, vram); + + emit_pte(m, bb, sysmem_L0_pt, false, false, &sysmem_it, vram_L0, sysmem); + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + if (to_sysmem) + emit_copy(gt, bb, vram_L0_ofs, sysmem_L0_ofs, vram_L0, XE_PAGE_SIZE); + else + emit_copy(gt, bb, sysmem_L0_ofs, vram_L0_ofs, vram_L0, XE_PAGE_SIZE); + + job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + err = PTR_ERR(job); + return ERR_PTR(err); + } + + xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); + + xe_assert(xe, dma_resv_test_signaled(vram_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP)); + xe_assert(xe, dma_resv_test_signaled(sysmem_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP)); + + scoped_guard(mutex, &m->job_mutex) { + xe_sched_job_arm(job); + dma_fence_put(fence); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + dma_fence_put(m->fence); + m->fence = dma_fence_get(fence); + } + + xe_bb_free(bb, fence); + size -= vram_L0; + } + + return fence; +} + static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u32 size, u32 pitch) { @@ -1912,11 +2035,6 @@ static bool xe_migrate_vram_use_pde(struct drm_pagemap_addr *sram_addr, return true; } -enum xe_migrate_copy_dir { - XE_MIGRATE_COPY_TO_VRAM, - XE_MIGRATE_COPY_TO_SRAM, -}; - #define XE_CACHELINE_BYTES 64ull #define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1) @@ -2333,6 +2451,20 @@ void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q) xe_vm_assert_held(q->vm); /* User queues VM's should be locked */ } +#if IS_ENABLED(CONFIG_PROVE_LOCKING) +/** + * xe_migrate_job_lock_assert() - Assert migrate job lock held of queue + * @q: Migrate queue + */ +void xe_migrate_job_lock_assert(struct xe_exec_queue *q) +{ + struct xe_migrate *m = gt_to_tile(q->gt)->migrate; + + xe_gt_assert(q->gt, q == m->q); + lockdep_assert_held(&m->job_mutex); +} +#endif + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_migrate.c" #endif diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 4fad324b6253..260e298e5dd7 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -28,6 +28,11 @@ struct xe_vma; enum xe_sriov_vf_ccs_rw_ctxs; +enum xe_migrate_copy_dir { + XE_MIGRATE_COPY_TO_VRAM, + XE_MIGRATE_COPY_TO_SRAM, +}; + /** * struct xe_migrate_pt_update_ops - Callbacks for the * xe_migrate_update_pgtables() function. 
@@ -131,6 +136,9 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate); struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate); +struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset, + struct xe_bo *sysmem_bo, u64 sysmem_offset, + u64 size, enum xe_migrate_copy_dir dir); int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, unsigned long offset, void *buf, int len, int write); @@ -152,6 +160,14 @@ xe_migrate_update_pgtables(struct xe_migrate *m, void xe_migrate_wait(struct xe_migrate *m); +#if IS_ENABLED(CONFIG_PROVE_LOCKING) +void xe_migrate_job_lock_assert(struct xe_exec_queue *q); +#else +static inline void xe_migrate_job_lock_assert(struct xe_exec_queue *q) +{ +} +#endif + void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q); void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index f901ba52b403..87a2bf53d661 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -10,6 +10,7 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> +#include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> #include <generated/xe_wa_oob.h> @@ -869,7 +870,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_oa_free_oa_buffer(stream); - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(stream->oa->xe); /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ @@ -1390,7 +1391,9 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro return 0; } -static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) +static int xe_oa_parse_syncs(struct xe_oa *oa, + struct xe_oa_stream *stream, + struct xe_oa_open_param *param) { int ret, num_syncs, num_ufence = 0; @@ -1410,7 +1413,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs], - &param->syncs_user[num_syncs], 0); + &param->syncs_user[num_syncs], + stream->ufence_syncobj, + ++stream->ufence_timeline_value, 0); if (ret) goto err_syncs; @@ -1540,7 +1545,7 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) return -ENODEV; param.xef = stream->xef; - err = xe_oa_parse_syncs(stream->oa, &param); + err = xe_oa_parse_syncs(stream->oa, stream, &param); if (err) goto err_config_put; @@ -1636,6 +1641,7 @@ static void xe_oa_destroy_locked(struct xe_oa_stream *stream) if (stream->exec_q) xe_exec_queue_put(stream->exec_q); + drm_syncobj_put(stream->ufence_syncobj); kfree(stream); } @@ -1711,7 +1717,6 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { struct xe_gt *gt = param->hwe->gt; - unsigned int fw_ref; int ret; stream->exec_q = param->exec_q; @@ -1766,8 +1771,8 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, /* Take runtime pm ref and forcewake to disable RC6 */ xe_pm_runtime_get(stream->oa->xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FORCEWAKE_ALL)) { ret = -ETIMEDOUT; goto err_fw_put; } @@ -1812,7 +1817,7 @@ 
err_put_k_exec_q: err_free_oa_buf: xe_oa_free_oa_buffer(stream); err_fw_put: - xe_force_wake_put(gt_to_fw(gt), fw_ref); + xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(stream->oa->xe); if (stream->override_gucrc) xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc)); @@ -1827,6 +1832,7 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, struct xe_oa_open_param *param) { struct xe_oa_stream *stream; + struct drm_syncobj *ufence_syncobj; int stream_fd; int ret; @@ -1837,17 +1843,31 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, goto exit; } + ret = drm_syncobj_create(&ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (ret) + goto exit; + stream = kzalloc(sizeof(*stream), GFP_KERNEL); if (!stream) { ret = -ENOMEM; - goto exit; + goto err_syncobj; } - + stream->ufence_syncobj = ufence_syncobj; stream->oa = oa; - ret = xe_oa_stream_init(stream, param); + + ret = xe_oa_parse_syncs(oa, stream, param); if (ret) goto err_free; + ret = xe_oa_stream_init(stream, param); + if (ret) { + while (param->num_syncs--) + xe_sync_entry_cleanup(&param->syncs[param->num_syncs]); + kfree(param->syncs); + goto err_free; + } + if (!param->disabled) { ret = xe_oa_enable_locked(stream); if (ret) @@ -1871,6 +1891,8 @@ err_destroy: xe_oa_stream_destroy(stream); err_free: kfree(stream); +err_syncobj: + drm_syncobj_put(ufence_syncobj); exit: return ret; } @@ -2084,22 +2106,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - ret = xe_oa_parse_syncs(oa, &param); - if (ret) - goto err_exec_q; - mutex_lock(&param.hwe->gt->oa.gt_lock); ret = xe_oa_stream_open_ioctl_locked(oa, &param); mutex_unlock(&param.hwe->gt->oa.gt_lock); if (ret < 0) - goto err_sync_cleanup; + goto err_exec_q; return ret; -err_sync_cleanup: - while (param.num_syncs--) - xe_sync_entry_cleanup(&param.syncs[param.num_syncs]); - kfree(param.syncs); err_exec_q: if (param.exec_q) xe_exec_queue_put(param.exec_q); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 2628f78c4e8d..cf080f412189 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -15,6 +15,8 @@ #include "regs/xe_reg_defs.h" #include "xe_hw_engine_types.h" +struct drm_syncobj; + #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M enum xe_oa_report_header { @@ -248,6 +250,12 @@ struct xe_oa_stream { /** @xef: xe_file with which the stream was opened */ struct xe_file *xef; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @last_fence: fence to use in stream destroy when needed */ struct dma_fence *last_fence; @@ -256,5 +264,8 @@ struct xe_oa_stream { /** @syncs: syncs to wait on and to signal */ struct xe_sync_entry *syncs; + + /** @fw_ref: Forcewake reference */ + unsigned int fw_ref; }; #endif diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c new file mode 100644 index 000000000000..fe3e40145012 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/circ_buf.h> + +#include <drm/drm_exec.h> +#include <drm/drm_managed.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt_printk.h" +#include "xe_gt_types.h" +#include "xe_gt_stats.h" +#include "xe_hw_engine.h" +#include "xe_pagefault.h" +#include "xe_pagefault_types.h" +#include "xe_svm.h" +#include "xe_trace_bo.h" 
+#include "xe_vm.h" + +/** + * DOC: Xe page faults + * + * Xe page faults are handled in two layers. The producer layer interacts with + * hardware or firmware to receive and parse faults into struct xe_pagefault, + * then forwards them to the consumer. The consumer layer services the faults + * (e.g., memory migration, page table updates) and acknowledges the result back + * to the producer, which then forwards the results to the hardware or firmware. + * The consumer uses a page fault queue sized to absorb all potential faults and + * a multi-threaded worker to process them. Multiple producers are supported, + * with a single shared consumer. + * + * xe_pagefault.c implements the consumer layer. + */ + +static int xe_pagefault_entry_size(void) +{ + /* + * Power of two alignment is not a hardware requirement, rather a + * software restriction which makes the math for page fault queue + * management simplier. + */ + return roundup_pow_of_two(sizeof(struct xe_pagefault)); +} + +static int xe_pagefault_begin(struct drm_exec *exec, struct xe_vma *vma, + struct xe_vram_region *vram, bool need_vram_move) +{ + struct xe_bo *bo = xe_vma_bo(vma); + struct xe_vm *vm = xe_vma_vm(vma); + int err; + + err = xe_vm_lock_vma(exec, vma); + if (err) + return err; + + if (!bo) + return 0; + + return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) : + xe_bo_validate(bo, vm, true, exec); +} + +static int xe_pagefault_handle_vma(struct xe_gt *gt, struct xe_vma *vma, + bool atomic) +{ + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct dma_fence *fence; + int err, needs_vram; + + lockdep_assert_held_write(&vm->lock); + + needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); + if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma))) + return needs_vram < 0 ? 
needs_vram : -EACCES; + + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, + xe_vma_size(vma) / SZ_1K); + + trace_xe_vma_pagefault(vma); + + /* Check if VMA is valid, opportunistic check only */ + if (xe_vm_has_valid_gpu_mapping(tile, vma->tile_present, + vma->tile_invalidated) && !atomic) + return 0; + +retry_userptr: + if (xe_vma_is_userptr(vma) && + xe_vma_userptr_check_repin(to_userptr_vma(vma))) { + struct xe_userptr_vma *uvma = to_userptr_vma(vma); + + err = xe_vma_userptr_pin_pages(uvma); + if (err) + return err; + } + + /* Lock VM and BOs dma-resv */ + xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); + drm_exec_init(&exec, 0, 0); + drm_exec_until_all_locked(&exec) { + err = xe_pagefault_begin(&exec, vma, tile->mem.vram, + needs_vram == 1); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + if (err) + goto unlock_dma_resv; + + /* Bind VMA only to the GT that has faulted */ + trace_xe_vma_pf_bind(vma); + xe_vm_set_validation_exec(vm, &exec); + fence = xe_vma_rebind(vm, vma, BIT(tile->id)); + xe_vm_set_validation_exec(vm, NULL); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + xe_validation_retry_on_oom(&ctx, &err); + goto unlock_dma_resv; + } + } + + dma_fence_wait(fence, false); + dma_fence_put(fence); + +unlock_dma_resv: + xe_validation_ctx_fini(&ctx); + if (err == -EAGAIN) + goto retry_userptr; + + return err; +} + +static bool +xe_pagefault_access_is_atomic(enum xe_pagefault_access_type access_type) +{ + return access_type == XE_PAGEFAULT_ACCESS_TYPE_ATOMIC; +} + +static struct xe_vm *xe_pagefault_asid_to_vm(struct xe_device *xe, u32 asid) +{ + struct xe_vm *vm; + + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, asid); + if (vm && xe_vm_in_fault_mode(vm)) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + return vm; +} + +static int xe_pagefault_service(struct xe_pagefault *pf) +{ + struct xe_gt *gt = pf->gt; + struct xe_device *xe = gt_to_xe(gt); + struct xe_vm *vm; + struct xe_vma *vma = NULL; + int err; + bool atomic; + + /* Producer flagged this fault to be nacked */ + if (pf->consumer.fault_level == XE_PAGEFAULT_LEVEL_NACK) + return -EFAULT; + + vm = xe_pagefault_asid_to_vm(xe, pf->consumer.asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); + + /* + * TODO: Change to read lock? Using write lock for simplicity. 
+ */ + down_write(&vm->lock); + + if (xe_vm_is_closed(vm)) { + err = -ENOENT; + goto unlock_vm; + } + + vma = xe_vm_find_vma_by_addr(vm, pf->consumer.page_addr); + if (!vma) { + err = -EINVAL; + goto unlock_vm; + } + + atomic = xe_pagefault_access_is_atomic(pf->consumer.access_type); + + if (xe_vma_is_cpu_addr_mirror(vma)) + err = xe_svm_handle_pagefault(vm, vma, gt, + pf->consumer.page_addr, atomic); + else + err = xe_pagefault_handle_vma(gt, vma, atomic); + +unlock_vm: + if (!err) + vm->usm.last_fault_vma = vma; + up_write(&vm->lock); + xe_vm_put(vm); + + return err; +} + +static bool xe_pagefault_queue_pop(struct xe_pagefault_queue *pf_queue, + struct xe_pagefault *pf) +{ + bool found_fault = false; + + spin_lock_irq(&pf_queue->lock); + if (pf_queue->tail != pf_queue->head) { + memcpy(pf, pf_queue->data + pf_queue->tail, sizeof(*pf)); + pf_queue->tail = (pf_queue->tail + xe_pagefault_entry_size()) % + pf_queue->size; + found_fault = true; + } + spin_unlock_irq(&pf_queue->lock); + + return found_fault; +} + +static void xe_pagefault_print(struct xe_pagefault *pf) +{ + xe_gt_dbg(pf->gt, "\n\tASID: %d\n" + "\tFaulted Address: 0x%08x%08x\n" + "\tFaultType: %d\n" + "\tAccessType: %d\n" + "\tFaultLevel: %d\n" + "\tEngineClass: %d %s\n" + "\tEngineInstance: %d\n", + pf->consumer.asid, + upper_32_bits(pf->consumer.page_addr), + lower_32_bits(pf->consumer.page_addr), + pf->consumer.fault_type, + pf->consumer.access_type, + pf->consumer.fault_level, + pf->consumer.engine_class, + xe_hw_engine_class_to_str(pf->consumer.engine_class), + pf->consumer.engine_instance); +} + +static void xe_pagefault_queue_work(struct work_struct *w) +{ + struct xe_pagefault_queue *pf_queue = + container_of(w, typeof(*pf_queue), worker); + struct xe_pagefault pf; + unsigned long threshold; + +#define USM_QUEUE_MAX_RUNTIME_MS 20 + threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); + + while (xe_pagefault_queue_pop(pf_queue, &pf)) { + int err; + + if (!pf.gt) /* Fault squashed during reset */ + continue; + + err = xe_pagefault_service(&pf); + if (err) { + xe_pagefault_print(&pf); + xe_gt_dbg(pf.gt, "Fault response: Unsuccessful %pe\n", + ERR_PTR(err)); + } + + pf.producer.ops->ack_fault(&pf, err); + + if (time_after(jiffies, threshold)) { + queue_work(gt_to_xe(pf.gt)->usm.pf_wq, w); + break; + } + } +#undef USM_QUEUE_MAX_RUNTIME_MS +} + +static int xe_pagefault_queue_init(struct xe_device *xe, + struct xe_pagefault_queue *pf_queue) +{ + struct xe_gt *gt; + int total_num_eus = 0; + u8 id; + + for_each_gt(gt, xe, id) { + xe_dss_mask_t all_dss; + int num_dss, num_eus; + + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, + gt->fuse_topo.c_dss_mask, XE_MAX_DSS_FUSE_BITS); + + num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); + num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, + XE_MAX_EU_FUSE_BITS) * num_dss; + + total_num_eus += num_eus; + } + + xe_assert(xe, total_num_eus); + + /* + * user can issue separate page faults per EU and per CS + * + * XXX: Multiplier required as compute UMD are getting PF queue errors + * without it. Follow on why this multiplier is required. 
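+	 *
+	 * Purely illustrative sizing example (the EU and engine counts below
+	 * are assumptions for the sake of the arithmetic, not values taken
+	 * from any particular platform): with a 64-byte rounded entry size,
+	 * 1024 total EUs and 64 hardware engines, the raw size is
+	 * (1024 + 64) * 64 * 8 = 544 KiB, which roundup_pow_of_two() below
+	 * grows to 1 MiB.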
+ */ +#define PF_MULTIPLIER 8 + pf_queue->size = (total_num_eus + XE_NUM_HW_ENGINES) * + xe_pagefault_entry_size() * PF_MULTIPLIER; + pf_queue->size = roundup_pow_of_two(pf_queue->size); +#undef PF_MULTIPLIER + + drm_dbg(&xe->drm, "xe_pagefault_entry_size=%d, total_num_eus=%d, pf_queue->size=%u", + xe_pagefault_entry_size(), total_num_eus, pf_queue->size); + + spin_lock_init(&pf_queue->lock); + INIT_WORK(&pf_queue->worker, xe_pagefault_queue_work); + + pf_queue->data = drmm_kzalloc(&xe->drm, pf_queue->size, GFP_KERNEL); + if (!pf_queue->data) + return -ENOMEM; + + return 0; +} + +static void xe_pagefault_fini(void *arg) +{ + struct xe_device *xe = arg; + + destroy_workqueue(xe->usm.pf_wq); +} + +/** + * xe_pagefault_init() - Page fault init + * @xe: xe device instance + * + * Initialize Xe page fault state. Must be done after reading fuses. + * + * Return: 0 on Success, errno on failure + */ +int xe_pagefault_init(struct xe_device *xe) +{ + int err, i; + + if (!xe->info.has_usm) + return 0; + + xe->usm.pf_wq = alloc_workqueue("xe_page_fault_work_queue", + WQ_UNBOUND | WQ_HIGHPRI, + XE_PAGEFAULT_QUEUE_COUNT); + if (!xe->usm.pf_wq) + return -ENOMEM; + + for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) { + err = xe_pagefault_queue_init(xe, xe->usm.pf_queue + i); + if (err) + goto err_out; + } + + return devm_add_action_or_reset(xe->drm.dev, xe_pagefault_fini, xe); + +err_out: + destroy_workqueue(xe->usm.pf_wq); + return err; +} + +static void xe_pagefault_queue_reset(struct xe_device *xe, struct xe_gt *gt, + struct xe_pagefault_queue *pf_queue) +{ + u32 i; + + /* Driver load failure guard / USM not enabled guard */ + if (!pf_queue->data) + return; + + /* Squash all pending faults on the GT */ + + spin_lock_irq(&pf_queue->lock); + for (i = pf_queue->tail; i != pf_queue->head; + i = (i + xe_pagefault_entry_size()) % pf_queue->size) { + struct xe_pagefault *pf = pf_queue->data + i; + + if (pf->gt == gt) + pf->gt = NULL; + } + spin_unlock_irq(&pf_queue->lock); +} + +/** + * xe_pagefault_reset() - Page fault reset for a GT + * @xe: xe device instance + * @gt: GT being reset + * + * Reset the Xe page fault state for a GT; that is, squash any pending faults on + * the GT. + */ +void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt) +{ + int i; + + for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) + xe_pagefault_queue_reset(xe, gt, xe->usm.pf_queue + i); +} + +static bool xe_pagefault_queue_full(struct xe_pagefault_queue *pf_queue) +{ + lockdep_assert_held(&pf_queue->lock); + + return CIRC_SPACE(pf_queue->head, pf_queue->tail, pf_queue->size) <= + xe_pagefault_entry_size(); +} + +/** + * xe_pagefault_handler() - Page fault handler + * @xe: xe device instance + * @pf: Page fault + * + * Sink the page fault to a queue (i.e., a memory buffer) and queue a worker to + * service it. Safe to be called from IRQ or process context. Reclaim safe. 
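+ *
+ * Minimal producer-side sketch (the producer helpers and local variables
+ * here are illustrative assumptions; only struct xe_pagefault, struct
+ * xe_pagefault_ops and this function come from this code):
+ *
+ *	struct xe_pagefault pf = {
+ *		.gt = gt,
+ *		.consumer.asid = asid,
+ *		.consumer.page_addr = fault_addr,
+ *		.consumer.access_type = XE_PAGEFAULT_ACCESS_TYPE_WRITE,
+ *		.consumer.fault_type = XE_PAGEFAULT_TYPE_NOT_PRESENT,
+ *		.producer.ops = &my_producer_ops,	/* provides .ack_fault() */
+ *	};
+ *
+ *	err = xe_pagefault_handler(xe, &pf);
+ *	/* on error the fault was not queued and ack_fault() will not be called */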
+ * + * Return: 0 on success, errno on failure + */ +int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf) +{ + struct xe_pagefault_queue *pf_queue = xe->usm.pf_queue + + (pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT); + unsigned long flags; + bool full; + + spin_lock_irqsave(&pf_queue->lock, flags); + full = xe_pagefault_queue_full(pf_queue); + if (!full) { + memcpy(pf_queue->data + pf_queue->head, pf, sizeof(*pf)); + pf_queue->head = (pf_queue->head + xe_pagefault_entry_size()) % + pf_queue->size; + queue_work(xe->usm.pf_wq, &pf_queue->worker); + } else { + drm_warn(&xe->drm, + "PageFault Queue (%d) full, shouldn't be possible\n", + pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT); + } + spin_unlock_irqrestore(&pf_queue->lock, flags); + + return full ? -ENOSPC : 0; +} diff --git a/drivers/gpu/drm/xe/xe_pagefault.h b/drivers/gpu/drm/xe/xe_pagefault.h new file mode 100644 index 000000000000..bd0cdf9ed37f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PAGEFAULT_H_ +#define _XE_PAGEFAULT_H_ + +struct xe_device; +struct xe_gt; +struct xe_pagefault; + +int xe_pagefault_init(struct xe_device *xe); + +void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt); + +int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h new file mode 100644 index 000000000000..d3b516407d60 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PAGEFAULT_TYPES_H_ +#define _XE_PAGEFAULT_TYPES_H_ + +#include <linux/workqueue.h> + +struct xe_gt; +struct xe_pagefault; + +/** enum xe_pagefault_access_type - Xe page fault access type */ +enum xe_pagefault_access_type { + /** @XE_PAGEFAULT_ACCESS_TYPE_READ: Read access type */ + XE_PAGEFAULT_ACCESS_TYPE_READ = 0, + /** @XE_PAGEFAULT_ACCESS_TYPE_WRITE: Write access type */ + XE_PAGEFAULT_ACCESS_TYPE_WRITE = 1, + /** @XE_PAGEFAULT_ACCESS_TYPE_ATOMIC: Atomic access type */ + XE_PAGEFAULT_ACCESS_TYPE_ATOMIC = 2, +}; + +/** enum xe_pagefault_type - Xe page fault type */ +enum xe_pagefault_type { + /** @XE_PAGEFAULT_TYPE_NOT_PRESENT: Not present */ + XE_PAGEFAULT_TYPE_NOT_PRESENT = 0, + /** @XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION: Write access violation */ + XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION = 1, + /** @XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION: Atomic access violation */ + XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION = 2, +}; + +/** struct xe_pagefault_ops - Xe pagefault ops (producer) */ +struct xe_pagefault_ops { + /** + * @ack_fault: Ack fault + * @pf: Page fault + * @err: Error state of fault + * + * Page fault producer receives acknowledgment from the consumer and + * sends the result to the HW/FW interface. + */ + void (*ack_fault)(struct xe_pagefault *pf, int err); +}; + +/** + * struct xe_pagefault - Xe page fault + * + * Generic page fault structure for communication between producer and consumer. + * Carefully sized to be 64 bytes. Upon a device page fault, the producer + * populates this structure, and the consumer copies it into the page-fault + * queue for deferred handling. + */ +struct xe_pagefault { + /** + * @gt: GT of fault + */ + struct xe_gt *gt; + /** + * @consumer: State for the software handling the fault. 
Populated by
+ * the producer and may be modified by the consumer to communicate
+ * information back to the producer upon fault acknowledgment.
+ */
+	struct {
+		/** @consumer.page_addr: address of page fault */
+		u64 page_addr;
+		/** @consumer.asid: address space ID */
+		u32 asid;
+		/**
+		 * @consumer.access_type: access type, u8 rather than enum to
+		 * keep size compact
+		 */
+		u8 access_type;
+		/**
+		 * @consumer.fault_type: fault type, u8 rather than enum to
+		 * keep size compact
+		 */
+		u8 fault_type;
+#define XE_PAGEFAULT_LEVEL_NACK 0xff /* Producer indicates nack fault */
+		/** @consumer.fault_level: fault level */
+		u8 fault_level;
+		/** @consumer.engine_class: engine class */
+		u8 engine_class;
+		/** @consumer.engine_instance: engine instance */
+		u8 engine_instance;
+		/** @consumer.reserved: reserved bytes for future expansion */
+		u8 reserved[7];
+	} consumer;
+	/**
+	 * @producer: State for the producer (i.e., HW/FW interface). Populated
+	 * by the producer and should not be modified, or even inspected, by the
+	 * consumer, except for calling operations.
+	 */
+	struct {
+		/** @producer.private: private pointer */
+		void *private;
+		/** @producer.ops: operations */
+		const struct xe_pagefault_ops *ops;
+#define XE_PAGEFAULT_PRODUCER_MSG_LEN_DW 4
+		/**
+		 * @producer.msg: page fault message, used by the producer in
+		 * the fault acknowledgment to formulate the response to the
+		 * HW/FW interface. Embedded in struct xe_pagefault, rather
+		 * than allocated separately, because the producer typically
+		 * receives the fault in a context where memory cannot be
+		 * allocated (e.g., atomic context or the reclaim path).
+		 */
+		u32 msg[XE_PAGEFAULT_PRODUCER_MSG_LEN_DW];
+	} producer;
+};
+
+/**
+ * struct xe_pagefault_queue - Xe pagefault queue (consumer)
+ *
+ * Used to capture all device page faults for deferred processing. Size this
+ * queue to absorb the device's worst-case number of outstanding faults.
+ */
+struct xe_pagefault_queue {
+	/**
+	 * @data: Data in queue containing struct xe_pagefault, protected by
+	 * @lock
+	 */
+	void *data;
+	/** @size: Size of queue in bytes */
+	u32 size;
+	/** @head: Head pointer in bytes, moved by producer, protected by @lock */
+	u32 head;
+	/** @tail: Tail pointer in bytes, moved by consumer, protected by @lock */
+	u32 tail;
+	/** @lock: protects page fault queue */
+	spinlock_t lock;
+	/** @worker: to process page faults */
+	struct work_struct worker;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 7649b554942a..68171cceea18 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -115,7 +115,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
 		REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
 		REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
 	.coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ?
\ - XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ + XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \ + .valid = 1 \ } static const struct xe_pat_table_entry xe2_pat_table[] = { @@ -368,7 +369,7 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) if (!fw_ref) return -ETIMEDOUT; - drm_printf(p, "PAT table:\n"); + drm_printf(p, "PAT table: (* = reserved entry)\n"); for (i = 0; i < xe->pat.n_entries; i++) { if (xe_gt_is_media_type(gt)) @@ -376,14 +377,14 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) else pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)\n", i, + drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)%s\n", i, !!(pat & XE2_NO_PROMOTE), !!(pat & XE2_COMP_EN), REG_FIELD_GET(XE2_L3_CLOS, pat), REG_FIELD_GET(XE2_L3_POLICY, pat), REG_FIELD_GET(XE2_L4_POLICY, pat), REG_FIELD_GET(XE2_COH_MODE, pat), - pat); + pat, xe->pat.table[i].valid ? "" : " *"); } /* @@ -426,18 +427,18 @@ static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p) if (!fw_ref) return -ETIMEDOUT; - drm_printf(p, "PAT table:\n"); + drm_printf(p, "PAT table: (* = reserved entry)\n"); for (i = 0; i < xe->pat.n_entries; i++) { pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)\n", i, + drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)%s\n", i, !!(pat & XE2_NO_PROMOTE), REG_FIELD_GET(XE2_L3_CLOS, pat), REG_FIELD_GET(XE2_L3_POLICY, pat), REG_FIELD_GET(XE2_L4_POLICY, pat), REG_FIELD_GET(XE2_COH_MODE, pat), - pat); + pat, xe->pat.table[i].valid ? "" : " *"); } /* diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index 268c9a899f56..05dae03a5f54 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -29,6 +29,11 @@ struct xe_pat_table_entry { #define XE_COH_NONE 1 #define XE_COH_AT_LEAST_1WAY 2 u16 coh_mode; + + /** + * @valid: Set to 1 if the entry is valid, 0 if it's reserved. 
+ */ + u16 valid; }; /** diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 90e2ee5e9270..bbe6f8e65844 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -333,7 +333,7 @@ static const struct xe_device_desc mtl_desc = { .has_pxp = true, .max_gt_per_tile = 2, .va_bits = 48, - .vm_max_level = 4, + .vm_max_level = 3, }; static const struct xe_device_desc lnl_desc = { diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 735f51effc7a..9ff69c4843b0 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -20,6 +20,7 @@ #include "xe_sriov_pf_control.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_provision.h" +#include "xe_sriov_pf_sysfs.h" #include "xe_sriov_printk.h" static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) @@ -30,18 +31,6 @@ static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) xe_sriov_pf_control_reset_vf(xe, n); } -static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id) -{ - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - - xe_assert(xe, IS_SRIOV_PF(xe)); - - /* caller must use pci_dev_put() */ - return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), - pdev->bus->number, - pci_iov_virtfn_devfn(pdev, vf_id)); -} - static void pf_link_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev_pf = to_pci_dev(xe->drm.dev); @@ -60,7 +49,7 @@ static void pf_link_vfs(struct xe_device *xe, int num_vfs) * enforce correct resume order. */ for (n = 1; n <= num_vfs; n++) { - pdev_vf = xe_pci_pf_get_vf_dev(xe, n - 1); + pdev_vf = xe_pci_sriov_get_vf_pdev(pdev_pf, n); /* unlikely, something weird is happening, abort */ if (!pdev_vf) { @@ -105,6 +94,20 @@ static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs) return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes)); } +static int pf_prepare_vfs_enabling(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + /* make sure we are not locked-down by other components */ + return xe_sriov_pf_arm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, false, NULL); +} + +static void pf_finish_vfs_enabling(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + /* allow other components to lockdown VFs enabling */ + xe_sriov_pf_disarm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, false, NULL); +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -120,6 +123,10 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) if (err) goto out; + err = pf_prepare_vfs_enabling(xe); + if (err) + goto out; + /* * We must hold additional reference to the runtime PM to keep PF in D0 * during VFs lifetime, as our VFs do not implement the PM capability. 
@@ -150,6 +157,8 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) xe_sriov_info(xe, "Enabled %u of %u VF%s\n", num_vfs, total_vfs, str_plural(total_vfs)); + xe_sriov_pf_sysfs_link_vfs(xe, num_vfs); + pf_engine_activity_stats(xe, num_vfs, true); return num_vfs; @@ -157,6 +166,7 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) failed: xe_sriov_pf_unprovision_vfs(xe, num_vfs); xe_pm_runtime_put(xe); + pf_finish_vfs_enabling(xe); out: xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n", num_vfs, str_plural(num_vfs), ERR_PTR(err)); @@ -177,6 +187,8 @@ static int pf_disable_vfs(struct xe_device *xe) pf_engine_activity_stats(xe, num_vfs, false); + xe_sriov_pf_sysfs_unlink_vfs(xe, num_vfs); + pci_disable_sriov(pdev); pf_reset_vfs(xe, num_vfs); @@ -186,6 +198,8 @@ static int pf_disable_vfs(struct xe_device *xe) /* not needed anymore - see pf_enable_vfs() */ xe_pm_runtime_put(xe); + pf_finish_vfs_enabling(xe); + xe_sriov_info(xe, "Disabled %u VF%s\n", num_vfs, str_plural(num_vfs)); return 0; } @@ -228,3 +242,25 @@ int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) return ret; } + +/** + * xe_pci_sriov_get_vf_pdev() - Lookup the VF's PCI device using the VF identifier. + * @pdev: the PF's &pci_dev + * @vfid: VF identifier (1-based) + * + * The caller must decrement the reference count by calling pci_dev_put(). + * + * Return: the VF's &pci_dev or NULL if the VF device was not found. + */ +struct pci_dev *xe_pci_sriov_get_vf_pdev(struct pci_dev *pdev, unsigned int vfid) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + + xe_assert(xe, dev_is_pf(&pdev->dev)); + xe_assert(xe, vfid); + xe_assert(xe, vfid <= pci_sriov_get_totalvfs(pdev)); + + return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), + pdev->bus->number, + pci_iov_virtfn_devfn(pdev, vfid - 1)); +} diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.h b/drivers/gpu/drm/xe/xe_pci_sriov.h index c76dd0d90495..b9105d71dbb1 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.h +++ b/drivers/gpu/drm/xe/xe_pci_sriov.h @@ -10,6 +10,7 @@ struct pci_dev; #ifdef CONFIG_PCI_IOV int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs); +struct pci_dev *xe_pci_sriov_get_vf_pdev(struct pci_dev *pdev, unsigned int vfid); #else static inline int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) { diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 6a7ddb9005f9..0d33c14ea0cf 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -32,27 +32,39 @@ static int pcode_mailbox_status(struct xe_tile *tile) { + const char *err_str; + int err_decode; u32 err; - static const struct pcode_err_decode err_decode[] = { - [PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"}, - [PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"}, - [PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"}, - [PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"}, - [PCODE_LOCKED] = {-EBUSY, "PCODE Locked"}, - [PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW, - "GT ratio out of range"}, - [PCODE_REJECTED] = {-EACCES, "PCODE Rejected"}, - [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, - }; + +#define CASE_ERR(_err, _err_decode, _err_str) \ + case _err: \ + err_decode = _err_decode; \ + err_str = _err_str; \ + break err = xe_mmio_read32(&tile->mmio, PCODE_MAILBOX) & PCODE_ERROR_MASK; + switch (err) { + CASE_ERR(PCODE_ILLEGAL_CMD, -ENXIO, "Illegal Command"); + CASE_ERR(PCODE_TIMEOUT, -ETIMEDOUT, "Timed out"); + CASE_ERR(PCODE_ILLEGAL_DATA, -EINVAL, "Illegal Data"); + CASE_ERR(PCODE_ILLEGAL_SUBCOMMAND, -ENXIO, 
"Illegal Subcommand"); + CASE_ERR(PCODE_LOCKED, -EBUSY, "PCODE Locked"); + CASE_ERR(PCODE_GT_RATIO_OUT_OF_RANGE, -EOVERFLOW, "GT ratio out of range"); + CASE_ERR(PCODE_REJECTED, -EACCES, "PCODE Rejected"); + default: + err_decode = -EPROTO; + err_str = "Unknown"; + } + if (err) { - drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err, - err_decode[err].str ?: "Unknown"); - return err_decode[err].errno ?: -EPROTO; + drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", + err_decode, err_str); + + return err_decode; } return 0; +#undef CASE_ERR } static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *data1, diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 92bfcba51e19..70dcd6625680 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -92,9 +92,3 @@ #define BMG_PCIE_CAP XE_REG(0x138340) #define LINK_DOWNGRADE REG_GENMASK(1, 0) #define DOWNGRADE_CAPABLE 2 - -struct pcode_err_decode { - int errno; - const char *str; -}; - diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 7b089e6fb63f..44924512830f 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -112,7 +112,7 @@ void xe_pm_might_block_on_suspend(void) } /** - * xe_pm_might_block_on_suspend() - Block pending suspend. + * xe_pm_block_on_suspend() - Block pending suspend. * @xe: The xe device about to be suspended. * * Block if the pm notifier has start evicting bos, to avoid diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 7c5bca78c8bf..884127b4d97d 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -3,8 +3,6 @@ * Copyright © 2022 Intel Corporation */ -#include <linux/dma-fence-array.h> - #include "xe_pt.h" #include "regs/xe_gtt_defs.h" @@ -1340,13 +1338,6 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job, return err; } - if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); - } - for (i = 0; job && !err && i < vops->num_syncs; i++) err = xe_sync_entry_add_deps(&vops->syncs[i], job); @@ -2359,10 +2350,9 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm *vm = vops->vm; struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[tile->id]; - struct dma_fence *fence, *ifence, *mfence; + struct xe_exec_queue *q = pt_update_ops->q; + struct dma_fence *fence, *ifence = NULL, *mfence = NULL; struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; struct xe_range_fence *rfence; struct xe_vma_op *op; int err = 0, i; @@ -2390,15 +2380,14 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) #endif if (pt_update_ops->needs_invalidation) { - struct xe_exec_queue *q = pt_update_ops->q; struct xe_dep_scheduler *dep_scheduler = to_dep_scheduler(q, tile->primary_gt); ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval, - dep_scheduler, + dep_scheduler, vm, pt_update_ops->start, pt_update_ops->last, - vm->usm.asid); + XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); if (IS_ERR(ijob)) { err = PTR_ERR(ijob); goto kill_vm_tile1; @@ -2410,26 +2399,15 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) mjob = xe_tlb_inval_job_create(q, &tile->media_gt->tlb_inval, - dep_scheduler, + dep_scheduler, vm, pt_update_ops->start, pt_update_ops->last, - 
vm->usm.asid); + XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT); if (IS_ERR(mjob)) { err = PTR_ERR(mjob); goto free_ijob; } update.mjob = mjob; - - fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); - if (!fences) { - err = -ENOMEM; - goto free_ijob; - } - cf = dma_fence_array_alloc(2); - if (!cf) { - err = -ENOMEM; - goto free_ijob; - } } } @@ -2460,31 +2438,12 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) pt_update_ops->last, fence)) dma_fence_wait(fence, false); - /* tlb invalidation must be done before signaling unbind/rebind */ - if (ijob) { - struct dma_fence *__fence; - + if (ijob) ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence); - __fence = ifence; - - if (mjob) { - fences[0] = ifence; - mfence = xe_tlb_inval_job_push(mjob, tile->migrate, - fence); - fences[1] = mfence; - - dma_fence_array_init(cf, 2, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - __fence = &cf->base; - } - - dma_fence_put(fence); - fence = __fence; - } + if (mjob) + mfence = xe_tlb_inval_job_push(mjob, tile->migrate, fence); - if (!mjob) { + if (!mjob && !ijob) { dma_resv_add_fence(xe_vm_resv(vm), fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : @@ -2492,6 +2451,14 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) list_for_each_entry(op, &vops->list, link) op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); + } else if (ijob && !mjob) { + dma_resv_add_fence(xe_vm_resv(vm), ifence, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, ifence, NULL); } else { dma_resv_add_fence(xe_vm_resv(vm), ifence, pt_update_ops->wait_vm_bookkeep ? @@ -2511,16 +2478,23 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) if (pt_update_ops->needs_svm_lock) xe_svm_notifier_unlock(vm); + /* + * The last fence is only used for zero bind queue idling; migrate + * queues are not exposed to user space. + */ + if (!(q->flags & EXEC_QUEUE_FLAG_MIGRATE)) + xe_exec_queue_last_fence_set(q, vm, fence); + xe_tlb_inval_job_put(mjob); xe_tlb_inval_job_put(ijob); + dma_fence_put(ifence); + dma_fence_put(mfence); return fence; free_rfence: kfree(rfence); free_ijob: - kfree(cf); - kfree(fences); xe_tlb_inval_job_put(mjob); xe_tlb_inval_job_put(ijob); kill_vm_tile1: diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 690bc327a363..7ca360b2c20d 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -89,6 +89,13 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4)) }, + { XE_RTP_NAME("14024997852"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(WHITELIST(FF_MODE, + RING_FORCE_TO_NONPRIV_ACCESS_RW), + WHITELIST(VFLSKPD, + RING_FORCE_TO_NONPRIV_ACCESS_RW)) + }, }; static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index fedd017d6dd3..63a5263dcf1b 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -110,6 +110,10 @@ struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, return drm_suballoc_new(&sa_manager->base, size, gfp, true, 0); } +/** + * xe_sa_bo_flush_write() - Copy the data from the sub-allocation to the GPU memory. 
+ * @sa_bo: the &drm_suballoc to flush + */ void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) { struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); @@ -123,6 +127,23 @@ void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) drm_suballoc_size(sa_bo)); } +/** + * xe_sa_bo_sync_read() - Copy the data from GPU memory to the sub-allocation. + * @sa_bo: the &drm_suballoc to sync + */ +void xe_sa_bo_sync_read(struct drm_suballoc *sa_bo) +{ + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile); + + if (!sa_manager->bo->vmap.is_iomem) + return; + + xe_map_memcpy_from(xe, xe_sa_bo_cpu_addr(sa_bo), &sa_manager->bo->vmap, + drm_suballoc_soffset(sa_bo), + drm_suballoc_size(sa_bo)); +} + void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence) { diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 99dbf0eea540..1be744350836 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -37,6 +37,7 @@ static inline struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager } void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); +void xe_sa_bo_sync_read(struct drm_suballoc *sa_bo); void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence); static inline struct xe_sa_manager * diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 6ae4cc6a3802..cb674a322113 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -146,6 +146,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, for (i = 0; i < width; ++i) job->ptrs[i].batch_addr = batch_addr[i]; + atomic_inc(&q->job_cnt); xe_pm_runtime_get_noresume(job_to_xe(job)); trace_xe_sched_job_create(job); return job; @@ -177,6 +178,7 @@ void xe_sched_job_destroy(struct kref *ref) dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + atomic_dec(&q->job_cnt); xe_exec_queue_put(q); xe_pm_runtime_put(xe); } @@ -296,23 +298,6 @@ void xe_sched_job_push(struct xe_sched_job *job) } /** - * xe_sched_job_last_fence_add_dep - Add last fence dependency to job - * @job:job to add the last fence dependency to - * @vm: virtual memory job belongs to - * - * Returns: - * 0 on success, or an error on failing to expand the array. 
- */ -int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm) -{ - struct dma_fence *fence; - - fence = xe_exec_queue_last_fence_get(job->q, vm); - - return drm_sched_job_add_dependency(&job->drm, fence); -} - -/** * xe_sched_job_init_user_fence - Initialize user_fence for the job * @job: job whose user_fence needs an init * @sync: sync to be use to init user_fence diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index b467131b6d5f..1c1cb44216c3 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -58,7 +58,6 @@ bool xe_sched_job_completed(struct xe_sched_job *job); void xe_sched_job_arm(struct xe_sched_job *job); void xe_sched_job_push(struct xe_sched_job *job); -int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm); void xe_sched_job_init_user_fence(struct xe_sched_job *job, struct xe_sync_entry *sync); diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.c b/drivers/gpu/drm/xe/xe_sriov_packet.c new file mode 100644 index 000000000000..bab994696896 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_packet.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_guc_klv_helpers.h" +#include "xe_printk.h" +#include "xe_sriov_packet.h" +#include "xe_sriov_packet_types.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_migration.h" +#include "xe_sriov_printk.h" + +static struct mutex *pf_migration_mutex(struct xe_device *xe, unsigned int vfid) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); + + return &xe->sriov.pf.vfs[vfid].migration.lock; +} + +static struct xe_sriov_packet **pf_pick_pending(struct xe_device *xe, unsigned int vfid) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); + lockdep_assert_held(pf_migration_mutex(xe, vfid)); + + return &xe->sriov.pf.vfs[vfid].migration.pending; +} + +static struct xe_sriov_packet ** +pf_pick_descriptor(struct xe_device *xe, unsigned int vfid) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); + lockdep_assert_held(pf_migration_mutex(xe, vfid)); + + return &xe->sriov.pf.vfs[vfid].migration.descriptor; +} + +static struct xe_sriov_packet **pf_pick_trailer(struct xe_device *xe, unsigned int vfid) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); + lockdep_assert_held(pf_migration_mutex(xe, vfid)); + + return &xe->sriov.pf.vfs[vfid].migration.trailer; +} + +static struct xe_sriov_packet **pf_pick_read_packet(struct xe_device *xe, + unsigned int vfid) +{ + struct xe_sriov_packet **data; + + data = pf_pick_descriptor(xe, vfid); + if (*data) + return data; + + data = pf_pick_pending(xe, vfid); + if (!*data) + *data = xe_sriov_pf_migration_save_consume(xe, vfid); + if (*data) + return data; + + data = pf_pick_trailer(xe, vfid); + if (*data) + return data; + + return NULL; +} + +static bool pkt_needs_bo(struct xe_sriov_packet *data) +{ + return data->hdr.type == XE_SRIOV_PACKET_TYPE_VRAM; +} + +/** + * xe_sriov_packet_alloc() - Allocate migration data packet + * @xe: the &xe_device + * + * Only allocates the "outer" structure, without initializing the migration + * data backing storage. + * + * Return: Pointer to &xe_sriov_packet on success, + * NULL in case of error. 
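+ *
+ * Minimal usage sketch (the tile/GT ids and size are placeholders, not
+ * values mandated by this API):
+ *
+ *	struct xe_sriov_packet *data = xe_sriov_packet_alloc(xe);
+ *	int err;
+ *
+ *	if (!data)
+ *		return -ENOMEM;
+ *
+ *	err = xe_sriov_packet_init(data, tile_id, gt_id,
+ *				   XE_SRIOV_PACKET_TYPE_GUC, 0, size);
+ *	if (err) {
+ *		xe_sriov_packet_free(data);
+ *		return err;
+ *	}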
+ */ +struct xe_sriov_packet *xe_sriov_packet_alloc(struct xe_device *xe) +{ + struct xe_sriov_packet *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + + data->xe = xe; + data->hdr_remaining = sizeof(data->hdr); + + return data; +} + +/** + * xe_sriov_packet_free() - Free migration data packet. + * @data: the &xe_sriov_packet + */ +void xe_sriov_packet_free(struct xe_sriov_packet *data) +{ + if (IS_ERR_OR_NULL(data)) + return; + + if (pkt_needs_bo(data)) + xe_bo_unpin_map_no_vm(data->bo); + else + kvfree(data->buff); + + kfree(data); +} + +static int pkt_init(struct xe_sriov_packet *data) +{ + struct xe_gt *gt = xe_device_get_gt(data->xe, data->hdr.gt_id); + + if (!gt) + return -EINVAL; + + if (data->hdr.size == 0) + return 0; + + if (pkt_needs_bo(data)) { + struct xe_bo *bo; + + bo = xe_bo_create_pin_map_novm(data->xe, gt->tile, PAGE_ALIGN(data->hdr.size), + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED, false); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + data->bo = bo; + data->vaddr = bo->vmap.vaddr; + } else { + void *buff = kvzalloc(data->hdr.size, GFP_KERNEL); + + if (!buff) + return -ENOMEM; + + data->buff = buff; + data->vaddr = buff; + } + + return 0; +} + +#define XE_SRIOV_PACKET_SUPPORTED_VERSION 1 + +/** + * xe_sriov_packet_init() - Initialize migration packet header and backing storage. + * @data: the &xe_sriov_packet + * @tile_id: tile identifier + * @gt_id: GT identifier + * @type: &xe_sriov_packet_type + * @offset: offset of data packet payload (within wider resource) + * @size: size of data packet payload + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_packet_init(struct xe_sriov_packet *data, u8 tile_id, u8 gt_id, + enum xe_sriov_packet_type type, loff_t offset, size_t size) +{ + data->hdr.version = XE_SRIOV_PACKET_SUPPORTED_VERSION; + data->hdr.type = type; + data->hdr.tile_id = tile_id; + data->hdr.gt_id = gt_id; + data->hdr.offset = offset; + data->hdr.size = size; + data->remaining = size; + + return pkt_init(data); +} + +/** + * xe_sriov_packet_init_from_hdr() - Initialize migration packet backing storage based on header. + * @data: the &xe_sriov_packet + * + * Header data is expected to be filled prior to calling this function. + * + * Return: 0 on success or a negative error code on failure. 
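+ *
+ * Note that for the restore stream this is invoked automatically by the
+ * write path once the complete header has been received from userspace,
+ * so callers of xe_sriov_packet_write_single() do not call it directly.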
+ */ +int xe_sriov_packet_init_from_hdr(struct xe_sriov_packet *data) +{ + xe_assert(data->xe, !data->hdr_remaining); + + if (data->hdr.version != XE_SRIOV_PACKET_SUPPORTED_VERSION) + return -EINVAL; + + data->remaining = data->hdr.size; + + return pkt_init(data); +} + +static ssize_t pkt_hdr_read(struct xe_sriov_packet *data, + char __user *buf, size_t len) +{ + loff_t offset = sizeof(data->hdr) - data->hdr_remaining; + + if (!data->hdr_remaining) + return -EINVAL; + + if (len > data->hdr_remaining) + len = data->hdr_remaining; + + if (copy_to_user(buf, (void *)&data->hdr + offset, len)) + return -EFAULT; + + data->hdr_remaining -= len; + + return len; +} + +static ssize_t pkt_data_read(struct xe_sriov_packet *data, + char __user *buf, size_t len) +{ + if (len > data->remaining) + len = data->remaining; + + if (copy_to_user(buf, data->vaddr + (data->hdr.size - data->remaining), len)) + return -EFAULT; + + data->remaining -= len; + + return len; +} + +static ssize_t pkt_read_single(struct xe_sriov_packet **data, + unsigned int vfid, char __user *buf, size_t len) +{ + ssize_t copied = 0; + + if ((*data)->hdr_remaining) + copied = pkt_hdr_read(*data, buf, len); + else + copied = pkt_data_read(*data, buf, len); + + if ((*data)->remaining == 0 && (*data)->hdr_remaining == 0) { + xe_sriov_packet_free(*data); + *data = NULL; + } + + return copied; +} + +/** + * xe_sriov_packet_read_single() - Read migration data from a single packet. + * @xe: the &xe_device + * @vfid: the VF identifier + * @buf: start address of userspace buffer + * @len: requested read size from userspace + * + * Return: number of bytes that has been successfully read, + * 0 if no more migration data is available, + * -errno on failure. + */ +ssize_t xe_sriov_packet_read_single(struct xe_device *xe, unsigned int vfid, + char __user *buf, size_t len) +{ + struct xe_sriov_packet **data = pf_pick_read_packet(xe, vfid); + + if (!data) + return -ENODATA; + if (IS_ERR(*data)) + return PTR_ERR(*data); + + return pkt_read_single(data, vfid, buf, len); +} + +static ssize_t pkt_hdr_write(struct xe_sriov_packet *data, + const char __user *buf, size_t len) +{ + loff_t offset = sizeof(data->hdr) - data->hdr_remaining; + int ret; + + if (len > data->hdr_remaining) + len = data->hdr_remaining; + + if (copy_from_user((void *)&data->hdr + offset, buf, len)) + return -EFAULT; + + data->hdr_remaining -= len; + + if (!data->hdr_remaining) { + ret = xe_sriov_packet_init_from_hdr(data); + if (ret) + return ret; + } + + return len; +} + +static ssize_t pkt_data_write(struct xe_sriov_packet *data, + const char __user *buf, size_t len) +{ + if (len > data->remaining) + len = data->remaining; + + if (copy_from_user(data->vaddr + (data->hdr.size - data->remaining), buf, len)) + return -EFAULT; + + data->remaining -= len; + + return len; +} + +/** + * xe_sriov_packet_write_single() - Write migration data to a single packet. + * @xe: the &xe_device + * @vfid: the VF identifier + * @buf: start address of userspace buffer + * @len: requested write size from userspace + * + * Return: number of bytes that has been successfully written, + * -errno on failure. 
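+ *
+ * Data is consumed in two phases: the first sizeof(struct
+ * xe_sriov_packet_hdr) bytes of each packet fill the header, the following
+ * hdr.size bytes fill the payload, and each completed packet is handed over
+ * for restore via xe_sriov_pf_migration_restore_produce(). Userspace may
+ * therefore deliver the stream in arbitrarily sized chunks.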
+ */ +ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid, + const char __user *buf, size_t len) +{ + struct xe_sriov_packet **data = pf_pick_pending(xe, vfid); + int ret; + ssize_t copied; + + if (IS_ERR_OR_NULL(*data)) { + *data = xe_sriov_packet_alloc(xe); + if (!*data) + return -ENOMEM; + } + + if ((*data)->hdr_remaining) + copied = pkt_hdr_write(*data, buf, len); + else + copied = pkt_data_write(*data, buf, len); + + if ((*data)->hdr_remaining == 0 && (*data)->remaining == 0) { + ret = xe_sriov_pf_migration_restore_produce(xe, vfid, *data); + if (ret) { + xe_sriov_packet_free(*data); + return ret; + } + + *data = NULL; + } + + return copied; +} + +#define MIGRATION_KLV_DEVICE_DEVID_KEY 0xf001u +#define MIGRATION_KLV_DEVICE_DEVID_LEN 1u +#define MIGRATION_KLV_DEVICE_REVID_KEY 0xf002u +#define MIGRATION_KLV_DEVICE_REVID_LEN 1u + +#define MIGRATION_DESCRIPTOR_DWORDS (GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_DEVID_LEN + \ + GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_REVID_LEN) +static size_t pf_descriptor_init(struct xe_device *xe, unsigned int vfid) +{ + struct xe_sriov_packet **desc = pf_pick_descriptor(xe, vfid); + struct xe_sriov_packet *data; + unsigned int len = 0; + u32 *klvs; + int ret; + + data = xe_sriov_packet_alloc(xe); + if (!data) + return -ENOMEM; + + ret = xe_sriov_packet_init(data, 0, 0, XE_SRIOV_PACKET_TYPE_DESCRIPTOR, + 0, MIGRATION_DESCRIPTOR_DWORDS * sizeof(u32)); + if (ret) { + xe_sriov_packet_free(data); + return ret; + } + + klvs = data->vaddr; + klvs[len++] = PREP_GUC_KLV_CONST(MIGRATION_KLV_DEVICE_DEVID_KEY, + MIGRATION_KLV_DEVICE_DEVID_LEN); + klvs[len++] = xe->info.devid; + klvs[len++] = PREP_GUC_KLV_CONST(MIGRATION_KLV_DEVICE_REVID_KEY, + MIGRATION_KLV_DEVICE_REVID_LEN); + klvs[len++] = xe->info.revid; + + xe_assert(xe, len == MIGRATION_DESCRIPTOR_DWORDS); + + *desc = data; + + return 0; +} + +/** + * xe_sriov_packet_process_descriptor() - Process migration data descriptor packet. + * @xe: the &xe_device + * @vfid: the VF identifier + * @data: the &xe_sriov_packet containing the descriptor + * + * The descriptor uses the same KLV format as GuC, and contains metadata used for + * checking migration data compatibility. + * + * Return: 0 on success, -errno on failure. 
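+ *
+ * For reference, a descriptor produced by this driver currently carries two
+ * KLVs, each a key/length dword followed by its value dword(s):
+ *
+ *	dw[0]: key = 0xf001 (device id), len = 1
+ *	dw[1]: PCI device id of the saving device
+ *	dw[2]: key = 0xf002 (revision id), len = 1
+ *	dw[3]: PCI revision id of the saving device
+ *
+ * Unknown keys are only logged and skipped, so extra KLVs from a newer
+ * producer do not abort the restore.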
+ */ +int xe_sriov_packet_process_descriptor(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data) +{ + u32 num_dwords = data->hdr.size / sizeof(u32); + u32 *klvs = data->vaddr; + + xe_assert(xe, data->hdr.type == XE_SRIOV_PACKET_TYPE_DESCRIPTOR); + + if (data->hdr.size % sizeof(u32)) { + xe_sriov_warn(xe, "Aborting migration, descriptor not in KLV format (size=%llu)\n", + data->hdr.size); + return -EINVAL; + } + + while (num_dwords >= GUC_KLV_LEN_MIN) { + u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]); + u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]); + + klvs += GUC_KLV_LEN_MIN; + num_dwords -= GUC_KLV_LEN_MIN; + + if (len > num_dwords) { + xe_sriov_warn(xe, "Aborting migration, truncated KLV %#x, len %u\n", + key, len); + return -EINVAL; + } + + switch (key) { + case MIGRATION_KLV_DEVICE_DEVID_KEY: + if (*klvs != xe->info.devid) { + xe_sriov_warn(xe, + "Aborting migration, devid mismatch %#06x!=%#06x\n", + *klvs, xe->info.devid); + return -ENODEV; + } + break; + case MIGRATION_KLV_DEVICE_REVID_KEY: + if (*klvs != xe->info.revid) { + xe_sriov_warn(xe, + "Aborting migration, revid mismatch %#06x!=%#06x\n", + *klvs, xe->info.revid); + return -ENODEV; + } + break; + default: + xe_sriov_dbg(xe, + "Skipping unknown migration KLV %#x, len=%u\n", + key, len); + print_hex_dump_bytes("desc: ", DUMP_PREFIX_OFFSET, klvs, + min(SZ_64, len * sizeof(u32))); + break; + } + + klvs += len; + num_dwords -= len; + } + + return 0; +} + +static void pf_pending_init(struct xe_device *xe, unsigned int vfid) +{ + struct xe_sriov_packet **data = pf_pick_pending(xe, vfid); + + *data = NULL; +} + +#define MIGRATION_TRAILER_SIZE 0 +static int pf_trailer_init(struct xe_device *xe, unsigned int vfid) +{ + struct xe_sriov_packet **trailer = pf_pick_trailer(xe, vfid); + struct xe_sriov_packet *data; + int ret; + + data = xe_sriov_packet_alloc(xe); + if (!data) + return -ENOMEM; + + ret = xe_sriov_packet_init(data, 0, 0, XE_SRIOV_PACKET_TYPE_TRAILER, + 0, MIGRATION_TRAILER_SIZE); + if (ret) { + xe_sriov_packet_free(data); + return ret; + } + + *trailer = data; + + return 0; +} + +/** + * xe_sriov_packet_save_init() - Initialize the pending save migration packets. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Return: 0 on success, -errno on failure. 
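+ *
+ * After this call the save stream read back via
+ * xe_sriov_packet_read_single() is laid out as: the device descriptor
+ * packet first, then the per-GT migration data packets as they are
+ * produced, and finally an empty trailer packet marking the end of the
+ * stream.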
+ */ +int xe_sriov_packet_save_init(struct xe_device *xe, unsigned int vfid) +{ + int ret; + + scoped_cond_guard(mutex_intr, return -EINTR, pf_migration_mutex(xe, vfid)) { + ret = pf_descriptor_init(xe, vfid); + if (ret) + return ret; + + ret = pf_trailer_init(xe, vfid); + if (ret) + return ret; + + pf_pending_init(xe, vfid); + } + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.h b/drivers/gpu/drm/xe/xe_sriov_packet.h new file mode 100644 index 000000000000..2731e52cf7ef --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_packet.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PACKET_H_ +#define _XE_SRIOV_PACKET_H_ + +#include <linux/types.h> + +struct xe_device; +struct xe_sriov_packet; +enum xe_sriov_packet_type; + +struct xe_sriov_packet *xe_sriov_packet_alloc(struct xe_device *xe); +void xe_sriov_packet_free(struct xe_sriov_packet *data); + +int xe_sriov_packet_init(struct xe_sriov_packet *data, u8 tile_id, u8 gt_id, + enum xe_sriov_packet_type, loff_t offset, size_t size); +int xe_sriov_packet_init_from_hdr(struct xe_sriov_packet *data); + +ssize_t xe_sriov_packet_read_single(struct xe_device *xe, unsigned int vfid, + char __user *buf, size_t len); +ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid, + const char __user *buf, size_t len); +int xe_sriov_packet_save_init(struct xe_device *xe, unsigned int vfid); +int xe_sriov_packet_process_descriptor(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_packet_types.h b/drivers/gpu/drm/xe/xe_sriov_packet_types.h new file mode 100644 index 000000000000..078a1c95e786 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_packet_types.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PACKET_TYPES_H_ +#define _XE_SRIOV_PACKET_TYPES_H_ + +#include <linux/types.h> + +/** + * enum xe_sriov_packet_type - Xe SR-IOV VF migration data packet type + * @XE_SRIOV_PACKET_TYPE_DESCRIPTOR: Descriptor with VF device metadata + * @XE_SRIOV_PACKET_TYPE_TRAILER: Trailer indicating end-of-stream + * @XE_SRIOV_PACKET_TYPE_GGTT: Global GTT migration data + * @XE_SRIOV_PACKET_TYPE_MMIO: MMIO registers migration data + * @XE_SRIOV_PACKET_TYPE_GUC: GuC firmware migration data + * @XE_SRIOV_PACKET_TYPE_VRAM: VRAM migration data + */ +enum xe_sriov_packet_type { + /* Skipping 0 to catch uninitialized data */ + XE_SRIOV_PACKET_TYPE_DESCRIPTOR = 1, + XE_SRIOV_PACKET_TYPE_TRAILER, + XE_SRIOV_PACKET_TYPE_GGTT, + XE_SRIOV_PACKET_TYPE_MMIO, + XE_SRIOV_PACKET_TYPE_GUC, + XE_SRIOV_PACKET_TYPE_VRAM, +}; + +/** + * struct xe_sriov_packet_hdr - Xe SR-IOV VF migration data packet header + */ +struct xe_sriov_packet_hdr { + /** @version: migration data protocol version */ + u8 version; + /** @type: migration data type */ + u8 type; + /** @tile_id: migration data tile id */ + u8 tile_id; + /** @gt_id: migration data gt id */ + u8 gt_id; + /** @flags: migration data flags */ + u32 flags; + /** + * @offset: offset into the resource; + * used when multiple packets of given type are used for migration + */ + u64 offset; + /** @size: migration data size */ + u64 size; +} __packed; + +/** + * struct xe_sriov_packet - Xe SR-IOV VF migration data packet + */ +struct xe_sriov_packet { + /** @xe: the PF &xe_device this data packet belongs to */ + struct xe_device *xe; + /** @vaddr: CPU pointer to payload data */ + void *vaddr; + /** 
@remaining: payload data remaining */ + size_t remaining; + /** @hdr_remaining: header data remaining */ + size_t hdr_remaining; + union { + /** @bo: Buffer object with migration data */ + struct xe_bo *bo; + /** @buff: Buffer with migration data */ + void *buff; + }; + /** @hdr: data packet header */ + struct xe_sriov_packet_hdr hdr; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index bc1ab9ee31d9..7c779d63179f 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -15,7 +15,9 @@ #include "xe_sriov.h" #include "xe_sriov_pf.h" #include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_migration.h" #include "xe_sriov_pf_service.h" +#include "xe_sriov_pf_sysfs.h" #include "xe_sriov_printk.h" static unsigned int wanted_max_vfs(struct xe_device *xe) @@ -101,6 +103,12 @@ int xe_sriov_pf_init_early(struct xe_device *xe) if (err) return err; + err = xe_sriov_pf_migration_init(xe); + if (err) + return err; + + xe_guard_init(&xe->sriov.pf.guard_vfs_enabling, "vfs_enabling"); + xe_sriov_pf_service_init(xe); return 0; @@ -128,6 +136,10 @@ int xe_sriov_pf_init_late(struct xe_device *xe) return err; } + err = xe_sriov_pf_sysfs_init(xe); + if (err) + return err; + return 0; } @@ -158,6 +170,101 @@ int xe_sriov_pf_wait_ready(struct xe_device *xe) } /** + * xe_sriov_pf_arm_guard() - Arm the guard for exclusive/lockdown mode. + * @xe: the PF &xe_device + * @guard: the &xe_guard to arm + * @lockdown: arm for lockdown(true) or exclusive(false) mode + * @who: the address of the new owner, or NULL if it's a caller + * + * This function can only be called on PF. + * + * It is a simple wrapper for xe_guard_arm() with additional debug + * messages. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_arm_guard(struct xe_device *xe, struct xe_guard *guard, + bool lockdown, void *who) +{ + void *new_owner = who ?: __builtin_return_address(0); + int err; + + err = xe_guard_arm(guard, lockdown, new_owner); + if (err) { + xe_sriov_dbg(xe, "%s/%s mode denied (%pe) last owner %ps\n", + guard->name, xe_guard_mode_str(lockdown), + ERR_PTR(err), guard->owner); + return err; + } + + xe_sriov_dbg_verbose(xe, "%s/%s by %ps\n", + guard->name, xe_guard_mode_str(lockdown), + new_owner); + return 0; +} + +/** + * xe_sriov_pf_disarm_guard() - Disarm the guard. + * @xe: the PF &xe_device + * @guard: the &xe_guard to disarm + * @lockdown: disarm from lockdown(true) or exclusive(false) mode + * @who: the address of the indirect owner, or NULL if it's a caller + * + * This function can only be called on PF. + * + * It is a simple wrapper for xe_guard_disarm() with additional debug + * messages and xe_assert() to easily catch any illegal calls. + */ +void xe_sriov_pf_disarm_guard(struct xe_device *xe, struct xe_guard *guard, + bool lockdown, void *who) +{ + bool disarmed; + + xe_sriov_dbg_verbose(xe, "%s/%s by %ps\n", + guard->name, xe_guard_mode_str(lockdown), + who ?: __builtin_return_address(0)); + + disarmed = xe_guard_disarm(guard, lockdown); + xe_assert_msg(xe, disarmed, "%s/%s not armed? last owner %ps", + guard->name, xe_guard_mode_str(lockdown), guard->owner); +} + +/** + * xe_sriov_pf_lockdown() - Lockdown the PF to prevent VFs enabling. + * @xe: the PF &xe_device + * + * This function can only be called on PF. + * + * Once the PF is locked down, it will not enable VFs. + * If VFs are already enabled, the -EBUSY will be returned. + * To allow the PF enable VFs again call xe_sriov_pf_end_lockdown(). 
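+ *
+ * Minimal usage sketch (mirroring how the "lockdown_vfs_enabling" debugfs
+ * attribute uses this pair of calls):
+ *
+ *	err = xe_sriov_pf_lockdown(xe);
+ *	if (err)
+ *		return err;	/* VFs already enabled or being enabled */
+ *
+ *	/* ... work that must not race with VF enabling ... */
+ *
+ *	xe_sriov_pf_end_lockdown(xe);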
+ * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_lockdown(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + return xe_sriov_pf_arm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, true, + __builtin_return_address(0)); +} + +/** + * xe_sriov_pf_end_lockdown() - Allow the PF to enable VFs again. + * @xe: the PF &xe_device + * + * This function can only be called on PF. + * See xe_sriov_pf_lockdown() for details. + */ +void xe_sriov_pf_end_lockdown(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + xe_sriov_pf_disarm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, true, + __builtin_return_address(0)); +} + +/** * xe_sriov_pf_print_vfs_summary - Print SR-IOV PF information. * @xe: the &xe_device to print info from * @p: the &drm_printer diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index cba3fde9581f..b4d050ad5b7c 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -17,11 +17,15 @@ bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); int xe_sriov_pf_init_late(struct xe_device *xe); int xe_sriov_pf_wait_ready(struct xe_device *xe); +int xe_sriov_pf_lockdown(struct xe_device *xe); +void xe_sriov_pf_end_lockdown(struct xe_device *xe); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else static inline bool xe_sriov_pf_readiness(struct xe_device *xe) { return false; } static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } static inline int xe_sriov_pf_init_late(struct xe_device *xe) { return 0; } +static inline int xe_sriov_pf_lockdown(struct xe_device *xe) { return 0; } +static inline void xe_sriov_pf_end_lockdown(struct xe_device *xe) { } #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c index 416d00a03fbb..ed4b9820b06e 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c @@ -5,6 +5,8 @@ #include "xe_device.h" #include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_pf_migration.h" +#include "xe_sriov_packet.h" #include "xe_sriov_pf_control.h" #include "xe_sriov_printk.h" @@ -122,6 +124,30 @@ int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid) } /** + * xe_sriov_pf_control_wait_flr() - Wait for a VF reset (FLR) to complete. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_wait_flr(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_wait_flr(gt, vfid); + result = result ? -EUCLEAN : err; + } + + return result; +} + +/** * xe_sriov_pf_control_sync_flr() - Synchronize a VF FLR between all GTs. * @xe: the &xe_device * @vfid: the VF identifier @@ -149,3 +175,105 @@ int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid) return 0; } + +/** + * xe_sriov_pf_control_trigger_save_vf() - Start VF migration data SAVE sequence on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. 
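+ *
+ * Sketch of the overall save flow driven from the PF side (pacing of the
+ * reads is up to the caller):
+ *
+ *	err = xe_sriov_pf_control_trigger_save_vf(xe, vfid);
+ *	/* drain the produced stream, e.g. via xe_sriov_packet_read_single() */
+ *	err = xe_sriov_pf_control_finish_save_vf(xe, vfid);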
+ */ +int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int ret; + + ret = xe_sriov_packet_save_init(xe, vfid); + if (ret) + return ret; + + for_each_gt(gt, xe, id) { + xe_gt_sriov_pf_migration_save_init(gt, vfid); + + ret = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid); + if (ret) + return ret; + } + + return 0; +} + +/** + * xe_sriov_pf_control_finish_save_vf() - Complete VF migration data SAVE sequence on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int ret; + + for_each_gt(gt, xe, id) { + ret = xe_gt_sriov_pf_control_finish_save_vf(gt, vfid); + if (ret) + break; + } + + return ret; +} + +/** + * xe_sriov_pf_control_trigger_restore_vf() - Start VF migration data RESTORE sequence on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int ret; + + for_each_gt(gt, xe, id) { + ret = xe_gt_sriov_pf_control_trigger_restore_vf(gt, vfid); + if (ret) + return ret; + } + + return ret; +} + +/** + * xe_sriov_pf_control_finish_restore_vf() - Complete VF migration data RESTORE sequence on all GTs. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int ret; + + for_each_gt(gt, xe, id) { + ret = xe_gt_sriov_pf_control_finish_restore_vf(gt, vfid); + if (ret) + break; + } + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_sriov_pf_control.h index 2d52d0ac1b28..ef9f219b2109 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.h @@ -12,6 +12,11 @@ int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_wait_flr(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid); #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c index a81aa05c5532..bad751217e1e 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c @@ -13,6 +13,7 @@ #include "xe_sriov_pf_control.h" #include "xe_sriov_pf_debugfs.h" #include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_migration.h" #include "xe_sriov_pf_provision.h" #include "xe_sriov_pf_service.h" #include "xe_sriov_printk.h" @@ -98,10 +99,40 
@@ static inline int xe_sriov_pf_restore_auto_provisioning(struct xe_device *xe) DEFINE_SRIOV_ATTRIBUTE(restore_auto_provisioning); +static int lockdown_vfs_enabling_open(struct inode *inode, struct file *file) +{ + struct dentry *dent = file_dentry(file); + struct xe_device *xe = extract_xe(dent); + ssize_t ret; + + ret = xe_sriov_pf_lockdown(xe); + if (ret < 0) + return ret; + + file->private_data = xe; + return nonseekable_open(inode, file); +} + +static int lockdown_vfs_enabling_release(struct inode *inode, struct file *file) +{ + struct xe_device *xe = file->private_data; + + xe_sriov_pf_end_lockdown(xe); + return 0; +} + +static const struct file_operations lockdown_vfs_enabling_fops = { + .owner = THIS_MODULE, + .open = lockdown_vfs_enabling_open, + .release = lockdown_vfs_enabling_release, +}; + static void pf_populate_root(struct xe_device *xe, struct dentry *dent) { debugfs_create_file("restore_auto_provisioning", 0200, dent, xe, &restore_auto_provisioning_fops); + debugfs_create_file("lockdown_vfs_enabling", 0400, dent, xe, + &lockdown_vfs_enabling_fops); } static int simple_show(struct seq_file *m, void *data) @@ -132,15 +163,36 @@ static void pf_populate_pf(struct xe_device *xe, struct dentry *pfdent) * /sys/kernel/debug/dri/BDF/ * ├── sriov * │ ├── vf1 + * │ │ ├── migration_data * │ │ ├── pause * │ │ ├── reset * │ │ ├── resume * │ │ ├── stop + * │ │ ├── save + * │ │ ├── restore * │ │ : * │ ├── vf2 * │ │ ├── ... */ +static int from_file_read_to_vf_call(struct seq_file *s, + int (*call)(struct xe_device *, unsigned int)) +{ + struct dentry *dent = file_dentry(s->file)->d_parent; + struct xe_device *xe = extract_xe(dent); + unsigned int vfid = extract_vfid(dent); + int ret; + + xe_pm_runtime_get(xe); + ret = call(xe, vfid); + xe_pm_runtime_put(xe); + + if (ret < 0) + return ret; + + return 0; +} + static ssize_t from_file_write_to_vf_call(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos, int (*call)(struct xe_device *, unsigned int)) @@ -179,10 +231,85 @@ static ssize_t OP##_write(struct file *file, const char __user *userbuf, \ } \ DEFINE_SHOW_STORE_ATTRIBUTE(OP) +#define DEFINE_VF_CONTROL_ATTRIBUTE_RW(OP) \ +static int OP##_show(struct seq_file *s, void *unused) \ +{ \ + return from_file_read_to_vf_call(s, \ + xe_sriov_pf_control_finish_##OP); \ +} \ +static ssize_t OP##_write(struct file *file, const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return from_file_write_to_vf_call(file, userbuf, count, ppos, \ + xe_sriov_pf_control_trigger_##OP); \ +} \ +DEFINE_SHOW_STORE_ATTRIBUTE(OP) + DEFINE_VF_CONTROL_ATTRIBUTE(pause_vf); DEFINE_VF_CONTROL_ATTRIBUTE(resume_vf); DEFINE_VF_CONTROL_ATTRIBUTE(stop_vf); DEFINE_VF_CONTROL_ATTRIBUTE(reset_vf); +DEFINE_VF_CONTROL_ATTRIBUTE_RW(save_vf); +DEFINE_VF_CONTROL_ATTRIBUTE_RW(restore_vf); + +static ssize_t data_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) +{ + struct dentry *dent = file_dentry(file)->d_parent; + struct xe_device *xe = extract_xe(dent); + unsigned int vfid = extract_vfid(dent); + + if (*pos) + return -ESPIPE; + + return xe_sriov_pf_migration_write(xe, vfid, buf, count); +} + +static ssize_t data_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + struct dentry *dent = file_dentry(file)->d_parent; + struct xe_device *xe = extract_xe(dent); + unsigned int vfid = extract_vfid(dent); + + if (*ppos) + return -ESPIPE; + + return xe_sriov_pf_migration_read(xe, vfid, buf, count); +} + +static const struct file_operations data_vf_fops = { 
+ .owner = THIS_MODULE, + .open = simple_open, + .write = data_write, + .read = data_read, + .llseek = default_llseek, +}; + +static ssize_t size_read(struct file *file, char __user *ubuf, size_t count, loff_t *ppos) +{ + struct dentry *dent = file_dentry(file)->d_parent; + struct xe_device *xe = extract_xe(dent); + unsigned int vfid = extract_vfid(dent); + char buf[21]; + ssize_t ret; + int len; + + xe_pm_runtime_get(xe); + ret = xe_sriov_pf_migration_size(xe, vfid); + xe_pm_runtime_put(xe); + if (ret < 0) + return ret; + + len = scnprintf(buf, sizeof(buf), "%zd\n", ret); + + return simple_read_from_buffer(ubuf, count, ppos, buf, len); +} + +static const struct file_operations size_vf_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = size_read, + .llseek = default_llseek, +}; static void pf_populate_vf(struct xe_device *xe, struct dentry *vfdent) { @@ -190,6 +317,10 @@ static void pf_populate_vf(struct xe_device *xe, struct dentry *vfdent) debugfs_create_file("resume", 0200, vfdent, xe, &resume_vf_fops); debugfs_create_file("stop", 0200, vfdent, xe, &stop_vf_fops); debugfs_create_file("reset", 0200, vfdent, xe, &reset_vf_fops); + debugfs_create_file("save", 0600, vfdent, xe, &save_vf_fops); + debugfs_create_file("restore", 0600, vfdent, xe, &restore_vf_fops); + debugfs_create_file("migration_data", 0600, vfdent, xe, &data_vf_fops); + debugfs_create_file("migration_size", 0400, vfdent, xe, &size_vf_fops); } static void pf_populate_with_tiles(struct xe_device *xe, struct dentry *dent, unsigned int vfid) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h index 4a4340fb633a..9054fdc34597 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h @@ -48,10 +48,26 @@ static inline unsigned int xe_sriov_pf_num_vfs(const struct xe_device *xe) return pci_num_vf(to_pci_dev(xe->drm.dev)); } +/** + * xe_sriov_pf_admin_only() - Check if PF is mainly used for VFs administration. + * @xe: the PF &xe_device + * + * Return: True if PF is mainly used for VFs administration. + */ +static inline bool xe_sriov_pf_admin_only(const struct xe_device *xe) +{ + return !xe->info.probe_display; +} + static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe) { xe_assert(xe, IS_SRIOV_PF(xe)); return &xe->sriov.pf.master_lock; } +int xe_sriov_pf_arm_guard(struct xe_device *xe, struct xe_guard *guard, + bool write, void *who); +void xe_sriov_pf_disarm_guard(struct xe_device *xe, struct xe_guard *guard, + bool write, void *who); + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c new file mode 100644 index 000000000000..21b06ce6830a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_device.h" +#include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_pf_migration.h" +#include "xe_pm.h" +#include "xe_sriov.h" +#include "xe_sriov_packet.h" +#include "xe_sriov_packet_types.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_migration.h" +#include "xe_sriov_printk.h" + +static struct xe_sriov_migration_state *pf_pick_migration(struct xe_device *xe, unsigned int vfid) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); + + return &xe->sriov.pf.vfs[vfid].migration; +} + +/** + * xe_sriov_pf_migration_waitqueue() - Get waitqueue for migration. 
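Taken together, the "save", "migration_data" and "migration_size" nodes added above form the userspace-facing save path. A minimal userspace sketch follows (illustrative only; the helper name, the vf_dir argument, the 4 KiB chunk size and the trigger, drain, then finish ordering are assumptions, not something the patch prescribes):

/*
 * Illustrative save sequence for one VF, driven through the debugfs nodes
 * created by pf_populate_vf(), e.g. vf_dir = "/sys/kernel/debug/dri/<BDF>/sriov/vf1".
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int save_vf_state(const char *vf_dir, const char *out_path)
{
	char path[256], buf[4096];
	int ctl, data, out;
	ssize_t n;

	/* Writing a boolean "1" triggers xe_sriov_pf_control_trigger_save_vf(). */
	snprintf(path, sizeof(path), "%s/save", vf_dir);
	ctl = open(path, O_WRONLY);
	if (ctl < 0 || write(ctl, "1", 1) != 1)
		return -1;
	close(ctl);

	/* Drain "migration_data" until read() reports end of stream (0). */
	snprintf(path, sizeof(path), "%s/migration_data", vf_dir);
	data = open(path, O_RDONLY);
	out = open(out_path, O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (data < 0 || out < 0)
		return -1;
	while ((n = read(data, buf, sizeof(buf))) > 0)
		if (write(out, buf, n) != n)
			return -1;
	close(out);
	close(data);
	if (n < 0)
		return -1;

	/* Reading "save" reaches xe_sriov_pf_control_finish_save_vf() via the
	 * attribute's show() hook and completes the sequence. */
	snprintf(path, sizeof(path), "%s/save", vf_dir);
	ctl = open(path, O_RDONLY);
	if (ctl < 0 || read(ctl, buf, sizeof(buf)) < 0)
		return -1;
	close(ctl);
	return 0;
}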
+ * @xe: the &xe_device + * @vfid: the VF identifier + * + * Return: pointer to the migration waitqueue. + */ +wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid) +{ + return &pf_pick_migration(xe, vfid)->wq; +} + +/** + * xe_sriov_pf_migration_supported() - Check if SR-IOV VF migration is supported by the device + * @xe: the &xe_device + * + * Return: true if migration is supported, false otherwise + */ +bool xe_sriov_pf_migration_supported(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + return xe->sriov.pf.migration.supported; +} + +static bool pf_check_migration_support(struct xe_device *xe) +{ + /* XXX: for now this is for feature enabling only */ + return IS_ENABLED(CONFIG_DRM_XE_DEBUG); +} + +static void pf_migration_cleanup(void *arg) +{ + struct xe_sriov_migration_state *migration = arg; + + xe_sriov_packet_free(migration->pending); + xe_sriov_packet_free(migration->trailer); + xe_sriov_packet_free(migration->descriptor); +} + +/** + * xe_sriov_pf_migration_init() - Initialize support for SR-IOV VF migration. + * @xe: the &xe_device + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_migration_init(struct xe_device *xe) +{ + unsigned int n, totalvfs; + int err; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + xe->sriov.pf.migration.supported = pf_check_migration_support(xe); + if (!xe_sriov_pf_migration_supported(xe)) + return 0; + + totalvfs = xe_sriov_pf_get_totalvfs(xe); + for (n = 1; n <= totalvfs; n++) { + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, n); + + err = drmm_mutex_init(&xe->drm, &migration->lock); + if (err) + return err; + + init_waitqueue_head(&migration->wq); + + err = devm_add_action_or_reset(xe->drm.dev, pf_migration_cleanup, migration); + if (err) + return err; + } + + return 0; +} + +static bool pf_migration_data_ready(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + u8 gt_id; + + for_each_gt(gt, xe, gt_id) { + if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid) || + xe_gt_sriov_pf_control_check_save_data_done(gt, vfid) || + !xe_gt_sriov_pf_migration_ring_empty(gt, vfid)) + return true; + } + + return false; +} + +static struct xe_sriov_packet * +pf_migration_consume(struct xe_device *xe, unsigned int vfid) +{ + struct xe_sriov_packet *data; + bool more_data = false; + struct xe_gt *gt; + u8 gt_id; + + for_each_gt(gt, xe, gt_id) { + data = xe_gt_sriov_pf_migration_save_consume(gt, vfid); + if (data && PTR_ERR(data) != -EAGAIN) + return data; + if (PTR_ERR(data) == -EAGAIN) + more_data = true; + } + + if (!more_data) + return NULL; + + return ERR_PTR(-EAGAIN); +} + +/** + * xe_sriov_pf_migration_save_consume() - Consume a VF migration data packet from the device. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Called by the save migration data consumer (userspace) when + * processing migration data. + * If there is no migration data to process, wait until more data is available. + * + * Return: Pointer to &xe_sriov_packet on success, + * NULL if ring is empty and no more migration data is expected, + * ERR_PTR value in case of error.
+ */ +struct xe_sriov_packet * +xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid) +{ + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid); + struct xe_sriov_packet *data; + int ret; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + for (;;) { + data = pf_migration_consume(xe, vfid); + if (PTR_ERR(data) != -EAGAIN) + break; + + ret = wait_event_interruptible(migration->wq, + pf_migration_data_ready(xe, vfid)); + if (ret) + return ERR_PTR(ret); + } + + return data; +} + +static int pf_handle_descriptor(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data) +{ + int ret; + + if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0) + return -EINVAL; + + ret = xe_sriov_packet_process_descriptor(xe, vfid, data); + if (ret) + return ret; + + xe_sriov_packet_free(data); + + return 0; +} + +static int pf_handle_trailer(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data) +{ + struct xe_gt *gt; + u8 gt_id; + + if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0) + return -EINVAL; + if (data->hdr.offset != 0 || data->hdr.size != 0 || data->buff || data->bo) + return -EINVAL; + + xe_sriov_packet_free(data); + + for_each_gt(gt, xe, gt_id) + xe_gt_sriov_pf_control_restore_data_done(gt, vfid); + + return 0; +} + +/** + * xe_sriov_pf_migration_restore_produce() - Produce a VF migration data packet to the device. + * @xe: the &xe_device + * @vfid: the VF identifier + * @data: Pointer to &xe_sriov_packet + * + * Called by the restore migration data producer (userspace) when processing + * migration data. + * If the underlying data structure is full, wait until there is space. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data) +{ + struct xe_gt *gt; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + if (data->hdr.type == XE_SRIOV_PACKET_TYPE_DESCRIPTOR) + return pf_handle_descriptor(xe, vfid, data); + if (data->hdr.type == XE_SRIOV_PACKET_TYPE_TRAILER) + return pf_handle_trailer(xe, vfid, data); + + gt = xe_device_get_gt(xe, data->hdr.gt_id); + if (!gt || data->hdr.tile_id != gt->tile->id || data->hdr.type == 0) { + xe_sriov_err_ratelimited(xe, "Received invalid restore packet for VF%u (type:%u, tile:%u, GT:%u)\n", + vfid, data->hdr.type, data->hdr.tile_id, data->hdr.gt_id); + return -EINVAL; + } + + return xe_gt_sriov_pf_migration_restore_produce(gt, vfid, data); +} + +/** + * xe_sriov_pf_migration_read() - Read migration data from the device. + * @xe: the &xe_device + * @vfid: the VF identifier + * @buf: start address of userspace buffer + * @len: requested read size from userspace + * + * Return: number of bytes that has been successfully read, + * 0 if no more migration data is available, + * -errno on failure. + */ +ssize_t xe_sriov_pf_migration_read(struct xe_device *xe, unsigned int vfid, + char __user *buf, size_t len) +{ + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid); + ssize_t ret, consumed = 0; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) { + while (consumed < len) { + ret = xe_sriov_packet_read_single(xe, vfid, buf, len - consumed); + if (ret == -ENODATA) + break; + if (ret < 0) + return ret; + + consumed += ret; + buf += ret; + } + } + + return consumed; +} + +/** + * xe_sriov_pf_migration_write() - Write migration data to the device. 
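The restore direction mirrors the save sketch above. The ordering shown here (trigger, stream the saved image into "migration_data", then complete via "restore") is an assumption based on the trigger/finish split of the attributes; the helper name and its arguments are likewise illustrative, and the same headers as in the save sketch are assumed:

/* Companion to save_vf_state() above. */
static int restore_vf_state(const char *vf_dir, const char *in_path)
{
	char path[256], buf[4096];
	int ctl, data, in;
	ssize_t n;

	/* Writing "1" triggers xe_sriov_pf_control_trigger_restore_vf(). */
	snprintf(path, sizeof(path), "%s/restore", vf_dir);
	ctl = open(path, O_WRONLY);
	if (ctl < 0 || write(ctl, "1", 1) != 1)
		return -1;
	close(ctl);

	/* Each write() lands in xe_sriov_pf_migration_write(); the stream is
	 * split back into xe_sriov_packet objects (descriptor, per-GT data,
	 * trailer) and fed to the restore path. */
	snprintf(path, sizeof(path), "%s/migration_data", vf_dir);
	data = open(path, O_WRONLY);
	in = open(in_path, O_RDONLY);
	if (data < 0 || in < 0)
		return -1;
	while ((n = read(in, buf, sizeof(buf))) > 0)
		if (write(data, buf, n) != n)
			return -1;
	close(in);
	close(data);
	if (n < 0)
		return -1;

	/* Reading "restore" reaches xe_sriov_pf_control_finish_restore_vf(). */
	snprintf(path, sizeof(path), "%s/restore", vf_dir);
	ctl = open(path, O_RDONLY);
	if (ctl < 0 || read(ctl, buf, sizeof(buf)) < 0)
		return -1;
	close(ctl);
	return 0;
}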
+ * @xe: the &xe_device + * @vfid: the VF identifier + * @buf: start address of userspace buffer + * @len: requested write size from userspace + * + * Return: number of bytes that has been successfully written, + * -errno on failure. + */ +ssize_t xe_sriov_pf_migration_write(struct xe_device *xe, unsigned int vfid, + const char __user *buf, size_t len) +{ + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid); + ssize_t ret, produced = 0; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) { + while (produced < len) { + ret = xe_sriov_packet_write_single(xe, vfid, buf, len - produced); + if (ret < 0) + return ret; + + produced += ret; + buf += ret; + } + } + + return produced; +} + +/** + * xe_sriov_pf_migration_size() - Total size of migration data from all components within a device + * @xe: the &xe_device + * @vfid: the VF identifier (can't be 0) + * + * This function is for PF only. + * + * Return: total migration data size in bytes or a negative error code on failure. + */ +ssize_t xe_sriov_pf_migration_size(struct xe_device *xe, unsigned int vfid) +{ + size_t size = 0; + struct xe_gt *gt; + ssize_t ret; + u8 gt_id; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid); + + for_each_gt(gt, xe, gt_id) { + ret = xe_gt_sriov_pf_migration_size(gt, vfid); + if (ret < 0) + return ret; + + size += ret; + } + + return size; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_sriov_pf_migration.h new file mode 100644 index 000000000000..b806298a0bb6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_MIGRATION_H_ +#define _XE_SRIOV_PF_MIGRATION_H_ + +#include <linux/types.h> +#include <linux/wait.h> + +struct xe_device; +struct xe_sriov_packet; + +int xe_sriov_pf_migration_init(struct xe_device *xe); +bool xe_sriov_pf_migration_supported(struct xe_device *xe); +int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid, + struct xe_sriov_packet *data); +struct xe_sriov_packet * +xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid); +ssize_t xe_sriov_pf_migration_size(struct xe_device *xe, unsigned int vfid); +wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid); + +ssize_t xe_sriov_pf_migration_read(struct xe_device *xe, unsigned int vfid, + char __user *buf, size_t len); +ssize_t xe_sriov_pf_migration_write(struct xe_device *xe, unsigned int vfid, + const char __user *buf, size_t len); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h new file mode 100644 index 000000000000..363d673ee1dd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_MIGRATION_TYPES_H_ +#define _XE_SRIOV_PF_MIGRATION_TYPES_H_ + +#include <linux/types.h> +#include <linux/mutex_types.h> +#include <linux/wait.h> + +/** + * struct xe_sriov_pf_migration - Xe device level VF migration data + */ +struct xe_sriov_pf_migration { + /** @supported: indicates whether VF migration feature is supported */ + bool supported; +}; + +/** + * struct xe_sriov_migration_state - Per VF device-level migration related data + */ +struct xe_sriov_migration_state { + /** @wq: waitqueue used to avoid busy-waiting for 
snapshot production/consumption */ + wait_queue_head_t wq; + /** @lock: Mutex protecting the migration data */ + struct mutex lock; + /** @pending: currently processed data packet of VF resource */ + struct xe_sriov_packet *pending; + /** @trailer: data packet used to indicate the end of stream */ + struct xe_sriov_packet *trailer; + /** @descriptor: data packet containing the metadata describing the device */ + struct xe_sriov_packet *descriptor; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c index 663fb0c045e9..01470c42e8a7 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c @@ -6,6 +6,7 @@ #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_policy.h" #include "xe_sriov.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_provision.h" @@ -152,3 +153,286 @@ int xe_sriov_pf_provision_set_mode(struct xe_device *xe, enum xe_sriov_provision xe->sriov.pf.provision.mode = mode; return 0; } + +/** + * xe_sriov_pf_provision_bulk_apply_eq() - Change execution quantum for all VFs and PF. + * @xe: the PF &xe_device + * @eq: execution quantum in [ms] to set + * + * Change execution quantum (EQ) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_bulk_apply_eq(struct xe_device *xe, u32 eq) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(gt, eq); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_apply_vf_eq() - Change VF's execution quantum. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @eq: execution quantum in [ms] to set + * + * Change VF's execution quantum (EQ) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_apply_vf_eq(struct xe_device *xe, unsigned int vfid, u32 eq) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, eq); + result = result ?: err; + } + + return result; +} + +static int pf_report_unclean(struct xe_gt *gt, unsigned int vfid, + const char *what, u32 found, u32 expected) +{ + char name[8]; + + xe_sriov_dbg(gt_to_xe(gt), "%s on GT%u has %s=%u (expected %u)\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + gt->info.id, what, found, expected); + return -EUCLEAN; +} + +/** + * xe_sriov_pf_provision_query_vf_eq() - Query VF's execution quantum. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @eq: placeholder for the returned execution quantum in [ms] + * + * Query VF's execution quantum (EQ) provisioning from all tiles/GTs. + * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
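A short in-kernel sketch of how the query/apply helpers here might be consumed; pf_sanitize_vf_eq() and its fallback value are hypothetical and only illustrate handling of the -EUCLEAN (per-GT mismatch) case described in the kernel-doc:

/* Hypothetical caller, assuming the usual xe headers ("xe_device.h",
 * "xe_sriov_pf_provision.h") are already included. */
static int pf_sanitize_vf_eq(struct xe_device *xe, unsigned int vfid, u32 fallback_eq)
{
	u32 eq;
	int err;

	err = xe_sriov_pf_provision_query_vf_eq(xe, vfid, &eq);
	if (err == -EUCLEAN)
		/* GTs disagree: force one consistent value everywhere. */
		return xe_sriov_pf_provision_apply_vf_eq(xe, vfid, fallback_eq);

	return err;
}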
+ */ +int xe_sriov_pf_provision_query_vf_eq(struct xe_device *xe, unsigned int vfid, u32 *eq) +{ + struct xe_gt *gt; + unsigned int id; + int count = 0; + u32 value; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + value = xe_gt_sriov_pf_config_get_exec_quantum_locked(gt, vfid); + if (!count++) + *eq = value; + else if (value != *eq) + return pf_report_unclean(gt, vfid, "EQ", value, *eq); + } + + return !count ? -ENODATA : 0; +} + +/** + * xe_sriov_pf_provision_bulk_apply_pt() - Change preemption timeout for all VFs and PF. + * @xe: the PF &xe_device + * @pt: preemption timeout in [us] to set + * + * Change preemption timeout (PT) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_bulk_apply_pt(struct xe_device *xe, u32 pt) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(gt, pt); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_apply_vf_pt() - Change VF's preemption timeout. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @pt: preemption timeout in [us] to set + * + * Change VF's preemption timeout (PT) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_apply_vf_pt(struct xe_device *xe, unsigned int vfid, u32 pt) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, pt); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_query_vf_pt() - Query VF's preemption timeout. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @pt: placeholder for the returned preemption timeout in [us] + * + * Query VF's preemption timeout (PT) provisioning from all tiles/GTs. + * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_query_vf_pt(struct xe_device *xe, unsigned int vfid, u32 *pt) +{ + struct xe_gt *gt; + unsigned int id; + int count = 0; + u32 value; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + value = xe_gt_sriov_pf_config_get_preempt_timeout_locked(gt, vfid); + if (!count++) + *pt = value; + else if (value != *pt) + return pf_report_unclean(gt, vfid, "PT", value, *pt); + } + + return !count ? -ENODATA : 0; +} + +/** + * xe_sriov_pf_provision_bulk_apply_priority() - Change scheduling priority of all VFs and PF. + * @xe: the PF &xe_device + * @prio: scheduling priority to set + * + * Change the scheduling priority provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio) +{ + bool sched_if_idle; + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + /* + * Currently, priority changes that involves VFs are only allowed using + * the 'sched_if_idle' policy KLV, so only LOW and NORMAL are supported. 
+ */ + xe_assert(xe, prio < GUC_SCHED_PRIORITY_HIGH); + sched_if_idle = prio == GUC_SCHED_PRIORITY_NORMAL; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_policy_set_sched_if_idle(gt, sched_if_idle); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_apply_vf_priority() - Change VF's scheduling priority. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @prio: scheduling priority to set + * + * Change VF's scheduling priority provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_set_sched_priority(gt, vfid, prio); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_query_vf_priority() - Query VF's scheduling priority. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @prio: placeholder for the returned scheduling priority + * + * Query VF's scheduling priority provisioning from all tiles/GTs. + * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio) +{ + struct xe_gt *gt; + unsigned int id; + int count = 0; + u32 value; + + for_each_gt(gt, xe, id) { + value = xe_gt_sriov_pf_config_get_sched_priority(gt, vfid); + if (!count++) + *prio = value; + else if (value != *prio) + return pf_report_unclean(gt, vfid, "priority", value, *prio); + } + + return !count ? 
-ENODATA : 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h index cf3657a32e90..bccf23d51396 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h @@ -6,10 +6,24 @@ #ifndef _XE_SRIOV_PF_PROVISION_H_ #define _XE_SRIOV_PF_PROVISION_H_ +#include <linux/types.h> + #include "xe_sriov_pf_provision_types.h" struct xe_device; +int xe_sriov_pf_provision_bulk_apply_eq(struct xe_device *xe, u32 eq); +int xe_sriov_pf_provision_apply_vf_eq(struct xe_device *xe, unsigned int vfid, u32 eq); +int xe_sriov_pf_provision_query_vf_eq(struct xe_device *xe, unsigned int vfid, u32 *eq); + +int xe_sriov_pf_provision_bulk_apply_pt(struct xe_device *xe, u32 pt); +int xe_sriov_pf_provision_apply_vf_pt(struct xe_device *xe, unsigned int vfid, u32 pt); +int xe_sriov_pf_provision_query_vf_pt(struct xe_device *xe, unsigned int vfid, u32 *pt); + +int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio); +int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio); +int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio); + int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs); int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs); diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c new file mode 100644 index 000000000000..c0b767ac735c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c @@ -0,0 +1,647 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/kobject.h> +#include <linux/sysfs.h> + +#include <drm/drm_managed.h> + +#include "xe_assert.h" +#include "xe_pci_sriov.h" +#include "xe_pm.h" +#include "xe_sriov.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_pf_control.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_provision.h" +#include "xe_sriov_pf_sysfs.h" +#include "xe_sriov_printk.h" + +static int emit_choice(char *buf, int choice, const char * const *array, size_t size) +{ + int pos = 0; + int n; + + for (n = 0; n < size; n++) { + pos += sysfs_emit_at(buf, pos, "%s%s%s%s", + n ? " " : "", + n == choice ? "[" : "", + array[n], + n == choice ? "]" : ""); + } + pos += sysfs_emit_at(buf, pos, "\n"); + + return pos; +} + +/* + * /sys/bus/pci/drivers/xe/BDF/ + * : + * ├── sriov_admin/ + * ├── ... + * ├── .bulk_profile + * │ ├── exec_quantum_ms + * │ ├── preempt_timeout_us + * │ └── sched_priority + * ├── pf/ + * │ ├── ... + * │ ├── device -> ../../../BDF + * │ └── profile + * │ ├── exec_quantum_ms + * │ ├── preempt_timeout_us + * │ └── sched_priority + * ├── vf1/ + * │ ├── ... 
+ * │ ├── device -> ../../../BDF.1 + * │ ├── stop + * │ └── profile + * │ ├── exec_quantum_ms + * │ ├── preempt_timeout_us + * │ └── sched_priority + * ├── vf2/ + * : + * └── vfN/ + */ + +struct xe_sriov_kobj { + struct kobject base; + struct xe_device *xe; + unsigned int vfid; +}; +#define to_xe_sriov_kobj(p) container_of_const((p), struct xe_sriov_kobj, base) + +struct xe_sriov_dev_attr { + struct attribute attr; + ssize_t (*show)(struct xe_device *xe, char *buf); + ssize_t (*store)(struct xe_device *xe, const char *buf, size_t count); +}; +#define to_xe_sriov_dev_attr(p) container_of_const((p), struct xe_sriov_dev_attr, attr) + +#define XE_SRIOV_DEV_ATTR(NAME) \ +struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \ + __ATTR(NAME, 0644, xe_sriov_dev_attr_##NAME##_show, xe_sriov_dev_attr_##NAME##_store) + +#define XE_SRIOV_DEV_ATTR_RO(NAME) \ +struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \ + __ATTR(NAME, 0444, xe_sriov_dev_attr_##NAME##_show, NULL) + +#define XE_SRIOV_DEV_ATTR_WO(NAME) \ +struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \ + __ATTR(NAME, 0200, NULL, xe_sriov_dev_attr_##NAME##_store) + +struct xe_sriov_vf_attr { + struct attribute attr; + ssize_t (*show)(struct xe_device *xe, unsigned int vfid, char *buf); + ssize_t (*store)(struct xe_device *xe, unsigned int vfid, const char *buf, size_t count); +}; +#define to_xe_sriov_vf_attr(p) container_of_const((p), struct xe_sriov_vf_attr, attr) + +#define XE_SRIOV_VF_ATTR(NAME) \ +struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \ + __ATTR(NAME, 0644, xe_sriov_vf_attr_##NAME##_show, xe_sriov_vf_attr_##NAME##_store) + +#define XE_SRIOV_VF_ATTR_RO(NAME) \ +struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \ + __ATTR(NAME, 0444, xe_sriov_vf_attr_##NAME##_show, NULL) + +#define XE_SRIOV_VF_ATTR_WO(NAME) \ +struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \ + __ATTR(NAME, 0200, NULL, xe_sriov_vf_attr_##NAME##_store) + +/* device level attributes go here */ + +#define DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(NAME, ITEM, TYPE) \ + \ +static ssize_t xe_sriov_dev_attr_##NAME##_store(struct xe_device *xe, \ + const char *buf, size_t count) \ +{ \ + TYPE value; \ + int err; \ + \ + err = kstrto##TYPE(buf, 0, &value); \ + if (err) \ + return err; \ + \ + err = xe_sriov_pf_provision_bulk_apply_##ITEM(xe, value); \ + return err ?: count; \ +} \ + \ +static XE_SRIOV_DEV_ATTR_WO(NAME) + +DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(exec_quantum_ms, eq, u32); +DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(preempt_timeout_us, pt, u32); + +static const char * const sched_priority_names[] = { + [GUC_SCHED_PRIORITY_LOW] = "low", + [GUC_SCHED_PRIORITY_NORMAL] = "normal", + [GUC_SCHED_PRIORITY_HIGH] = "high", +}; + +static bool sched_priority_change_allowed(unsigned int vfid) +{ + /* As of today GuC FW allows to selectively change only the PF priority. */ + return vfid == PFID; +} + +static bool sched_priority_high_allowed(unsigned int vfid) +{ + /* As of today GuC FW allows to select 'high' priority only for the PF. 
*/ + return vfid == PFID; +} + +static bool sched_priority_bulk_high_allowed(struct xe_device *xe) +{ + /* all VFs are equal - it's sufficient to check VF1 only */ + return sched_priority_high_allowed(VFID(1)); +} + +static ssize_t xe_sriov_dev_attr_sched_priority_store(struct xe_device *xe, + const char *buf, size_t count) +{ + size_t num_priorities = ARRAY_SIZE(sched_priority_names); + int match; + int err; + + if (!sched_priority_bulk_high_allowed(xe)) + num_priorities--; + + match = __sysfs_match_string(sched_priority_names, num_priorities, buf); + if (match < 0) + return -EINVAL; + + err = xe_sriov_pf_provision_bulk_apply_priority(xe, match); + return err ?: count; +} + +static XE_SRIOV_DEV_ATTR_WO(sched_priority); + +static struct attribute *bulk_profile_dev_attrs[] = { + &xe_sriov_dev_attr_exec_quantum_ms.attr, + &xe_sriov_dev_attr_preempt_timeout_us.attr, + &xe_sriov_dev_attr_sched_priority.attr, + NULL +}; + +static const struct attribute_group bulk_profile_dev_attr_group = { + .name = ".bulk_profile", + .attrs = bulk_profile_dev_attrs, +}; + +static const struct attribute_group *xe_sriov_dev_attr_groups[] = { + &bulk_profile_dev_attr_group, + NULL +}; + +/* and VF-level attributes go here */ + +#define DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(NAME, ITEM, TYPE, FORMAT) \ +static ssize_t xe_sriov_vf_attr_##NAME##_show(struct xe_device *xe, unsigned int vfid, \ + char *buf) \ +{ \ + TYPE value = 0; \ + int err; \ + \ + err = xe_sriov_pf_provision_query_vf_##ITEM(xe, vfid, &value); \ + if (err) \ + return err; \ + \ + return sysfs_emit(buf, FORMAT, value); \ +} \ + \ +static ssize_t xe_sriov_vf_attr_##NAME##_store(struct xe_device *xe, unsigned int vfid, \ + const char *buf, size_t count) \ +{ \ + TYPE value; \ + int err; \ + \ + err = kstrto##TYPE(buf, 0, &value); \ + if (err) \ + return err; \ + \ + err = xe_sriov_pf_provision_apply_vf_##ITEM(xe, vfid, value); \ + return err ?: count; \ +} \ + \ +static XE_SRIOV_VF_ATTR(NAME) + +DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(exec_quantum_ms, eq, u32, "%u\n"); +DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(preempt_timeout_us, pt, u32, "%u\n"); + +static ssize_t xe_sriov_vf_attr_sched_priority_show(struct xe_device *xe, unsigned int vfid, + char *buf) +{ + size_t num_priorities = ARRAY_SIZE(sched_priority_names); + u32 priority; + int err; + + err = xe_sriov_pf_provision_query_vf_priority(xe, vfid, &priority); + if (err) + return err; + + if (!sched_priority_high_allowed(vfid)) + num_priorities--; + + xe_assert(xe, priority < num_priorities); + return emit_choice(buf, priority, sched_priority_names, num_priorities); +} + +static ssize_t xe_sriov_vf_attr_sched_priority_store(struct xe_device *xe, unsigned int vfid, + const char *buf, size_t count) +{ + size_t num_priorities = ARRAY_SIZE(sched_priority_names); + int match; + int err; + + if (!sched_priority_change_allowed(vfid)) + return -EOPNOTSUPP; + + if (!sched_priority_high_allowed(vfid)) + num_priorities--; + + match = __sysfs_match_string(sched_priority_names, num_priorities, buf); + if (match < 0) + return -EINVAL; + + err = xe_sriov_pf_provision_apply_vf_priority(xe, vfid, match); + return err ?: count; +} + +static XE_SRIOV_VF_ATTR(sched_priority); + +static struct attribute *profile_vf_attrs[] = { + &xe_sriov_vf_attr_exec_quantum_ms.attr, + &xe_sriov_vf_attr_preempt_timeout_us.attr, + &xe_sriov_vf_attr_sched_priority.attr, + NULL +}; + +static umode_t profile_vf_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct xe_sriov_kobj *vkobj = 
to_xe_sriov_kobj(kobj); + + if (attr == &xe_sriov_vf_attr_sched_priority.attr && + !sched_priority_change_allowed(vkobj->vfid)) + return attr->mode & 0444; + + return attr->mode; +} + +static const struct attribute_group profile_vf_attr_group = { + .name = "profile", + .attrs = profile_vf_attrs, + .is_visible = profile_vf_attr_is_visible, +}; + +#define DEFINE_SIMPLE_CONTROL_SRIOV_VF_ATTR(NAME) \ + \ +static ssize_t xe_sriov_vf_attr_##NAME##_store(struct xe_device *xe, unsigned int vfid, \ + const char *buf, size_t count) \ +{ \ + bool yes; \ + int err; \ + \ + if (!vfid) \ + return -EPERM; \ + \ + err = kstrtobool(buf, &yes); \ + if (err) \ + return err; \ + if (!yes) \ + return count; \ + \ + err = xe_sriov_pf_control_##NAME##_vf(xe, vfid); \ + return err ?: count; \ +} \ + \ +static XE_SRIOV_VF_ATTR_WO(NAME) + +DEFINE_SIMPLE_CONTROL_SRIOV_VF_ATTR(stop); + +static struct attribute *control_vf_attrs[] = { + &xe_sriov_vf_attr_stop.attr, + NULL +}; + +static umode_t control_vf_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + + if (vkobj->vfid == PFID) + return 0; + + return attr->mode; +} + +static const struct attribute_group control_vf_attr_group = { + .attrs = control_vf_attrs, + .is_visible = control_vf_attr_is_visible, +}; + +static const struct attribute_group *xe_sriov_vf_attr_groups[] = { + &profile_vf_attr_group, + &control_vf_attr_group, + NULL +}; + +/* no user serviceable parts below */ + +static struct kobject *create_xe_sriov_kobj(struct xe_device *xe, unsigned int vfid) +{ + struct xe_sriov_kobj *vkobj; + + xe_sriov_pf_assert_vfid(xe, vfid); + + vkobj = kzalloc(sizeof(*vkobj), GFP_KERNEL); + if (!vkobj) + return NULL; + + vkobj->xe = xe; + vkobj->vfid = vfid; + return &vkobj->base; +} + +static void release_xe_sriov_kobj(struct kobject *kobj) +{ + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + + kfree(vkobj); +} + +static ssize_t xe_sriov_dev_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + + if (!vattr->show) + return -EPERM; + + return vattr->show(xe, buf); +} + +static ssize_t xe_sriov_dev_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + ssize_t ret; + + if (!vattr->store) + return -EPERM; + + xe_pm_runtime_get(xe); + ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, buf, count); + xe_pm_runtime_put(xe); + + return ret; +} + +static ssize_t xe_sriov_vf_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct xe_sriov_vf_attr *vattr = to_xe_sriov_vf_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + unsigned int vfid = vkobj->vfid; + + xe_sriov_pf_assert_vfid(xe, vfid); + + if (!vattr->show) + return -EPERM; + + return vattr->show(xe, vfid, buf); +} + +static ssize_t xe_sriov_vf_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct xe_sriov_vf_attr *vattr = to_xe_sriov_vf_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + unsigned int vfid = vkobj->vfid; + ssize_t ret; + + xe_sriov_pf_assert_vfid(xe, vfid); + + if 
(!vattr->store) + return -EPERM; + + xe_pm_runtime_get(xe); + ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, vfid, buf, count); + xe_pm_runtime_put(xe); + + return ret; +} + +static const struct sysfs_ops xe_sriov_dev_sysfs_ops = { + .show = xe_sriov_dev_attr_show, + .store = xe_sriov_dev_attr_store, +}; + +static const struct sysfs_ops xe_sriov_vf_sysfs_ops = { + .show = xe_sriov_vf_attr_show, + .store = xe_sriov_vf_attr_store, +}; + +static const struct kobj_type xe_sriov_dev_ktype = { + .release = release_xe_sriov_kobj, + .sysfs_ops = &xe_sriov_dev_sysfs_ops, + .default_groups = xe_sriov_dev_attr_groups, +}; + +static const struct kobj_type xe_sriov_vf_ktype = { + .release = release_xe_sriov_kobj, + .sysfs_ops = &xe_sriov_vf_sysfs_ops, + .default_groups = xe_sriov_vf_attr_groups, +}; + +static int pf_sysfs_error(struct xe_device *xe, int err, const char *what) +{ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err)); + return err; +} + +static void pf_sysfs_note(struct xe_device *xe, int err, const char *what) +{ + xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err)); +} + +static void action_put_kobject(void *arg) +{ + struct kobject *kobj = arg; + + kobject_put(kobj); +} + +static int pf_setup_root(struct xe_device *xe) +{ + struct kobject *parent = &xe->drm.dev->kobj; + struct kobject *root; + int err; + + root = create_xe_sriov_kobj(xe, PFID); + if (!root) + return pf_sysfs_error(xe, -ENOMEM, "root obj"); + + err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, root); + if (err) + return pf_sysfs_error(xe, err, "root action"); + + err = kobject_init_and_add(root, &xe_sriov_dev_ktype, parent, "sriov_admin"); + if (err) + return pf_sysfs_error(xe, err, "root init"); + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, !xe->sriov.pf.sysfs.root); + xe->sriov.pf.sysfs.root = root; + return 0; +} + +static int pf_setup_tree(struct xe_device *xe) +{ + unsigned int totalvfs = xe_sriov_pf_get_totalvfs(xe); + struct kobject *root, *kobj; + unsigned int n; + int err; + + xe_assert(xe, IS_SRIOV_PF(xe)); + root = xe->sriov.pf.sysfs.root; + + for (n = 0; n <= totalvfs; n++) { + kobj = create_xe_sriov_kobj(xe, VFID(n)); + if (!kobj) + return pf_sysfs_error(xe, -ENOMEM, "tree obj"); + + err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, kobj); + if (err) + return pf_sysfs_error(xe, err, "tree action"); + + if (n) + err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype, + root, "vf%u", n); + else + err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype, + root, "pf"); + if (err) + return pf_sysfs_error(xe, err, "tree init"); + + xe_assert(xe, !xe->sriov.pf.vfs[n].kobj); + xe->sriov.pf.vfs[n].kobj = kobj; + } + + return 0; +} + +static void action_rm_device_link(void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_link(kobj, "device"); +} + +static int pf_link_pf_device(struct xe_device *xe) +{ + struct kobject *kobj = xe->sriov.pf.vfs[PFID].kobj; + int err; + + err = sysfs_create_link(kobj, &xe->drm.dev->kobj, "device"); + if (err) + return pf_sysfs_error(xe, err, "PF device link"); + + err = devm_add_action_or_reset(xe->drm.dev, action_rm_device_link, kobj); + if (err) + return pf_sysfs_error(xe, err, "PF unlink action"); + + return 0; +} + +/** + * xe_sriov_pf_sysfs_init() - Setup PF's SR-IOV sysfs tree.
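Once the tree is registered, the per-function profile attributes can be driven with ordinary file I/O. A minimal sketch (illustrative only; the PF address 0000:03:00.0 is a made-up example, error handling is trimmed, and note that a VF's sched_priority is exposed read-only by profile_vf_attr_is_visible()):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *base = "/sys/bus/pci/drivers/xe/0000:03:00.0/sriov_admin";
	char path[256], buf[64] = "";
	int fd;

	/* Give VF1 a 20 ms execution quantum (store -> kstrtou32 -> apply on all GTs). */
	snprintf(path, sizeof(path), "%s/vf1/profile/exec_quantum_ms", base);
	fd = open(path, O_WRONLY);
	if (fd >= 0) {
		write(fd, "20", 2);
		close(fd);
	}

	/* sched_priority lists the allowed choices with the current one in
	 * brackets (emit_choice()), e.g. "low [normal]" for a VF. */
	snprintf(path, sizeof(path), "%s/vf1/profile/sched_priority", base);
	fd = open(path, O_RDONLY);
	if (fd >= 0) {
		read(fd, buf, sizeof(buf) - 1);
		close(fd);
	}
	printf("VF1 sched_priority: %s", buf);

	return 0;
}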
+ * @xe: the PF &xe_device to setup sysfs + * + * This function will create additional nodes that will represent PF and VFs + * devices, each populated with SR-IOV Xe specific attributes. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_sysfs_init(struct xe_device *xe) +{ + int err; + + err = pf_setup_root(xe); + if (err) + return err; + + err = pf_setup_tree(xe); + if (err) + return err; + + err = pf_link_pf_device(xe); + if (err) + return err; + + return 0; +} + +/** + * xe_sriov_pf_sysfs_link_vfs() - Add VF's links in SR-IOV sysfs tree. + * @xe: the &xe_device where to update sysfs + * @num_vfs: number of enabled VFs to link + * + * This function is specific for the PF driver. + * + * This function will add symbolic links between VFs represented in the SR-IOV + * sysfs tree maintained by the PF and enabled VF PCI devices. + * + * The @xe_sriov_pf_sysfs_unlink_vfs() shall be used to remove those links. + */ +void xe_sriov_pf_sysfs_link_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + unsigned int totalvfs = xe_sriov_pf_get_totalvfs(xe); + struct pci_dev *pf_pdev = to_pci_dev(xe->drm.dev); + struct pci_dev *vf_pdev = NULL; + unsigned int n; + int err; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, num_vfs <= totalvfs); + + for (n = 1; n <= num_vfs; n++) { + vf_pdev = xe_pci_sriov_get_vf_pdev(pf_pdev, VFID(n)); + if (!vf_pdev) + return pf_sysfs_note(xe, -ENOENT, "VF link"); + + err = sysfs_create_link(xe->sriov.pf.vfs[VFID(n)].kobj, + &vf_pdev->dev.kobj, "device"); + + /* must balance xe_pci_sriov_get_vf_pdev() */ + pci_dev_put(vf_pdev); + + if (err) + return pf_sysfs_note(xe, err, "VF link"); + } +} + +/** + * xe_sriov_pf_sysfs_unlink_vfs() - Remove VF's links from SR-IOV sysfs tree. + * @xe: the &xe_device where to update sysfs + * @num_vfs: number of VFs to unlink + * + * This function shall be called only on the PF. + * This function will remove "device" links added by @xe_sriov_sysfs_link_vfs(). + */ +void xe_sriov_pf_sysfs_unlink_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + unsigned int n; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, num_vfs <= xe_sriov_pf_get_totalvfs(xe)); + + for (n = 1; n <= num_vfs; n++) + sysfs_remove_link(xe->sriov.pf.vfs[VFID(n)].kobj, "device"); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h new file mode 100644 index 000000000000..ae92ed1766e7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SYSFS_H_ +#define _XE_SRIOV_PF_SYSFS_H_ + +struct xe_device; + +int xe_sriov_pf_sysfs_init(struct xe_device *xe); + +void xe_sriov_pf_sysfs_link_vfs(struct xe_device *xe, unsigned int num_vfs); +void xe_sriov_pf_sysfs_unlink_vfs(struct xe_device *xe, unsigned int num_vfs); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h index c753cd59aed2..b0253e1ae5da 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -9,15 +9,24 @@ #include <linux/mutex.h> #include <linux/types.h> +#include "xe_guard.h" +#include "xe_sriov_pf_migration_types.h" #include "xe_sriov_pf_provision_types.h" #include "xe_sriov_pf_service_types.h" +struct kobject; + /** * struct xe_sriov_metadata - per-VF device level metadata */ struct xe_sriov_metadata { + /** @kobj: kobject representing VF in PF's SR-IOV sysfs tree. 
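The "device" links added by xe_sriov_pf_sysfs_link_vfs() let tooling map a vfN directory back to the VF's own PCI node. A small sketch (the PF address is again a made-up example):

#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *link =
		"/sys/bus/pci/drivers/xe/0000:03:00.0/sriov_admin/vf1/device";
	char target[PATH_MAX];
	ssize_t n;

	n = readlink(link, target, sizeof(target) - 1);
	if (n < 0)
		return 1;
	target[n] = '\0';
	/* Relative path to the VF PCI device, e.g. "../../../<BDF>.1" as in the
	 * layout comment above. */
	printf("vf1 -> %s\n", target);
	return 0;
}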
*/ + struct kobject *kobj; + /** @version: negotiated VF/PF ABI version */ struct xe_sriov_pf_service_version version; + /** @migration: migration state */ + struct xe_sriov_migration_state migration; }; /** @@ -33,15 +42,27 @@ struct xe_device_pf { /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ u16 driver_max_vfs; + /** @guard_vfs_enabling: guards VFs enabling */ + struct xe_guard guard_vfs_enabling; + /** @master_lock: protects all VFs configurations across GTs */ struct mutex master_lock; /** @provision: device level provisioning data. */ struct xe_sriov_pf_provision provision; + /** @migration: device level migration data. */ + struct xe_sriov_pf_migration migration; + /** @service: device level service data. */ struct xe_sriov_pf_service service; + /** @sysfs: device level sysfs data. */ + struct { + /** @sysfs.root: the root kobject for all SR-IOV entries in sysfs. */ + struct kobject *root; + } sysfs; + /** @vfs: metadata for all VFs. */ struct xe_sriov_metadata *vfs; }; diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 39c829daa97c..284ce37ca92d 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -156,14 +156,6 @@ void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...) static void vf_migration_init_early(struct xe_device *xe) { - /* - * TODO: Add conditions to allow specific platforms, when they're - * supported at production quality. - */ - if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) - return xe_sriov_vf_migration_disable(xe, - "experimental feature not available on production builds"); - if (!xe_device_has_memirq(xe)) return xe_sriov_vf_migration_disable(xe, "requires memory-based IRQ support"); diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 13af589715a7..55c5a0eb82e1 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -104,8 +104,7 @@ xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range, &vm->svm.garbage_collector.range_list); spin_unlock(&vm->svm.garbage_collector.lock); - queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq, - &vm->svm.garbage_collector.work); + queue_work(xe->usm.pf_wq, &vm->svm.garbage_collector.work); } static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 82872a51f098..ff74528ca0c6 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -14,7 +14,7 @@ #include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> -#include "xe_device_types.h" +#include "xe_device.h" #include "xe_exec_queue.h" #include "xe_macros.h" #include "xe_sched_job_types.h" @@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags) { struct drm_xe_sync sync_in; @@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (exec) { sync->addr = sync_in.addr; } else { + sync->ufence_timeline_value = ufence_timeline_value; sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) return PTR_ERR(sync->ufence); + sync->ufence_chain_fence = dma_fence_chain_alloc(); + if (!sync->ufence_chain_fence) + return -ENOMEM; + sync->ufence_syncobj = 
ufence_syncobj; } break; @@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence) } else if (sync->ufence) { int err; - dma_fence_get(fence); + drm_syncobj_add_point(sync->ufence_syncobj, + sync->ufence_chain_fence, + fence, sync->ufence_timeline_value); + sync->ufence_chain_fence = NULL; + + fence = drm_syncobj_fence_get(sync->ufence_syncobj); user_fence_get(sync->ufence); err = dma_fence_add_callback(fence, &sync->ufence->cb, user_fence_cb); @@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) drm_syncobj_put(sync->syncobj); dma_fence_put(sync->fence); dma_fence_chain_free(sync->chain_fence); - if (sync->ufence) + dma_fence_chain_free(sync->ufence_chain_fence); + if (!IS_ERR_OR_NULL(sync->ufence)) user_fence_put(sync->ufence); } @@ -284,51 +297,59 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct dma_fence *fence; - int i, num_in_fence = 0, current_fence = 0; + int i, num_fence = 0, current_fence = 0; lockdep_assert_held(&vm->lock); - /* Count in-fences */ - for (i = 0; i < num_sync; ++i) { - if (sync[i].fence) { - ++num_in_fence; - fence = sync[i].fence; + /* Reject in fences */ + for (i = 0; i < num_sync; ++i) + if (sync[i].fence) + return ERR_PTR(-EOPNOTSUPP); + + if (q->flags & EXEC_QUEUE_FLAG_VM) { + struct xe_exec_queue *__q; + struct xe_tile *tile; + u8 id; + + for_each_tile(tile, vm->xe, id) + num_fence += (1 + XE_MAX_GT_PER_TILE); + + fences = kmalloc_array(num_fence, sizeof(*fences), + GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + + fences[current_fence++] = + xe_exec_queue_last_fence_get(q, vm); + for_each_tlb_inval(i) + fences[current_fence++] = + xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); + list_for_each_entry(__q, &q->multi_gt_list, + multi_gt_link) { + fences[current_fence++] = + xe_exec_queue_last_fence_get(__q, vm); + for_each_tlb_inval(i) + fences[current_fence++] = + xe_exec_queue_tlb_inval_last_fence_get(__q, vm, i); } - } - /* Easy case... 
*/ - if (!num_in_fence) { - fence = xe_exec_queue_last_fence_get(q, vm); - return fence; - } + xe_assert(vm->xe, current_fence == num_fence); + cf = dma_fence_array_create(num_fence, fences, + dma_fence_context_alloc(1), + 1, false); + if (!cf) + goto err_out; - /* Create composite fence */ - fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); - for (i = 0; i < num_sync; ++i) { - if (sync[i].fence) { - dma_fence_get(sync[i].fence); - fences[current_fence++] = sync[i].fence; - } - } - fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm); - cf = dma_fence_array_create(num_in_fence, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - if (!cf) { - --vm->composite_fence_seqno; - goto err_out; + return &cf->base; } - return &cf->base; + fence = xe_exec_queue_last_fence_get(q, vm); + return fence; err_out: while (current_fence) dma_fence_put(fences[--current_fence]); kfree(fences); - kfree(cf); return ERR_PTR(-ENOMEM); } diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 256ffc1e54dc..51f2d803e977 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -8,6 +8,7 @@ #include "xe_sync_types.h" +struct drm_syncobj; struct xe_device; struct xe_exec_queue; struct xe_file; @@ -21,6 +22,8 @@ struct xe_vm; int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 30ac3f51993b..b88f1833e28c 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,9 +18,12 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; + struct dma_fence_chain *ufence_chain_fence; + struct drm_syncobj *ufence_syncobj; struct xe_user_fence *ufence; u64 addr; u64 timeline_value; + u64 ufence_timeline_value; u32 type; u32 flags; }; diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c index 492def04a559..1ae0dec2cf31 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c @@ -12,6 +12,7 @@ #include "xe_tlb_inval_job.h" #include "xe_migrate.h" #include "xe_pm.h" +#include "xe_vm.h" /** struct xe_tlb_inval_job - TLB invalidation job */ struct xe_tlb_inval_job { @@ -21,6 +22,8 @@ struct xe_tlb_inval_job { struct xe_tlb_inval *tlb_inval; /** @q: exec queue issuing the invalidate */ struct xe_exec_queue *q; + /** @vm: VM which TLB invalidation is being issued for */ + struct xe_vm *vm; /** @refcount: ref count of this job */ struct kref refcount; /** @@ -32,8 +35,8 @@ struct xe_tlb_inval_job { u64 start; /** @end: End address to invalidate */ u64 end; - /** @asid: Address space ID to invalidate */ - u32 asid; + /** @type: GT type */ + int type; /** @fence_armed: Fence has been armed */ bool fence_armed; }; @@ -46,7 +49,7 @@ static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job) container_of(job->fence, typeof(*ifence), base); xe_tlb_inval_range(job->tlb_inval, ifence, job->start, - job->end, job->asid); + job->end, job->vm->usm.asid); return job->fence; } @@ -70,9 +73,10 @@ static const struct xe_dep_job_ops dep_job_ops = { * @q: exec queue issuing the invalidate * 
@tlb_inval: TLB invalidation client * @dep_scheduler: Dependency scheduler for job + * @vm: VM which TLB invalidation is being issued for * @start: Start address to invalidate * @end: End address to invalidate - * @asid: Address space ID to invalidate + * @type: GT type * * Create a TLB invalidation job and initialize internal fields. The caller is * responsible for releasing the creation reference. @@ -81,8 +85,8 @@ static const struct xe_dep_job_ops dep_job_ops = { */ struct xe_tlb_inval_job * xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, - struct xe_dep_scheduler *dep_scheduler, u64 start, - u64 end, u32 asid) + struct xe_dep_scheduler *dep_scheduler, + struct xe_vm *vm, u64 start, u64 end, int type) { struct xe_tlb_inval_job *job; struct drm_sched_entity *entity = @@ -90,19 +94,24 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, struct xe_tlb_inval_fence *ifence; int err; + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + job = kmalloc(sizeof(*job), GFP_KERNEL); if (!job) return ERR_PTR(-ENOMEM); job->q = q; + job->vm = vm; job->tlb_inval = tlb_inval; job->start = start; job->end = end; - job->asid = asid; job->fence_armed = false; job->dep.ops = &dep_job_ops; + job->type = type; kref_init(&job->refcount); xe_exec_queue_get(q); /* Pairs with put in xe_tlb_inval_job_destroy */ + xe_vm_get(vm); /* Pairs with put in xe_tlb_inval_job_destroy */ ifence = kmalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) { @@ -124,6 +133,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, err_fence: kfree(ifence); err_job: + xe_vm_put(vm); xe_exec_queue_put(q); kfree(job); @@ -138,6 +148,7 @@ static void xe_tlb_inval_job_destroy(struct kref *ref) container_of(job->fence, typeof(*ifence), base); struct xe_exec_queue *q = job->q; struct xe_device *xe = gt_to_xe(q->gt); + struct xe_vm *vm = job->vm; if (!job->fence_armed) kfree(ifence); @@ -147,6 +158,7 @@ static void xe_tlb_inval_job_destroy(struct kref *ref) drm_sched_job_cleanup(&job->dep.drm); kfree(job); + xe_vm_put(vm); /* Pairs with get from xe_tlb_inval_job_create */ xe_exec_queue_put(q); /* Pairs with get from xe_tlb_inval_job_create */ xe_pm_runtime_put(xe); /* Pairs with get from xe_tlb_inval_job_create */ } @@ -231,6 +243,11 @@ struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job, dma_fence_get(&job->dep.drm.s_fence->finished); drm_sched_entity_push_job(&job->dep.drm); + /* Let the upper layers fish this out */ + xe_exec_queue_tlb_inval_last_fence_set(job->q, job->vm, + &job->dep.drm.s_fence->finished, + job->type); + xe_migrate_job_unlock(m, job->q); /* diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h index e63edcb26b50..4d6df1a6c6ca 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h @@ -11,14 +11,15 @@ struct dma_fence; struct xe_dep_scheduler; struct xe_exec_queue; +struct xe_migrate; struct xe_tlb_inval; struct xe_tlb_inval_job; -struct xe_migrate; +struct xe_vm; struct xe_tlb_inval_job * xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, struct xe_dep_scheduler *dep_scheduler, - u64 start, u64 end, u32 asid); + struct xe_vm *vm, u64 start, u64 end, int type); int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job); diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 314f42fcbcbd..79a97b086cb2 100644 --- 
a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -441,6 +441,29 @@ TRACE_EVENT(xe_eu_stall_data_read, __entry->read_size, __entry->total_size) ); +TRACE_EVENT(xe_exec_queue_reach_max_job_count, + TP_PROTO(struct xe_exec_queue *q, int max_cnt), + TP_ARGS(q, max_cnt), + + TP_STRUCT__entry(__string(dev, __dev_name_eq(q)) + __field(enum xe_engine_class, class) + __field(u32, logical_mask) + __field(u16, guc_id) + __field(int, max_cnt) + ), + + TP_fast_assign(__assign_str(dev); + __entry->class = q->class; + __entry->logical_mask = q->logical_mask; + __entry->guc_id = q->guc->id; + __entry->max_cnt = max_cnt; + ), + + TP_printk("dev=%s, job count exceeded the maximum limit (%d) per exec queue. engine_class=0x%x, logical_mask=0x%x, guc_id=%d", + __get_str(dev), __entry->max_cnt, + __entry->class, __entry->logical_mask, __entry->guc_id) +); + #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h index 1ef181c90434..a30e732c4d51 100644 --- a/drivers/gpu/drm/xe/xe_validation.h +++ b/drivers/gpu/drm/xe/xe_validation.h @@ -166,10 +166,10 @@ xe_validation_device_init(struct xe_validation_device *val) */ DEFINE_CLASS(xe_validation, struct xe_validation_ctx *, if (_T) xe_validation_ctx_fini(_T);, - ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); - _ret ? NULL : _ctx; }), + ({*_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); + *_ret ? NULL : _ctx; }), struct xe_validation_ctx *_ctx, struct xe_validation_device *_val, - struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret); + struct drm_exec *_exec, const struct xe_val_flags _flags, int *_ret); static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) {return *_T; } #define class_xe_validation_is_conditional true @@ -186,7 +186,7 @@ static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) * exhaustive eviction. 
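The TRACE_EVENT() above makes a trace_xe_exec_queue_reach_max_job_count() call-site helper available (the helper name is generated from the event name by the tracepoint machinery). A hypothetical caller could look like the sketch below; the pending-job bookkeeping is invented for illustration and is not part of this hunk:

#include <linux/errno.h>

#include "xe_trace.h"

/* Illustrative only: the real limit check lives elsewhere in the driver. */
static int demo_check_job_budget(struct xe_exec_queue *q, int pending, int max_cnt)
{
        if (pending < max_cnt)
                return 0;

        trace_xe_exec_queue_reach_max_job_count(q, max_cnt);
        return -EBUSY;
}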
*/ #define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \ - scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \ + scoped_guard(xe_validation, _ctx, _val, _exec, _flags, &_ret) \ drm_exec_until_all_locked(_exec) #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 00f3520dec38..8fb5cc6a69ec 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -27,7 +27,6 @@ #include "xe_device.h" #include "xe_drm_client.h" #include "xe_exec_queue.h" -#include "xe_gt_pagefault.h" #include "xe_migrate.h" #include "xe_pat.h" #include "xe_pm.h" @@ -755,6 +754,7 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = @@ -845,6 +845,7 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = @@ -1458,7 +1459,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) struct xe_validation_ctx ctx; struct drm_exec exec; struct xe_vm *vm; - int err, number_tiles = 0; + int err; struct xe_tile *tile; u8 id; @@ -1619,13 +1620,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) goto err_close; } vm->q[id] = q; - number_tiles++; } } - if (number_tiles > 1) - vm->composite_fence_ctx = dma_fence_context_alloc(1); - if (xef && xe->info.has_asid) { u32 asid; @@ -1731,8 +1728,13 @@ void xe_vm_close_and_put(struct xe_vm *vm) down_write(&vm->lock); for_each_tile(tile, xe, id) { - if (vm->q[id]) + if (vm->q[id]) { + int i; + xe_exec_queue_last_fence_put(vm->q[id], vm); + for_each_tlb_inval(i) + xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i); + } } up_write(&vm->lock); @@ -3102,20 +3104,31 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - int number_tiles = 0, current_fence = 0, err; + int number_tiles = 0, current_fence = 0, n_fence = 0, err; u8 id; number_tiles = vm_ops_setup_tile_args(vm, vops); if (number_tiles == 0) return ERR_PTR(-ENODATA); - if (number_tiles > 1) { - fences = kmalloc_array(number_tiles, sizeof(*fences), - GFP_KERNEL); - if (!fences) { - fence = ERR_PTR(-ENOMEM); - goto err_trace; - } + if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) { + for_each_tile(tile, vm->xe, id) + ++n_fence; + } else { + for_each_tile(tile, vm->xe, id) + n_fence += (1 + XE_MAX_GT_PER_TILE); + } + + fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL); + if (!fences) { + fence = ERR_PTR(-ENOMEM); + goto err_trace; + } + + cf = dma_fence_array_alloc(n_fence); + if (!cf) { + fence = ERR_PTR(-ENOMEM); + goto err_out; } for_each_tile(tile, vm->xe, id) { @@ -3132,30 +3145,34 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, trace_xe_vm_ops_execute(vops); for_each_tile(tile, vm->xe, id) { + struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; + int i; + + fence = NULL; if (!vops->pt_update_ops[id].num_ops) - continue; + goto collect_fences; fence = xe_pt_update_ops_run(tile, vops); if (IS_ERR(fence)) goto err_out; - if (fences) - 
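With DEFINE_CLASS() now taking int *_ret, callers keep passing a plain int and the macro forwards its address, so the local status still reflects a failed xe_validation_ctx_init() even when the guarded body never runs. A rough usage fragment under that assumption; the empty xe_val_flags initializer, the 'val' pointer, and the surrounding locals are illustrative, not taken from a real call site:

        struct xe_validation_ctx ctx;
        struct drm_exec exec;
        int ret = 0;

        /* 'val' stands for the driver's struct xe_validation_device instance. */
        xe_validation_guard(&ctx, val, &exec, (struct xe_val_flags) {}, ret) {
                /* lock and validate objects; drm_exec retries on contention */
        }
        if (ret)
                return ret;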
fences[current_fence++] = fence; - } +collect_fences: + fences[current_fence++] = fence ?: dma_fence_get_stub(); + if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) + continue; - if (fences) { - cf = dma_fence_array_create(number_tiles, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - if (!cf) { - --vm->composite_fence_seqno; - fence = ERR_PTR(-ENOMEM); - goto err_out; - } - fence = &cf->base; + xe_migrate_job_lock(tile->migrate, q); + for_each_tlb_inval(i) + fences[current_fence++] = + xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); + xe_migrate_job_unlock(tile->migrate, q); } + xe_assert(vm->xe, current_fence == n_fence); + dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), + 1, false); + fence = &cf->base; + for_each_tile(tile, vm->xe, id) { if (!vops->pt_update_ops[id].num_ops) continue; @@ -3215,7 +3232,6 @@ static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, struct dma_fence *fence) { - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); struct xe_user_fence *ufence; struct xe_vma_op *op; int i; @@ -3236,7 +3252,6 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, if (fence) { for (i = 0; i < vops->num_syncs; i++) xe_sync_entry_signal(vops->syncs + i, fence); - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); } } @@ -3430,19 +3445,19 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, struct xe_sync_entry *syncs, int num_syncs) { - struct dma_fence *fence; + struct dma_fence *fence = NULL; int i, err = 0; - fence = xe_sync_in_fence_get(syncs, num_syncs, - to_wait_exec_queue(vm, q), vm); - if (IS_ERR(fence)) - return PTR_ERR(fence); + if (num_syncs) { + fence = xe_sync_in_fence_get(syncs, num_syncs, + to_wait_exec_queue(vm, q), vm); + if (IS_ERR(fence)) + return PTR_ERR(fence); - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], fence); + } - xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, - fence); dma_fence_put(fence); return err; @@ -3633,8 +3648,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) syncs_user = u64_to_user_ptr(args->syncs); for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + struct xe_exec_queue *__q = q ?: vm->q[0]; + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], &syncs_user[num_syncs], + __q->ufence_syncobj, + ++__q->ufence_timeline_value, (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0) | (!args->num_binds ? 
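ops_execute() now splits composite-fence creation into dma_fence_array_alloc() up front and dma_fence_array_init() at the end, so nothing can fail after the per-tile fences have been collected, and empty slots are padded with the always-signaled dma_fence_get_stub(). A condensed sketch of that allocate-early/init-late pattern, with demo_combine() standing in for the real function:

#include <linux/dma-fence-array.h>
#include <linux/err.h>
#include <linux/slab.h>

static struct dma_fence *demo_combine(struct dma_fence **collected, int n)
{
        struct dma_fence **fences;
        struct dma_fence_array *cf;
        int i;

        fences = kmalloc_array(n, sizeof(*fences), GFP_KERNEL);
        if (!fences)
                return ERR_PTR(-ENOMEM);

        /* Allocate the array object before anything that must not fail later. */
        cf = dma_fence_array_alloc(n);
        if (!cf) {
                kfree(fences);
                return ERR_PTR(-ENOMEM);
        }

        for (i = 0; i < n; i++)
                fences[i] = collected[i] ? dma_fence_get(collected[i]) :
                                           dma_fence_get_stub();

        /* Cannot fail: the array storage already exists. */
        dma_fence_array_init(cf, n, fences, dma_fence_context_alloc(1), 1, false);

        return &cf->base;
}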
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 830ed7b05c27..ccd6cc090309 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -221,11 +221,6 @@ struct xe_vm { #define XE_VM_FLAG_GSC BIT(8) unsigned long flags; - /** @composite_fence_ctx: context composite fence */ - u64 composite_fence_ctx; - /** @composite_fence_seqno: seqno for composite fence */ - u32 composite_fence_seqno; - /** * @lock: outer most lock, protects objects of anything attached to this * VM @@ -471,6 +466,7 @@ struct xe_vma_ops { #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) #define XE_VMA_OPS_FLAG_MADVISE BIT(1) #define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2) +#define XE_VMA_OPS_FLAG_SKIP_TLB_WAIT BIT(3) u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index b62a96f8ef9e..0e10da790cc5 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -183,12 +183,17 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region * return 0; } -static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) +static int get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size, u64 *poffset) { struct xe_device *xe = gt_to_xe(gt); + unsigned int fw_ref; u64 offset; u32 reg; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + if (GRAPHICS_VER(xe) >= 20) { u64 ccs_size = tile_size / 512; u64 offset_hi, offset_lo; @@ -218,7 +223,10 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K; } - return offset; + xe_force_wake_put(gt_to_fw(gt), fw_ref); + *poffset = offset; + + return 0; } /* @@ -245,7 +253,6 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, { struct xe_device *xe = tile_to_xe(tile); struct xe_gt *gt = tile->primary_gt; - unsigned int fw_ref; u64 offset; u32 reg; @@ -265,23 +272,22 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, return 0; } - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - return -ETIMEDOUT; - /* actual size */ if (unlikely(xe->info.platform == XE_DG1)) { *tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR); *tile_offset = 0; } else { - reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id)); + reg = xe_mmio_read32(&tile->mmio, SG_TILE_ADDR_RANGE(tile->id)); *tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; *tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G; } /* minus device usage */ if (xe->info.has_flat_ccs) { - offset = get_flat_ccs_offset(gt, *tile_size); + int ret = get_flat_ccs_offset(gt, *tile_size, &offset); + + if (ret) + return ret; } else { offset = xe_mmio_read64_2x32(&tile->mmio, GSMBASE); } @@ -289,8 +295,6 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, /* remove the tile offset so we have just the available size */ *vram_size = offset - *tile_offset; - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return 0; } diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index ec638b431131..3764abca3d4f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -679,6 +679,8 @@ static const struct xe_rtp_entry_sr engine_was[] = { }, { XE_RTP_NAME("14023061436"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute), OR, + GRAPHICS_VERSION_RANGE(3003, 3005), 
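The xe_vram.c change converts get_flat_ccs_offset() from returning a u64 into returning a status code with an out-parameter, and moves the force-wake reference inside the helper so it is held only while registers are read. A stripped-down illustration of that shape; all demo_* names are stand-ins, not xe functions:

#include <linux/errno.h>
#include <linux/types.h>

/* Stand-ins for the force-wake domain and the register read. */
struct demo_dev {
        bool hw_awake;
        u64 reg_value;
};

static unsigned int demo_hw_get(struct demo_dev *dev)
{
        return dev->hw_awake ? 1 : 0;   /* 0 means the wake request timed out */
}

static void demo_hw_put(struct demo_dev *dev, unsigned int ref)
{
}

/* Status code out, result through *poffset, wake ref scoped to the reads. */
static int demo_read_offset(struct demo_dev *dev, u64 *poffset)
{
        unsigned int ref = demo_hw_get(dev);

        if (!ref)
                return -ETIMEDOUT;

        *poffset = dev->reg_value;
        demo_hw_put(dev, ref);

        return 0;
}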
FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) }, @@ -920,6 +922,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, FAST_CLEAR_VALIGN_FIX)) }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index b258e79b437a..52520e684ab1 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -861,7 +861,7 @@ MACRO__(0xE216, ## __VA_ARGS__) #define INTEL_BMG_IDS(MACRO__, ...) \ - INTEL_BMG_G21_IDS(MACRO__, __VA_ARGS__), \ + INTEL_BMG_G21_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0xE220, ## __VA_ARGS__), \ MACRO__(0xE221, ## __VA_ARGS__), \ MACRO__(0xE222, ## __VA_ARGS__), \ diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index d016360e5ceb..5618aef462f2 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -221,7 +221,7 @@ struct ttm_device { struct list_head device_list; /** - * @alloc_flags: TTM_ALLOCATION_ flags. + * @alloc_flags: TTM_ALLOCATION_* flags. */ unsigned int alloc_flags; diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h index 67c72de913bb..233581670e78 100644 --- a/include/drm/ttm/ttm_pool.h +++ b/include/drm/ttm/ttm_pool.h @@ -64,7 +64,7 @@ struct ttm_pool_type { * * @dev: the device we allocate pages for * @nid: which numa node to use - * @alloc_flags: TTM_ALLOCATION_POOL_ flags + * @alloc_flags: TTM_ALLOCATION_POOL_* flags * @caching: pools for each caching/order */ struct ttm_pool { diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index 8ad254bc35a5..62c917fd4f7b 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -443,7 +443,9 @@ enum amdxdna_drm_get_param { DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8, DRM_AMDXDNA_GET_POWER_MODE, DRM_AMDXDNA_QUERY_TELEMETRY, - DRM_AMDXDNA_QUERY_RESOURCE_INFO = 12, + DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE, + DRM_AMDXDNA_QUERY_RESOURCE_INFO, + DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE, }; /** @@ -463,6 +465,16 @@ struct amdxdna_drm_get_resource_info { }; /** + * struct amdxdna_drm_attribute_state - State of an attribute + */ +struct amdxdna_drm_attribute_state { + /** @state: enabled or disabled */ + __u8 state; + /** @pad: MBZ */ + __u8 pad[7]; +}; + +/** * struct amdxdna_drm_query_telemetry_header - Telemetry data header */ struct amdxdna_drm_query_telemetry_header { @@ -613,6 +625,8 @@ enum amdxdna_drm_set_param { DRM_AMDXDNA_SET_POWER_MODE, DRM_AMDXDNA_WRITE_AIE_MEM, DRM_AMDXDNA_WRITE_AIE_REG, + DRM_AMDXDNA_SET_FORCE_PREEMPT, + DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT, }; /** diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index e8b47c9f6976..1956431bb391 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -54,32 +54,46 @@ extern "C" { * This asks the kernel to have the GPU execute a render command list. */ struct drm_panfrost_submit { - - /** Address to GPU mapping of job descriptor */ + /** + * @jc: Address to GPU mapping of job descriptor + */ __u64 jc; - - /** An optional array of sync objects to wait on before starting this job. 
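The INTEL_BMG_IDS() one-liner above adds the '##' paste so that an empty __VA_ARGS__ does not leave a dangling comma behind the forwarded macro argument. A small standalone userspace illustration of that comma-elision behaviour (GNU C / C2x extension):

#include <stdio.h>

/*
 * Without the '##' paste, LOG("plain") would expand to
 * printf("plain" "\n", ) and fail to compile because of the dangling comma.
 */
#define LOG(fmt, ...) printf(fmt "\n", ##__VA_ARGS__)

int main(void)
{
        LOG("plain");                   /* empty __VA_ARGS__, comma is dropped */
        LOG("value = %d", 42);          /* normal variadic use */
        return 0;
}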
*/ + /** + * @in_syncs: An optional array of sync objects to wait on + * before starting this job. + */ __u64 in_syncs; - - /** Number of sync objects to wait on before starting this job. */ + /** + * @in_sync_count: Number of sync objects to wait on before + * starting this job. + */ __u32 in_sync_count; - - /** An optional sync object to place the completion fence in. */ + /** + * @out_sync: An optional sync object to place the completion fence in. + */ __u32 out_sync; - - /** Pointer to a u32 array of the BOs that are referenced by the job. */ + /** + * @bo_handles: Pointer to a u32 array of the BOs that are + * referenced by the job. + */ __u64 bo_handles; - - /** Number of BO handles passed in (size is that times 4). */ + /** + * @bo_handle_count: Number of BO handles passed in (size is + * that times 4). + */ __u32 bo_handle_count; - - /** A combination of PANFROST_JD_REQ_* */ + /** + * @requirements: A combination of PANFROST_JD_REQ_* + */ __u32 requirements; - - /** JM context handle. Zero if you want to use the default context. */ + /** + * @jm_ctx_handle: JM context handle. Zero if you want to use the + * default context. + */ __u32 jm_ctx_handle; - - /** Padding field. MBZ. */ + /** + * @pad: Padding field. Must be zero. + */ __u32 pad; }; @@ -92,9 +106,18 @@ struct drm_panfrost_submit { * completed. */ struct drm_panfrost_wait_bo { + /** + * @handle: Handle for the object to wait for. + */ __u32 handle; + /** + * @pad: Padding, must be zero-filled. + */ __u32 pad; - __s64 timeout_ns; /* absolute */ + /** + * @timeout_ns: absolute number of nanoseconds to wait. + */ + __s64 timeout_ns; }; /* Valid flags to pass to drm_panfrost_create_bo */ @@ -107,16 +130,26 @@ struct drm_panfrost_wait_bo { * The flags argument is a bit mask of PANFROST_BO_* flags. */ struct drm_panfrost_create_bo { + /** + * @size: size of shmem/BO area to create (bytes) + */ __u32 size; + /** + * @flags: see PANFROST_BO_* flags + */ __u32 flags; - /** Returned GEM handle for the BO. */ + /** + * @handle: Returned GEM handle for the BO. + */ __u32 handle; - /* Pad, must be zero-filled. */ + /** + * @pad: Padding, must be zero-filled. + */ __u32 pad; /** - * Returned offset for the BO in the GPU address space. This offset - * is private to the DRM fd and is valid for the lifetime of the GEM - * handle. + * @offset: Returned offset for the BO in the GPU address space. + * This offset is private to the DRM fd and is valid for the + * lifetime of the GEM handle. * * This offset value will always be nonzero, since various HW * units treat 0 specially. @@ -136,10 +169,17 @@ struct drm_panfrost_create_bo { * used in a future extension. */ struct drm_panfrost_mmap_bo { - /** Handle for the object being mapped. */ + /** + * @handle: Handle for the object being mapped. + */ __u32 handle; + /** + * @flags: currently not used (should be zero) + */ __u32 flags; - /** offset into the drm node to use for subsequent mmap call. */ + /** + * @offset: offset into the drm node to use for subsequent mmap call. + */ __u64 offset; }; @@ -196,7 +236,7 @@ struct drm_panfrost_get_param { __u64 value; }; -/** +/* * Returns the offset for the BO in the GPU address space for this DRM fd. * This is the same value returned by drm_panfrost_create_bo, if that was called * from this DRM fd. @@ -244,12 +284,14 @@ struct drm_panfrost_madvise { * struct drm_panfrost_set_label_bo - ioctl argument for labelling Panfrost BOs. */ struct drm_panfrost_set_label_bo { - /** @handle: Handle of the buffer object to label. 
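The Panfrost hunks above only convert the comments to kernel-doc; the uapi itself is unchanged. A minimal userspace sketch of the create/mmap flow those comments describe, assuming a render node at /dev/dri/renderD128 and the uapi header being reachable as <drm/panfrost_drm.h>; error handling is trimmed:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <drm/panfrost_drm.h>   /* exact include path depends on installed headers */

int main(void)
{
        struct drm_panfrost_create_bo create = { .size = 4096 };
        struct drm_panfrost_mmap_bo map = { 0 };
        void *cpu;
        int fd;

        fd = open("/dev/dri/renderD128", O_RDWR);       /* node name is an assumption */
        if (fd < 0)
                return 1;

        if (ioctl(fd, DRM_IOCTL_PANFROST_CREATE_BO, &create))
                return 1;

        map.handle = create.handle;
        if (ioctl(fd, DRM_IOCTL_PANFROST_MMAP_BO, &map))
                return 1;

        cpu = mmap(NULL, create.size, PROT_READ | PROT_WRITE, MAP_SHARED,
                   fd, map.offset);
        if (cpu == MAP_FAILED)
                return 1;

        memset(cpu, 0, create.size);
        printf("bo handle %u mapped, gpu offset 0x%llx\n",
               create.handle, (unsigned long long)create.offset);

        munmap(cpu, create.size);
        close(fd);
        return 0;
}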
*/ + /** + * @handle: Handle of the buffer object to label. + */ __u32 handle; - - /** @pad: MBZ. */ + /** + * @pad: Must be zero. + */ __u32 pad; - /** * @label: User pointer to a NUL-terminated string * @@ -330,10 +372,13 @@ enum drm_panfrost_jm_ctx_priority { }; struct drm_panfrost_jm_ctx_create { - /** @handle: Handle of the created JM context */ + /** + * @handle: Handle of the created JM context + */ __u32 handle; - - /** @priority: Context priority (see enum drm_panfrost_jm_ctx_priority). */ + /** + * @priority: Context priority (see enum drm_panfrost_jm_ctx_priority). + */ __u32 priority; }; @@ -344,8 +389,9 @@ struct drm_panfrost_jm_ctx_destroy { * Must be a valid context handle returned by DRM_IOCTL_PANTHOR_JM_CTX_CREATE. */ __u32 handle; - - /** @pad: Padding field, MBZ. */ + /** + * @pad: Padding field, must be zero. + */ __u32 pad; }; |
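For the JM context structures documented at the end, a hypothetical userspace sequence might look like the fragment below. The DRM_IOCTL_PANFROST_JM_CTX_CREATE/_DESTROY request names and the priority value 0 are assumptions based on the usual DRM_IOCTL_PANFROST_* convention, not confirmed by this hunk; check panfrost_drm.h for the real definitions:

        /* Fragment inside an application that already opened 'fd'. */
        struct drm_panfrost_jm_ctx_create ctx_create = { 0 };  /* priority 0 assumed valid */
        struct drm_panfrost_jm_ctx_destroy ctx_destroy = { 0 };

        if (ioctl(fd, DRM_IOCTL_PANFROST_JM_CTX_CREATE, &ctx_create))
                return -1;

        /* Jobs can now set drm_panfrost_submit.jm_ctx_handle = ctx_create.handle. */

        ctx_destroy.handle = ctx_create.handle;
        ioctl(fd, DRM_IOCTL_PANFROST_JM_CTX_DESTROY, &ctx_destroy);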
