From 385a56642b9fb57d1d256dca08963a4832da352e Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 28 Apr 2025 19:00:07 -0400 Subject: [PATCH] freedreno/a6xx, turnip: Set CONSTANTRAMMODE correctly This should fix hangs when using more than 256 constants on a7xx. Fixes: 5879eaac185 ("ir3: Increase compute const size on a7xx") Part-of: (cherry picked from commit 80bcbc0e924f7e021bcca155fa12501a2d6fb467) --- .pick_status.json | 2 +- src/freedreno/computerator/a6xx.cc | 8 ++++++-- src/freedreno/vulkan/tu_shader.cc | 8 ++++++-- src/gallium/drivers/freedreno/a6xx/fd6_compute.cc | 8 ++++++-- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index faac10f3e49..a1d38001c78 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -64,7 +64,7 @@ "description": "freedreno/a6xx, turnip: Set CONSTANTRAMMODE correctly", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "5879eaac185ed1c167fd01aff9b91c7cbe43ab0a", "notes": null diff --git a/src/freedreno/computerator/a6xx.cc b/src/freedreno/computerator/a6xx.cc index 4a712b32d68..e0adbfd4c31 100644 --- a/src/freedreno/computerator/a6xx.cc +++ b/src/freedreno/computerator/a6xx.cc @@ -180,14 +180,18 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) } uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1); + enum a6xx_const_ram_mode mode = + v->constlen > 256 ? CONSTLEN_512 : + (v->constlen > 192 ? CONSTLEN_256 : + (v->constlen > 128 ? CONSTLEN_192 : CONSTLEN_128)); OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG1, 1); OUT_RING(ring, A6XX_SP_CS_CTRL_REG1_SHARED_SIZE(shared_size) | - A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256)); + A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(mode)); if (CHIP == A6XX && a6xx_backend->info->a6xx.has_lpac) { OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CTRL_REG1, 1); OUT_RING(ring, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(1) | - A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256)); + A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(mode)); } uint32_t local_invocation_id, work_group_id; diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index 7ed7d4af188..e7f5a11f543 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -1530,14 +1530,18 @@ tu6_emit_cs_config(struct tu_cs *cs, tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova); uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1); + enum a6xx_const_ram_mode mode = + v->constlen > 256 ? CONSTLEN_512 : + (v->constlen > 192 ? CONSTLEN_256 : + (v->constlen > 128 ? CONSTLEN_192 : CONSTLEN_128)); tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CTRL_REG1, 1); tu_cs_emit(cs, A6XX_SP_CS_CTRL_REG1_SHARED_SIZE(shared_size) | - A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256)); + A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(mode)); if (CHIP == A6XX && cs->device->physical_device->info->a6xx.has_lpac) { tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_CTRL_REG1, 1); tu_cs_emit(cs, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(shared_size) | - A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256)); + A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(mode)); } uint32_t local_invocation_id = diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc index 210a9db824c..a82b1517f18 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc @@ -220,14 +220,18 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt uint32_t shared_size = MAX2(((int)(cs->v->cs.req_local_mem + info->variable_shared_mem) - 1) / 1024, 1); + enum a6xx_const_ram_mode mode = + cs->v->constlen > 256 ? CONSTLEN_512 : + (cs->v->constlen > 192 ? CONSTLEN_256 : + (cs->v->constlen > 128 ? CONSTLEN_192 : CONSTLEN_128)); OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG1, 1); OUT_RING(ring, A6XX_SP_CS_CTRL_REG1_SHARED_SIZE(shared_size) | - A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256)); + A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(mode)); if (CHIP == A6XX && ctx->screen->info->a6xx.has_lpac) { OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CTRL_REG1, 1); OUT_RING(ring, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(shared_size) | - A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256)); + A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(mode)); } const unsigned *local_size =