freedreno/a6xx: Define CONSTANTRAMMODE

While we're here, rename SP_CS_UNKNOWN_A9B1 to SP_CS_CTRL_REG1 and HLSQ_CS_UNKNOWN_B9D0 to HLSQ_CS_CTRL_REG1.

Fixes: 5879eaac18 ("ir3: Increase compute const size on a7xx")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34746>
(cherry picked from commit 57986ae5ec57820e4e06d7674f1496de58f4fd0e)
This commit is contained in:
Connor Abbott
2025-04-28 18:43:39 -04:00
committed by Eric Engestrom
parent f6450df88f
commit 2a06a20a4a
9 changed files with 44 additions and 35 deletions

View File

@@ -74,7 +74,7 @@
"description": "freedreno/a6xx: Define CONSTANTRAMMODE",
"nominated": true,
"nomination_type": 2,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "5879eaac185ed1c167fd01aff9b91c7cbe43ab0a",
"notes": null

View File

@@ -7184,7 +7184,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
00000080 SP_FS_TEX_COUNT: 128
0000f000 SP_UNKNOWN_A9A8: 0xf000
00421800 SP_CS_CTRL_REG0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 }
0000001f SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 31 }
0000001f SP_CS_CTRL_REG1: { SHARED_SIZE = 31 | CONSTANTRAMMODE = CONSTLEN_128 }
00000000 SP_CS_BRANCH_COND: 0
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
8c415420 SP_CS_OBJ_START: 0x8c415420
@@ -7252,7 +7252,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
00000080 SP_FS_TEX_COUNT: 128
0000f000 SP_UNKNOWN_A9A8: 0xf000
00421800 SP_CS_CTRL_REG0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 }
0000001f SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 31 }
0000001f SP_CS_CTRL_REG1: { SHARED_SIZE = 31 | CONSTANTRAMMODE = CONSTLEN_128 }
00000000 SP_CS_BRANCH_COND: 0
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
8c415420 SP_CS_OBJ_START: 0x8c415420

View File

@@ -18960,7 +18960,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
00000080 SP_FS_TEX_COUNT: 128
00000000 SP_UNKNOWN_A9A8: 0
00100000 SP_CS_CTRL_REG0: { THREADSIZE = THREAD128 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
00000000 SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 0 }
00000000 SP_CS_CTRL_REG1: { SHARED_SIZE = 0 | CONSTANTRAMMODE = CONSTLEN_128 }
00000000 SP_CS_BRANCH_COND: 0
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
10019a300 SP_CS_OBJ_START: 0x10019a300
@@ -19028,7 +19028,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
00000080 SP_FS_TEX_COUNT: 128
00000000 SP_UNKNOWN_A9A8: 0
00100000 SP_CS_CTRL_REG0: { THREADSIZE = THREAD128 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
00000000 SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 0 }
00000000 SP_CS_CTRL_REG1: { SHARED_SIZE = 0 | CONSTANTRAMMODE = CONSTLEN_128 }
00000000 SP_CS_BRANCH_COND: 0
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
10019a300 SP_CS_OBJ_START: 0x10019a300

View File

@@ -152955,7 +152955,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
00000001 SP_FS_TEX_COUNT: 1
00000000 SP_UNKNOWN_A9A8: 0
00100000 SP_CS_CTRL_REG0: { THREADSIZE = THREAD128 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
00000000 SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 0 }
00000000 SP_CS_CTRL_REG1: { SHARED_SIZE = 0 | CONSTANTRAMMODE = CONSTLEN_128 }
00000000 SP_CS_BRANCH_COND: 0
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
17e0995019d62 SP_CS_OBJ_START: 0x17e0995019d62
@@ -153023,7 +153023,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
00000001 SP_FS_TEX_COUNT: 1
00000000 SP_UNKNOWN_A9A8: 0
00100000 SP_CS_CTRL_REG0: { THREADSIZE = THREAD128 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
00000000 SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 0 }
00000000 SP_CS_CTRL_REG1: { SHARED_SIZE = 0 | CONSTANTRAMMODE = CONSTLEN_128 }
00000000 SP_CS_BRANCH_COND: 0
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
17e0995019d62 SP_CS_OBJ_START: 0x17e0995019d62

View File

@@ -180,14 +180,14 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
}
uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG1, 1);
OUT_RING(ring, A6XX_SP_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
if (CHIP == A6XX && a6xx_backend->info->a6xx.has_lpac) {
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1);
OUT_RING(ring, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(1) |
A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6);
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CTRL_REG1, 1);
OUT_RING(ring, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(1) |
A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
}
uint32_t local_invocation_id, work_group_id;

View File

@@ -5121,8 +5121,15 @@ to upconvert to 32b float internally?
<bitfield name="MERGEDREGS" pos="31" type="boolean"/>
</reg32>
<!--
	Encodings of the two-bit CONSTANTRAMMODE bitfield (bits 5..6) in
	SP_CS_CTRL_REG1 and HLSQ_CS_CTRL_REG1.
	NOTE(review): the number in each CONSTLEN_* name is presumably in the
	same units as the shader's CONSTLEN; confirm against the driver code.
-->
<enum name="a6xx_const_ram_mode">
<value value="0x0" name="CONSTLEN_128"/>
<value value="0x1" name="CONSTLEN_192"/>
<value value="0x2" name="CONSTLEN_256"/>
<value value="0x3" name="CONSTLEN_512"/> <!-- a7xx only -->
</enum>
<!-- set for compute shaders -->
<reg32 offset="0xa9b1" name="SP_CS_UNKNOWN_A9B1" usage="cmd">
<reg32 offset="0xa9b1" name="SP_CS_CTRL_REG1" usage="cmd">
<bitfield name="SHARED_SIZE" low="0" high="4" type="uint">
<doc>
If 0 - all 32k of shared storage is enabled, otherwise
@@ -5133,9 +5140,13 @@ to upconvert to 32b float internally?
always return 0)
</doc>
</bitfield>
<bitfield name="UNK5" pos="5" type="boolean"/>
<!-- always 1 ? -->
<bitfield name="UNK6" pos="6" type="boolean"/>
<bitfield name="CONSTANTRAMMODE" low="5" high="6" type="a6xx_const_ram_mode">
<doc>
This defines the split between consts and local
memory in the Local Buffer. The programmed value
must be at least the actual CONSTLEN.
</doc>
</bitfield>
</reg32>
<reg32 offset="0xa9b2" name="SP_CS_BRANCH_COND" type="hex" usage="cmd"/>
<reg32 offset="0xa9b3" name="SP_CS_OBJ_FIRST_EXEC_OFFSET" type="uint" usage="cmd"/>
@@ -5758,12 +5769,10 @@ to upconvert to 32b float internally?
</reg64>
</array>
<!-- new in a6xx gen4, mirror of SP_CS_UNKNOWN_A9B1? -->
<reg32 offset="0xb9d0" name="HLSQ_CS_UNKNOWN_B9D0" variants="A6XX" usage="cmd">
<!-- new in a6xx gen4, mirror of SP_CS_CTRL_REG1? -->
<reg32 offset="0xb9d0" name="HLSQ_CS_CTRL_REG1" variants="A6XX" usage="cmd">
<bitfield name="SHARED_SIZE" low="0" high="4" type="uint"/>
<bitfield name="UNK5" pos="5" type="boolean"/>
<!-- always 1 ? -->
<bitfield name="UNK6" pos="6" type="boolean"/>
<bitfield name="CONSTANTRAMMODE" low="5" high="6" type="a6xx_const_ram_mode"/>
</reg32>
<reg32 offset="0xbb00" name="HLSQ_DRAW_CMD" variants="A6XX">

View File

@@ -2374,7 +2374,7 @@ tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
.threadmode = MULTI,
.threadsize = THREAD128,
.mergedregs = true));
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG1(.shared_size = 1));
tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));

View File

@@ -1530,14 +1530,14 @@ tu6_emit_cs_config(struct tu_cs *cs,
tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
tu_cs_emit(cs, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CTRL_REG1, 1);
tu_cs_emit(cs, A6XX_SP_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
if (CHIP == A6XX && cs->device->physical_device->info->a6xx.has_lpac) {
tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1);
tu_cs_emit(cs, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(shared_size) |
A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6);
tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_CTRL_REG1, 1);
tu_cs_emit(cs, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
}
uint32_t local_invocation_id =

View File

@@ -220,14 +220,14 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
uint32_t shared_size =
MAX2(((int)(cs->v->cs.req_local_mem + info->variable_shared_mem) - 1) / 1024, 1);
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG1, 1);
OUT_RING(ring, A6XX_SP_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
if (CHIP == A6XX && ctx->screen->info->a6xx.has_lpac) {
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1);
OUT_RING(ring, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(shared_size) |
A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6);
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CTRL_REG1, 1);
OUT_RING(ring, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
}
const unsigned *local_size =