freedreno/a6xx: Define CONSTANTRAMMODE
While we're here, give SP_CS_UNKNOWN_A9B1 a better name: SP_CS_CTRL_REG1 (and likewise rename its mirror HLSQ_CS_UNKNOWN_B9D0 to HLSQ_CS_CTRL_REG1).
Fixes: 5879eaac18 ("ir3: Increase compute const size on a7xx")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34746>
(cherry picked from commit 57986ae5ec57820e4e06d7674f1496de58f4fd0e)
This commit is contained in:

committed by
Eric Engestrom

parent
f6450df88f
commit
2a06a20a4a
@@ -74,7 +74,7 @@
|
||||
"description": "freedreno/a6xx: Define CONSTANTRAMMODE",
|
||||
"nominated": true,
|
||||
"nomination_type": 2,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "5879eaac185ed1c167fd01aff9b91c7cbe43ab0a",
|
||||
"notes": null
|
||||
|
@@ -7184,7 +7184,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
|
||||
00000080 SP_FS_TEX_COUNT: 128
|
||||
0000f000 SP_UNKNOWN_A9A8: 0xf000
|
||||
00421800 SP_CS_CTRL_REG0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 }
|
||||
0000001f SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 31 }
|
||||
0000001f SP_CS_CTRL_REG1: { SHARED_SIZE = 31 | CONSTANTRAMMODE = CONSTLEN_128 }
|
||||
00000000 SP_CS_BRANCH_COND: 0
|
||||
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
|
||||
8c415420 SP_CS_OBJ_START: 0x8c415420
|
||||
@@ -7252,7 +7252,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
|
||||
00000080 SP_FS_TEX_COUNT: 128
|
||||
0000f000 SP_UNKNOWN_A9A8: 0xf000
|
||||
00421800 SP_CS_CTRL_REG0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 }
|
||||
0000001f SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 31 }
|
||||
0000001f SP_CS_CTRL_REG1: { SHARED_SIZE = 31 | CONSTANTRAMMODE = CONSTLEN_128 }
|
||||
00000000 SP_CS_BRANCH_COND: 0
|
||||
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
|
||||
8c415420 SP_CS_OBJ_START: 0x8c415420
|
||||
|
@@ -18960,7 +18960,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
|
||||
00000080 SP_FS_TEX_COUNT: 128
|
||||
00000000 SP_UNKNOWN_A9A8: 0
|
||||
00100000 SP_CS_CTRL_REG0: { THREADSIZE = THREAD128 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
|
||||
00000000 SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 0 }
|
||||
00000000 SP_CS_CTRL_REG1: { SHARED_SIZE = 0 | CONSTANTRAMMODE = CONSTLEN_128 }
|
||||
00000000 SP_CS_BRANCH_COND: 0
|
||||
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
|
||||
10019a300 SP_CS_OBJ_START: 0x10019a300
|
||||
@@ -19028,7 +19028,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
|
||||
00000080 SP_FS_TEX_COUNT: 128
|
||||
00000000 SP_UNKNOWN_A9A8: 0
|
||||
00100000 SP_CS_CTRL_REG0: { THREADSIZE = THREAD128 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
|
||||
00000000 SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 0 }
|
||||
00000000 SP_CS_CTRL_REG1: { SHARED_SIZE = 0 | CONSTANTRAMMODE = CONSTLEN_128 }
|
||||
00000000 SP_CS_BRANCH_COND: 0
|
||||
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
|
||||
10019a300 SP_CS_OBJ_START: 0x10019a300
|
||||
|
@@ -152955,7 +152955,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
|
||||
00000001 SP_FS_TEX_COUNT: 1
|
||||
00000000 SP_UNKNOWN_A9A8: 0
|
||||
00100000 SP_CS_CTRL_REG0: { THREADSIZE = THREAD128 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
|
||||
00000000 SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 0 }
|
||||
00000000 SP_CS_CTRL_REG1: { SHARED_SIZE = 0 | CONSTANTRAMMODE = CONSTLEN_128 }
|
||||
00000000 SP_CS_BRANCH_COND: 0
|
||||
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
|
||||
17e0995019d62 SP_CS_OBJ_START: 0x17e0995019d62
|
||||
@@ -153023,7 +153023,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
|
||||
00000001 SP_FS_TEX_COUNT: 1
|
||||
00000000 SP_UNKNOWN_A9A8: 0
|
||||
00100000 SP_CS_CTRL_REG0: { THREADSIZE = THREAD128 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
|
||||
00000000 SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 0 }
|
||||
00000000 SP_CS_CTRL_REG1: { SHARED_SIZE = 0 | CONSTANTRAMMODE = CONSTLEN_128 }
|
||||
00000000 SP_CS_BRANCH_COND: 0
|
||||
00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
|
||||
17e0995019d62 SP_CS_OBJ_START: 0x17e0995019d62
|
||||
|
@@ -180,14 +180,14 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
|
||||
}
|
||||
|
||||
uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
|
||||
OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
|
||||
A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG1, 1);
|
||||
OUT_RING(ring, A6XX_SP_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
|
||||
A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
|
||||
|
||||
if (CHIP == A6XX && a6xx_backend->info->a6xx.has_lpac) {
|
||||
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1);
|
||||
OUT_RING(ring, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(1) |
|
||||
A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6);
|
||||
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CTRL_REG1, 1);
|
||||
OUT_RING(ring, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(1) |
|
||||
A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
|
||||
}
|
||||
|
||||
uint32_t local_invocation_id, work_group_id;
|
||||
|
@@ -5121,8 +5121,15 @@ to upconvert to 32b float internally?
|
||||
<bitfield name="MERGEDREGS" pos="31" type="boolean"/>
|
||||
</reg32>
|
||||
|
||||
<enum name="a6xx_const_ram_mode">
|
||||
<value value="0x0" name="CONSTLEN_128"/>
|
||||
<value value="0x1" name="CONSTLEN_192"/>
|
||||
<value value="0x2" name="CONSTLEN_256"/>
|
||||
<value value="0x3" name="CONSTLEN_512"/> <!-- a7xx only -->
|
||||
</enum>
|
||||
|
||||
<!-- set for compute shaders -->
|
||||
<reg32 offset="0xa9b1" name="SP_CS_UNKNOWN_A9B1" usage="cmd">
|
||||
<reg32 offset="0xa9b1" name="SP_CS_CTRL_REG1" usage="cmd">
|
||||
<bitfield name="SHARED_SIZE" low="0" high="4" type="uint">
|
||||
<doc>
|
||||
If 0 - all 32k of shared storage is enabled, otherwise
|
||||
@@ -5133,9 +5140,13 @@ to upconvert to 32b float internally?
|
||||
always return 0)
|
||||
</doc>
|
||||
</bitfield>
|
||||
<bitfield name="UNK5" pos="5" type="boolean"/>
|
||||
<!-- always 1 ? -->
|
||||
<bitfield name="UNK6" pos="6" type="boolean"/>
|
||||
<bitfield name="CONSTANTRAMMODE" low="5" high="6" type="a6xx_const_ram_mode">
|
||||
<doc>
|
||||
This defines the split between consts and local
|
||||
memory in the Local Buffer. The programmed value
|
||||
must be at least the actual CONSTLEN.
|
||||
</doc>
|
||||
</bitfield>
|
||||
</reg32>
|
||||
<reg32 offset="0xa9b2" name="SP_CS_BRANCH_COND" type="hex" usage="cmd"/>
|
||||
<reg32 offset="0xa9b3" name="SP_CS_OBJ_FIRST_EXEC_OFFSET" type="uint" usage="cmd"/>
|
||||
@@ -5758,12 +5769,10 @@ to upconvert to 32b float internally?
|
||||
</reg64>
|
||||
</array>
|
||||
|
||||
<!-- new in a6xx gen4, mirror of SP_CS_UNKNOWN_A9B1? -->
|
||||
<reg32 offset="0xb9d0" name="HLSQ_CS_UNKNOWN_B9D0" variants="A6XX" usage="cmd">
|
||||
<!-- new in a6xx gen4, mirror of SP_CS_CTRL_REG1? -->
|
||||
<reg32 offset="0xb9d0" name="HLSQ_CS_CTRL_REG1" variants="A6XX" usage="cmd">
|
||||
<bitfield name="SHARED_SIZE" low="0" high="4" type="uint"/>
|
||||
<bitfield name="UNK5" pos="5" type="boolean"/>
|
||||
<!-- always 1 ? -->
|
||||
<bitfield name="UNK6" pos="6" type="boolean"/>
|
||||
<bitfield name="CONSTANTRAMMODE" low="5" high="6" type="a6xx_const_ram_mode"/>
|
||||
</reg32>
|
||||
|
||||
<reg32 offset="0xbb00" name="HLSQ_DRAW_CMD" variants="A6XX">
|
||||
|
@@ -2374,7 +2374,7 @@ tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
|
||||
.threadmode = MULTI,
|
||||
.threadsize = THREAD128,
|
||||
.mergedregs = true));
|
||||
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
|
||||
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG1(.shared_size = 1));
|
||||
tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
|
||||
HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
|
||||
HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));
|
||||
|
@@ -1530,14 +1530,14 @@ tu6_emit_cs_config(struct tu_cs *cs,
|
||||
tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
|
||||
|
||||
uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
|
||||
tu_cs_emit(cs, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
|
||||
A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CTRL_REG1, 1);
|
||||
tu_cs_emit(cs, A6XX_SP_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
|
||||
A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
|
||||
|
||||
if (CHIP == A6XX && cs->device->physical_device->info->a6xx.has_lpac) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1);
|
||||
tu_cs_emit(cs, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(shared_size) |
|
||||
A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6);
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_CTRL_REG1, 1);
|
||||
tu_cs_emit(cs, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
|
||||
A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
|
||||
}
|
||||
|
||||
uint32_t local_invocation_id =
|
||||
|
@@ -220,14 +220,14 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
||||
|
||||
uint32_t shared_size =
|
||||
MAX2(((int)(cs->v->cs.req_local_mem + info->variable_shared_mem) - 1) / 1024, 1);
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
|
||||
OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
|
||||
A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG1, 1);
|
||||
OUT_RING(ring, A6XX_SP_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
|
||||
A6XX_SP_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
|
||||
|
||||
if (CHIP == A6XX && ctx->screen->info->a6xx.has_lpac) {
|
||||
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1);
|
||||
OUT_RING(ring, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(shared_size) |
|
||||
A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6);
|
||||
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CTRL_REG1, 1);
|
||||
OUT_RING(ring, A6XX_HLSQ_CS_CTRL_REG1_SHARED_SIZE(shared_size) |
|
||||
A6XX_HLSQ_CS_CTRL_REG1_CONSTANTRAMMODE(CONSTLEN_256));
|
||||
}
|
||||
|
||||
const unsigned *local_size =
|
||||
|
Reference in New Issue
Block a user