freedreno/computerator: Fix remaining issues with A7XX
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23217>
This commit is contained in:

committed by
Marge Bot

parent
b0ea4883f0
commit
7e10a175c7
@@ -119,11 +119,13 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
|
||||
struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel);
|
||||
struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend);
|
||||
struct ir3_shader_variant *v = ir3_kernel->v;
|
||||
const unsigned *local_size = kernel->local_size;
|
||||
const struct ir3_info *i = &v->info;
|
||||
enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64;
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_SP_MODE_CONTROL, 1);
|
||||
OUT_RING(ring, A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
|
||||
OUT_REG(ring, A6XX_SP_MODE_CONTROL(.constant_demotion_enable = true,
|
||||
.isammode = ISAMMODE_GL,
|
||||
.shared_consts_enable = false));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_SP_PERFCTR_ENABLE, 1);
|
||||
OUT_RING(ring, A6XX_SP_PERFCTR_ENABLE_CS);
|
||||
@@ -168,6 +170,14 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
|
||||
COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
|
||||
COND(ir3_kernel->info.early_preamble, A6XX_SP_CS_CTRL_REG0_EARLYPREAMBLE) |
|
||||
A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)));
|
||||
if (CHIP == A7XX) {
|
||||
OUT_REG(ring, HLSQ_FS_CNTL_0(CHIP, .threadsize = THREAD64));
|
||||
|
||||
OUT_REG(ring, HLSQ_CONTROL_2_REG(CHIP, .dword = 0xfcfcfcfc),
|
||||
HLSQ_CONTROL_3_REG(CHIP, .dword = 0xfcfcfcfc),
|
||||
HLSQ_CONTROL_4_REG(CHIP, .dword = 0xfcfcfcfc),
|
||||
HLSQ_CONTROL_5_REG(CHIP, .dword = 0x0000fc00), );
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
|
||||
OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(1) |
|
||||
@@ -192,16 +202,28 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
|
||||
A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
|
||||
OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
|
||||
A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz));
|
||||
} else {
|
||||
enum a7xx_cs_yalign yalign = (local_size[1] % 8 == 0) ? CS_YALIGN_8
|
||||
: (local_size[1] % 4 == 0) ? CS_YALIGN_4
|
||||
: (local_size[1] % 2 == 0) ? CS_YALIGN_2
|
||||
: CS_YALIGN_1;
|
||||
|
||||
OUT_REG(ring, A7XX_HLSQ_CS_CNTL_1(.linearlocalidregid = regid(63, 0),
|
||||
.threadsize = thrsz,
|
||||
.unk11 = true,
|
||||
.unk22 = true,
|
||||
.yalign = yalign, ));
|
||||
}
|
||||
|
||||
if (CHIP == A7XX || a6xx_backend->info->a6xx.has_lpac) {
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_CNTL_0, 2);
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_CNTL_0, 1);
|
||||
OUT_RING(ring, A6XX_SP_CS_CNTL_0_WGIDCONSTID(work_group_id) |
|
||||
A6XX_SP_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
|
||||
A6XX_SP_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
|
||||
A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
|
||||
OUT_RING(ring, A6XX_SP_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
|
||||
A6XX_SP_CS_CNTL_1_THREADSIZE(thrsz));
|
||||
OUT_REG(ring,
|
||||
SP_CS_CNTL_1(CHIP, .linearlocalidregid = regid(63, 0),
|
||||
.threadsize = thrsz, ));
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2);
|
||||
@@ -463,6 +485,12 @@ a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3],
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1,
|
||||
));
|
||||
if (CHIP == A7XX) {
|
||||
OUT_REG(ring, A7XX_HLSQ_CS_LOCAL_SIZE(.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1, ));
|
||||
}
|
||||
|
||||
OUT_REG(ring, HLSQ_CS_NDRANGE_1(CHIP,
|
||||
.globalsize_x = local_size[0] * num_groups[0],
|
||||
));
|
||||
|
@@ -29,8 +29,13 @@ computerator_files = [
|
||||
|
||||
computerator_cpp_args = cpp.get_supported_arguments([
|
||||
'-Wno-sign-compare',
|
||||
'-Wno-array-bounds',
|
||||
])
|
||||
|
||||
if meson.is_cross_build()
|
||||
computerator_cpp_args += '-Wno-array-bounds'
|
||||
endif
|
||||
|
||||
computerator = executable(
|
||||
'computerator',
|
||||
computerator_files,
|
||||
|
@@ -3684,7 +3684,7 @@ to upconvert to 32b float internally?
|
||||
<bitfield name="LOCALIDREGID" low="24" high="31" type="a3xx_regid"/>
|
||||
</reg32>
|
||||
<!-- new in a6xx gen4, matches HLSQ_CS_CNTL_1 -->
|
||||
<reg32 offset="0xa9c3" name="SP_CS_CNTL_1" usage="cmd">
|
||||
<reg32 offset="0xa9c3" name="SP_CS_CNTL_1" variants="A6XX" usage="cmd">
|
||||
<!-- gl_LocalInvocationIndex -->
|
||||
<bitfield name="LINEARLOCALIDREGID" low="0" high="7" type="a3xx_regid"/>
|
||||
<!-- a650 has 6 "SP cores" (but 3 "SP"). this makes it use only
|
||||
@@ -3694,11 +3694,20 @@ to upconvert to 32b float internally?
|
||||
<bitfield name="THREADSIZE" pos="9" type="a6xx_threadsize"/>
|
||||
<!-- 1 thread per wave (ignored if bit9 set) -->
|
||||
<bitfield name="THREADSIZE_SCALAR" pos="10" type="boolean"/>
|
||||
</reg32>
|
||||
|
||||
<reg32 offset="0xa9c3" name="SP_CS_CNTL_1" variants="A7XX-" usage="cmd">
|
||||
<!-- gl_LocalInvocationIndex -->
|
||||
<bitfield name="LINEARLOCALIDREGID" low="0" high="7" type="a3xx_regid"/>
|
||||
<!-- Must match SP_CS_CTRL -->
|
||||
<bitfield name="THREADSIZE" pos="8" type="a6xx_threadsize"/>
|
||||
<!-- 1 thread per wave (would hang if THREAD128 is also set) -->
|
||||
<bitfield name="THREADSIZE_SCALAR" pos="9" type="boolean"/>
|
||||
|
||||
<!-- Affects getone. If enabled, getone is sometimes executed 1(?) fewer times
|
||||
than there are subgroups.
|
||||
-->
|
||||
<bitfield name="UNK15" pos="15" type="boolean" variants="A7XX"/>
|
||||
<bitfield name="UNK15" pos="15" type="boolean"/>
|
||||
</reg32>
|
||||
|
||||
<!-- TODO: two 64kb aligned addresses at a9d0/a9d2 -->
|
||||
@@ -4146,13 +4155,22 @@ to upconvert to 32b float internally?
|
||||
<reg32 offset="0xa9dd" name="HLSQ_CS_KERNEL_GROUP_Y" variants="A7XX-" usage="rp_blit"/>
|
||||
<reg32 offset="0xa9de" name="HLSQ_CS_KERNEL_GROUP_Z" variants="A7XX-" usage="rp_blit"/>
|
||||
|
||||
<reg32 offset="0xa9db" name="HLSQ_CS_UNKNOWN_A9DB" variants="A7XX-" usage="rp_blit">
|
||||
<!-- Values for the YALIGN bitfield (bits 27..30) of HLSQ_CS_CNTL_1.
Each value sets a single bit, with larger alignments encoded as
smaller values (CS_YALIGN_8 = 1 ... CS_YALIGN_1 = 8). computerator
selects the largest of 8/4/2/1 that evenly divides local_size[1]. -->
<enum name="a7xx_cs_yalign">
|
||||
<value name="CS_YALIGN_1" value="8"/>
|
||||
<value name="CS_YALIGN_2" value="4"/>
|
||||
<value name="CS_YALIGN_4" value="2"/>
|
||||
<value name="CS_YALIGN_8" value="1"/>
|
||||
</enum>
|
||||
|
||||
<reg32 offset="0xa9db" name="HLSQ_CS_CNTL_1" variants="A7XX-" usage="rp_blit">
|
||||
<!-- gl_LocalInvocationIndex -->
|
||||
<bitfield name="LINEARLOCALIDREGID" low="0" high="7" type="a3xx_regid"/>
|
||||
<!-- Must match SP_CS_CTRL -->
|
||||
<bitfield name="THREADSIZE" pos="9" type="a6xx_threadsize"/>
|
||||
<bitfield name="UNK11" pos="11" type="boolean"/>
|
||||
<bitfield name="UNK22" pos="22" type="boolean"/>
|
||||
<bitfield name="UNK27" low="27" high="30" type="uint" variants="A7XX"/>
|
||||
<!-- TODO: other bits -->
|
||||
<bitfield name="UNK26" pos="26" type="boolean"/>
|
||||
<bitfield name="YALIGN" low="27" high="30" type="a7xx_cs_yalign"/>
|
||||
</reg32>
|
||||
|
||||
<reg32 offset="0xa9df" name="HLSQ_CS_LOCAL_SIZE" variants="A7XX-" usage="cmd">
|
||||
|
Reference in New Issue
Block a user