radeonsi: don't use a constant buffer for the copy_image compute shader
Just use user SGPRs and 16-bit values for the x, y, z coordinates.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9795>
This commit is contained in:
@@ -496,8 +496,6 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
|
||||
/* Only src can have DCC.*/
|
||||
((struct si_texture *)src)->surface.u.gfx9.dcc.pipe_aligned);
|
||||
|
||||
struct pipe_constant_buffer saved_cb = {};
|
||||
|
||||
struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE];
|
||||
struct pipe_image_view saved_image[2] = {0};
|
||||
util_copy_image_view(&saved_image[0], &images->views[0]);
|
||||
@@ -506,14 +504,9 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
|
||||
void *saved_cs = sctx->cs_shader_state.program;
|
||||
|
||||
if (!is_dcc_decompress) {
|
||||
unsigned data[] = {src_box->x, src_box->y, src_box->z, 0, dstx, dsty, dstz, 0};
|
||||
|
||||
si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
|
||||
|
||||
struct pipe_constant_buffer cb = {};
|
||||
cb.buffer_size = sizeof(data);
|
||||
cb.user_buffer = data;
|
||||
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, false, &cb);
|
||||
sctx->cs_user_data[0] = src_box->x | (dstx << 16);
|
||||
sctx->cs_user_data[1] = src_box->y | (dsty << 16);
|
||||
sctx->cs_user_data[2] = src_box->z | (dstz << 16);
|
||||
}
|
||||
|
||||
struct pipe_image_view image[2] = {0};
|
||||
@@ -615,9 +608,6 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
|
||||
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, 0, saved_image);
|
||||
for (int i = 0; i < 2; i++)
|
||||
pipe_resource_reference(&saved_image[i].resource, NULL);
|
||||
if (!is_dcc_decompress) {
|
||||
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, true, &saved_cb);
|
||||
}
|
||||
}
|
||||
|
||||
void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
|
||||
|
@@ -504,21 +504,23 @@ void *si_create_copy_image_compute_shader(struct pipe_context *ctx)
|
||||
{
|
||||
static const char text[] =
|
||||
"COMP\n"
|
||||
"PROPERTY CS_USER_DATA_COMPONENTS_AMD 3\n"
|
||||
"DCL SV[0], THREAD_ID\n"
|
||||
"DCL SV[1], BLOCK_ID\n"
|
||||
"DCL SV[2], BLOCK_SIZE\n"
|
||||
"DCL SV[3], CS_USER_DATA_AMD\n"
|
||||
"DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
|
||||
"DCL IMAGE[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
|
||||
"DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw
|
||||
"DCL TEMP[0..4], LOCAL\n"
|
||||
"DCL TEMP[0..3], LOCAL\n"
|
||||
"IMM[0] UINT32 {65535, 16, 0, 0}\n"
|
||||
|
||||
"MOV TEMP[0].xyz, CONST[0][0].xyzw\n"
|
||||
"UMAD TEMP[1].xyz, SV[1].xyzz, SV[2].xyzz, SV[0].xyzz\n"
|
||||
"UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n"
|
||||
"LOAD TEMP[3], IMAGE[0], TEMP[2].xyzx, 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
|
||||
"MOV TEMP[4].xyz, CONST[0][1].xyzw\n"
|
||||
"UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[4].xyzx\n"
|
||||
"STORE IMAGE[1], TEMP[2].xyzz, TEMP[3], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
|
||||
"UMAD TEMP[0].xyz, SV[1], SV[2], SV[0]\n" /* threadID.xyz */
|
||||
"AND TEMP[1].xyz, SV[3], IMM[0].xxxx\n" /* src.xyz */
|
||||
"UADD TEMP[1].xyz, TEMP[1], TEMP[0]\n" /* src.xyz + threadID.xyz */
|
||||
"LOAD TEMP[3], IMAGE[0], TEMP[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
|
||||
"USHR TEMP[2].xyz, SV[3], IMM[0].yyyy\n" /* dst.xyz */
|
||||
"UADD TEMP[2].xyz, TEMP[2], TEMP[0]\n" /* dst.xyz + threadID.xyz */
|
||||
"STORE IMAGE[1], TEMP[2], TEMP[3], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
|
||||
"END\n";
|
||||
|
||||
struct tgsi_token tokens[1024];
|
||||
@@ -542,20 +544,22 @@ void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx)
|
||||
"PROPERTY CS_FIXED_BLOCK_WIDTH 64\n"
|
||||
"PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
|
||||
"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
|
||||
"PROPERTY CS_USER_DATA_COMPONENTS_AMD 3\n"
|
||||
"DCL SV[0], THREAD_ID\n"
|
||||
"DCL SV[1], BLOCK_ID\n"
|
||||
"DCL SV[2], CS_USER_DATA_AMD\n"
|
||||
"DCL IMAGE[0], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
|
||||
"DCL IMAGE[1], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
|
||||
"DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw
|
||||
"DCL TEMP[0..4], LOCAL\n"
|
||||
"IMM[0] UINT32 {64, 1, 0, 0}\n"
|
||||
"MOV TEMP[0].xy, CONST[0][0].xzzw\n"
|
||||
"UMAD TEMP[1].xy, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz\n"
|
||||
"UADD TEMP[2].xy, TEMP[1].xyzx, TEMP[0].xyzx\n"
|
||||
"LOAD TEMP[3], IMAGE[0], TEMP[2].xyzx, 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
|
||||
"MOV TEMP[4].xy, CONST[0][1].xzzw\n"
|
||||
"UADD TEMP[2].xy, TEMP[1].xyzx, TEMP[4].xyzx\n"
|
||||
"STORE IMAGE[1], TEMP[2].xyzz, TEMP[3], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
|
||||
"IMM[0] UINT32 {64, 1, 65535, 16}\n"
|
||||
|
||||
"UMAD TEMP[0].xz, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" /* threadID.xz */
|
||||
"AND TEMP[1].xz, SV[2], IMM[0].zzzz\n" /* src.xz */
|
||||
"UADD TEMP[1].xz, TEMP[1], TEMP[0]\n" /* src.xz + threadID.xz */
|
||||
"LOAD TEMP[3], IMAGE[0], TEMP[1].xzzz, 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
|
||||
"USHR TEMP[2].xz, SV[2], IMM[0].wwww\n" /* dst.xz */
|
||||
"UADD TEMP[2].xz, TEMP[2], TEMP[0]\n" /* dst.xz + threadID.xz */
|
||||
"STORE IMAGE[1], TEMP[2].xzzz, TEMP[3], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
|
||||
"END\n";
|
||||
|
||||
struct tgsi_token tokens[1024];
|
||||
|
Reference in New Issue
Block a user