radeonsi: move the no-AA small prim precision cull constant into an SGPR

This reduces the scalar load from vec4 to vec2.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17864>
This commit is contained in:
Marek Olšák
2022-07-28 17:18:04 -04:00
committed by Marge Bot
parent 788dce46a3
commit fa46f3d40e
4 changed files with 17 additions and 9 deletions

View File

@@ -960,11 +960,13 @@ static void cull_primitive(struct si_shader_context *ctx,
options.cull_w = true;
if (prim_is_lines) {
LLVMValueRef terms = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->ac.i32, 2, 0));
terms = LLVMBuildBitCast(builder, terms, ctx->ac.v4f32, "");
ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
LLVMPointerType(ctx->ac.v2f32, AC_ADDR_SPACE_CONST_32BIT), "");
LLVMValueRef terms = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->ac.i32, 4, 0));
terms = LLVMBuildBitCast(builder, terms, ctx->ac.v2f32, "");
clip_half_line_width[0] = ac_llvm_extract_elem(&ctx->ac, terms, 0);
clip_half_line_width[1] = ac_llvm_extract_elem(&ctx->ac, terms, 1);
small_prim_precision = ac_llvm_extract_elem(&ctx->ac, terms, 2);
small_prim_precision = GET_FIELD(ctx, GS_STATE_SMALL_PRIM_PRECISION_NO_AA);
options.num_vertices = 2;
options.cull_small_prims = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT;
@@ -974,11 +976,6 @@ static void cull_primitive(struct si_shader_context *ctx,
} else {
/* Get the small prim filter precision. */
small_prim_precision = GET_FIELD(ctx, GS_STATE_SMALL_PRIM_PRECISION);
small_prim_precision =
LLVMBuildOr(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 0x70, 0), "");
small_prim_precision =
LLVMBuildShl(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 23, 0), "");
small_prim_precision = LLVMBuildBitCast(builder, small_prim_precision, ctx->ac.f32, "");
options.num_vertices = 3;
options.cull_front = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
@@ -987,6 +984,13 @@ static void cull_primitive(struct si_shader_context *ctx,
options.cull_zero_area = options.cull_front || options.cull_back;
}
/* Rebuild the small prim precision float from its 4 exponent bits: (bits | 0x70) << 23. */
small_prim_precision =
LLVMBuildOr(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 0x70, 0), "");
small_prim_precision =
LLVMBuildShl(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 23, 0), "");
small_prim_precision = LLVMBuildBitCast(builder, small_prim_precision, ctx->ac.f32, "");
/* Tell ES threads whether their vertex survived. */
LLVMValueRef params[] = {
out_prim_accepted,

View File

@@ -913,8 +913,8 @@ struct si_small_prim_cull_info {
float scale[2], translate[2];
float scale_no_aa[2], translate_no_aa[2];
float clip_half_line_width[2]; /* line_width * 0.5 in clip space in X and Y directions */
float small_prim_precision_no_aa; /* same as the small prim precision, but ignores MSAA */
/* The above fields are uploaded to memory. The below fields are passed via user SGPRs. */
float small_prim_precision_no_aa; /* same as the small prim precision, but ignores MSAA */
float small_prim_precision;
};

View File

@@ -272,6 +272,8 @@ enum
* in the shader via vs_state_bits in legacy GS, the GS copy shader, and any NGG shader.
*/
/* bit gap */
#define GS_STATE_SMALL_PRIM_PRECISION_NO_AA__SHIFT 18
#define GS_STATE_SMALL_PRIM_PRECISION_NO_AA__MASK 0xf
#define GS_STATE_SMALL_PRIM_PRECISION__SHIFT 22
#define GS_STATE_SMALL_PRIM_PRECISION__MASK 0xf
#define GS_STATE_STREAMOUT_QUERY_ENABLED__SHIFT 26

View File

@@ -141,6 +141,8 @@ static void si_emit_cull_state(struct si_context *sctx)
*
* So pass only the low 4 bits of the float exponent to the shader.
*/
SET_FIELD(sctx->current_gs_state, GS_STATE_SMALL_PRIM_PRECISION_NO_AA,
(fui(info.small_prim_precision_no_aa) >> 23) & 0xf);
SET_FIELD(sctx->current_gs_state, GS_STATE_SMALL_PRIM_PRECISION,
(fui(info.small_prim_precision) >> 23) & 0xf);
}