radeonsi: move the no-AA small prim precision cull constant into an SGPR
This reduces the scalar load from vec4 to vec2.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17864>
This commit is contained in:
@@ -960,11 +960,13 @@ static void cull_primitive(struct si_shader_context *ctx,
|
||||
options.cull_w = true;
|
||||
|
||||
if (prim_is_lines) {
|
||||
LLVMValueRef terms = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->ac.i32, 2, 0));
|
||||
terms = LLVMBuildBitCast(builder, terms, ctx->ac.v4f32, "");
|
||||
ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
|
||||
LLVMPointerType(ctx->ac.v2f32, AC_ADDR_SPACE_CONST_32BIT), "");
|
||||
LLVMValueRef terms = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->ac.i32, 4, 0));
|
||||
terms = LLVMBuildBitCast(builder, terms, ctx->ac.v2f32, "");
|
||||
clip_half_line_width[0] = ac_llvm_extract_elem(&ctx->ac, terms, 0);
|
||||
clip_half_line_width[1] = ac_llvm_extract_elem(&ctx->ac, terms, 1);
|
||||
small_prim_precision = ac_llvm_extract_elem(&ctx->ac, terms, 2);
|
||||
small_prim_precision = GET_FIELD(ctx, GS_STATE_SMALL_PRIM_PRECISION_NO_AA);
|
||||
|
||||
options.num_vertices = 2;
|
||||
options.cull_small_prims = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT;
|
||||
@@ -974,11 +976,6 @@ static void cull_primitive(struct si_shader_context *ctx,
|
||||
} else {
|
||||
/* Get the small prim filter precision. */
|
||||
small_prim_precision = GET_FIELD(ctx, GS_STATE_SMALL_PRIM_PRECISION);
|
||||
small_prim_precision =
|
||||
LLVMBuildOr(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 0x70, 0), "");
|
||||
small_prim_precision =
|
||||
LLVMBuildShl(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 23, 0), "");
|
||||
small_prim_precision = LLVMBuildBitCast(builder, small_prim_precision, ctx->ac.f32, "");
|
||||
|
||||
options.num_vertices = 3;
|
||||
options.cull_front = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
|
||||
@@ -987,6 +984,13 @@ static void cull_primitive(struct si_shader_context *ctx,
|
||||
options.cull_zero_area = options.cull_front || options.cull_back;
|
||||
}
|
||||
|
||||
/* Extract the small prim precision. */
|
||||
small_prim_precision =
|
||||
LLVMBuildOr(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 0x70, 0), "");
|
||||
small_prim_precision =
|
||||
LLVMBuildShl(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 23, 0), "");
|
||||
small_prim_precision = LLVMBuildBitCast(builder, small_prim_precision, ctx->ac.f32, "");
|
||||
|
||||
/* Tell ES threads whether their vertex survived. */
|
||||
LLVMValueRef params[] = {
|
||||
out_prim_accepted,
|
||||
|
@@ -913,8 +913,8 @@ struct si_small_prim_cull_info {
|
||||
float scale[2], translate[2];
|
||||
float scale_no_aa[2], translate_no_aa[2];
|
||||
float clip_half_line_width[2]; /* line_width * 0.5 in clip space in X and Y directions */
|
||||
float small_prim_precision_no_aa; /* same as the small prim precision, but ignores MSAA */
|
||||
/* The above fields are uploaded to memory. The below fields are passed via user SGPRs. */
|
||||
float small_prim_precision_no_aa; /* same as the small prim precision, but ignores MSAA */
|
||||
float small_prim_precision;
|
||||
};
|
||||
|
||||
|
@@ -272,6 +272,8 @@ enum
|
||||
* in the shader via vs_state_bits in legacy GS, the GS copy shader, and any NGG shader.
|
||||
*/
|
||||
/* bit gap */
|
||||
#define GS_STATE_SMALL_PRIM_PRECISION_NO_AA__SHIFT 18
|
||||
#define GS_STATE_SMALL_PRIM_PRECISION_NO_AA__MASK 0xf
|
||||
#define GS_STATE_SMALL_PRIM_PRECISION__SHIFT 22
|
||||
#define GS_STATE_SMALL_PRIM_PRECISION__MASK 0xf
|
||||
#define GS_STATE_STREAMOUT_QUERY_ENABLED__SHIFT 26
|
||||
|
@@ -141,6 +141,8 @@ static void si_emit_cull_state(struct si_context *sctx)
|
||||
*
|
||||
* So pass only the first 4 bits of the float exponent to the shader.
|
||||
*/
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_SMALL_PRIM_PRECISION_NO_AA,
|
||||
(fui(info.small_prim_precision_no_aa) >> 23) & 0xf);
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_SMALL_PRIM_PRECISION,
|
||||
(fui(info.small_prim_precision) >> 23) & 0xf);
|
||||
}
|
||||
|
Reference in New Issue
Block a user