radv: use typed buffer loads for vertex input fetches
This drastically reduces the number of SGPRs because the driver now uses descriptors per vertex binding, instead of per vertex attribute format. 29077 shaders in 15096 tests Totals: SGPRS: 1354285 -> 1282109 (-5.33 %) VGPRS: 909896 -> 908800 (-0.12 %) Spilled SGPRs: 24840 -> 24811 (-0.12 %) Code Size: 49221144 -> 48986628 (-0.48 %) bytes Max Waves: 243930 -> 244229 (0.12 %) Totals from affected shaders: SGPRS: 390648 -> 318472 (-18.48 %) VGPRS: 288432 -> 287336 (-0.38 %) Spilled SGPRs: 94 -> 65 (-30.85 %) Code Size: 11548412 -> 11313896 (-2.03 %) bytes Max Waves: 86460 -> 86759 (0.35 %) This gives a really tiny boost. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
@@ -2008,6 +2008,8 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
|
||||
|
||||
LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
|
||||
|
||||
alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, "");
|
||||
|
||||
if (adjustment == RADV_ALPHA_ADJUST_SSCALED)
|
||||
alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
|
||||
else
|
||||
@@ -2035,7 +2037,7 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
|
||||
alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
|
||||
}
|
||||
|
||||
return alpha;
|
||||
return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
|
||||
}
|
||||
|
||||
static unsigned
|
||||
@@ -2096,7 +2098,7 @@ radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
|
||||
|
||||
for (unsigned i = num_channels; i < 4; i++) {
|
||||
chan[i] = i == 3 ? one : zero;
|
||||
chan[i] = ac_to_float(&ctx->ac, chan[i]);
|
||||
chan[i] = ac_to_integer(&ctx->ac, chan[i]);
|
||||
}
|
||||
|
||||
return ac_build_gather_values(&ctx->ac, chan, 4);
|
||||
@@ -2154,20 +2156,49 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
|
||||
} else
|
||||
buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
|
||||
ctx->abi.base_vertex, "");
|
||||
t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false);
|
||||
|
||||
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
|
||||
|
||||
/* Adjust the number of channels to load based on the vertex
|
||||
* attribute format.
|
||||
*/
|
||||
unsigned num_format_channels = get_num_channels_from_data_format(data_format);
|
||||
unsigned num_channels = MIN2(num_input_channels, num_format_channels);
|
||||
unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[attrib_index];
|
||||
unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index];
|
||||
unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index];
|
||||
|
||||
input = ac_build_buffer_load_format(&ctx->ac, t_list,
|
||||
if (attrib_stride != 0 && attrib_offset > attrib_stride) {
|
||||
LLVMValueRef buffer_offset =
|
||||
LLVMConstInt(ctx->ac.i32,
|
||||
attrib_offset / attrib_stride, false);
|
||||
|
||||
buffer_index = LLVMBuildAdd(ctx->ac.builder,
|
||||
buffer_index,
|
||||
ctx->ac.i32_0,
|
||||
num_channels, false, true);
|
||||
buffer_offset, "");
|
||||
|
||||
attrib_offset = attrib_offset % attrib_stride;
|
||||
}
|
||||
|
||||
t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
|
||||
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
|
||||
|
||||
input = ac_build_tbuffer_load(&ctx->ac, t_list, buffer_index,
|
||||
LLVMConstInt(ctx->ac.i32, attrib_offset, false),
|
||||
ctx->ac.i32_0, ctx->ac.i32_0,
|
||||
num_channels,
|
||||
data_format, num_format,
|
||||
false, false, true);
|
||||
|
||||
if (ctx->options->key.vs.post_shuffle & (1 << attrib_index)) {
|
||||
if (num_channels > 1) {
|
||||
LLVMValueRef c[4];
|
||||
c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
|
||||
c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
|
||||
c[2] = ac_llvm_extract_elem(&ctx->ac, input, 0);
|
||||
c[3] = ac_llvm_extract_elem(&ctx->ac, input, 3);
|
||||
|
||||
input = ac_build_gather_values(&ctx->ac, c, 4);
|
||||
}
|
||||
}
|
||||
|
||||
input = radv_fixup_vertex_input_fetches(ctx, input, num_channels,
|
||||
is_float);
|
||||
|
Reference in New Issue
Block a user