From 01314d08800282935e9a70bdbed4ce0396b2c992 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 5 Jul 2022 13:44:24 +0200 Subject: [PATCH] radeonsi: use LLVMBuildLoad2 for inter-stage outputs loads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PS case was covered by the previous commit, so we can use f32 everywhere. Reviewed-by: Mihai Preda Reviewed-by: Marek Olšák Part-of: --- .../drivers/radeonsi/gfx10_shader_ngg.c | 31 +++++++++++-------- .../drivers/radeonsi/si_shader_llvm_gs.c | 2 +- .../drivers/radeonsi/si_shader_llvm_tess.c | 4 +-- .../drivers/radeonsi/si_shader_llvm_vs.c | 6 ++-- 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index ffd58d502c5..f0ddb30f800 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -1044,7 +1044,7 @@ void gfx10_ngg_culling_build_end(struct si_shader_context *ctx) pos_index = i; for (unsigned j = 0; j < 4; j++) { - position[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], ""); + position[j] = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + j], ""); } /* Store Position.W into LDS. */ @@ -1071,7 +1071,7 @@ void gfx10_ngg_culling_build_end(struct si_shader_context *ctx) if (!(clipdist_enable & BITFIELD_BIT(index))) continue; - LLVMValueRef distance = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], ""); + LLVMValueRef distance = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + j], ""); add_clipdist_bit(ctx, distance, index, &packed_data); has_clipdist_mask = true; } @@ -1079,7 +1079,7 @@ void gfx10_ngg_culling_build_end(struct si_shader_context *ctx) case VARYING_SLOT_CLIP_VERTEX: for (unsigned j = 0; j < 4; j++) - clipvertex[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], ""); + clipvertex[j] = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + j], ""); if (add_clipdist_bits_for_clipvertex(ctx, clipdist_enable, clipvertex, &packed_data)) has_clipdist_mask = true; @@ -1291,7 +1291,8 @@ void gfx10_ngg_culling_build_end(struct si_shader_context *ctx) /* Store Position.XYZW into LDS. */ for (unsigned chan = 0; chan < 4; chan++) { LLVMBuildStore( - builder, ac_to_integer(&ctx->ac, LLVMBuildLoad(builder, addrs[4 * pos_index + chan], "")), + builder, ac_to_integer(&ctx->ac, + LLVMBuildLoad2(builder, ctx->ac.f32, addrs[4 * pos_index + chan], "")), ac_build_gep0(&ctx->ac, new_vtx, LLVMConstInt(ctx->ac.i32, lds_pos_x + chan, 0))); } @@ -1532,15 +1533,16 @@ void gfx10_ngg_build_end(struct si_shader_context *ctx) */ if (ctx->so.num_outputs) { tmp = ac_build_gep0(&ctx->ac, vertex_ptr, LLVMConstInt(ctx->ac.i32, 4 * i + j, false)); - tmp2 = LLVMBuildLoad(builder, addrs[4 * i + j], ""); - tmp2 = ac_to_integer(&ctx->ac, tmp2); + tmp2 = LLVMBuildLoad2(builder, ctx->ac.f32, addrs[4 * i + j], ""); + LLVMTypeRef type = ac_to_integer_type(&ctx->ac, ctx->ac.f32); + tmp2 = LLVMBuildBitCast(ctx->ac.builder, tmp2, type, ""); LLVMBuildStore(builder, tmp2, tmp); } } /* Store the edgeflag at the end (if streamout is enabled) */ if (info->output_semantic[i] == VARYING_SLOT_EDGE && gfx10_ngg_writes_user_edgeflags(ctx->shader)) { - LLVMValueRef edgeflag = LLVMBuildLoad(builder, addrs[4 * i], ""); + LLVMValueRef edgeflag = LLVMBuildLoad2(builder, ctx->ac.f32, addrs[4 * i], ""); /* The output is a float, but the hw expects a 1-bit integer. */ edgeflag = LLVMBuildFPToUI(ctx->ac.builder, edgeflag, ctx->ac.i32, ""); edgeflag = ac_build_umin(&ctx->ac, edgeflag, ctx->ac.i32_1); @@ -1706,11 +1708,12 @@ void gfx10_ngg_build_end(struct si_shader_context *ctx) tmp = LLVMConstInt(ctx->ac.i32, lds_pos_x + j, 0); tmp = ac_build_gep0(&ctx->ac, vertex_ptr, tmp); tmp = LLVMBuildLoad2(builder, ctx->ac.i32, tmp, ""); - outputs[i].values[j] = ac_to_float(&ctx->ac, tmp); + outputs[i].values[j] = LLVMBuildBitCast(ctx->ac.builder, tmp, + ac_to_float_type(&ctx->ac, ctx->ac.i32), ""); } } else { for (unsigned j = 0; j < 4; j++) { - outputs[i].values[j] = LLVMBuildLoad(builder, addrs[4 * i + j], ""); + outputs[i].values[j] = LLVMBuildLoad2(builder, ctx->ac.f32, addrs[4 * i + j], ""); } } } @@ -1732,7 +1735,7 @@ void gfx10_ngg_build_end(struct si_shader_context *ctx) outputs[i].values[0] = si_get_primitive_id(ctx, 0); } - outputs[i].values[0] = ac_to_float(&ctx->ac, outputs[i].values[0]); + outputs[i].values[0] = LLVMBuildBitCast(ctx->ac.builder, outputs[i].values[0], ctx->ac.f32, ""); for (unsigned j = 1; j < 4; j++) outputs[i].values[j] = LLVMGetUndef(ctx->ac.f32); i++; @@ -1870,8 +1873,9 @@ void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LL ((info->output_streams[i] >> (2 * chan)) & 3) != stream) continue; - LLVMValueRef out_val = LLVMBuildLoad(builder, addrs[4 * i + chan], ""); - out_val = ac_to_integer(&ctx->ac, out_val); + LLVMValueRef out_val = LLVMBuildLoad2(builder, ctx->ac.f32, addrs[4 * i + chan], ""); + LLVMTypeRef as_int = ac_to_integer_type(&ctx->ac, ctx->ac.f32); + out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, as_int, ""); LLVMBuildStore(builder, out_val, ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx)); } } @@ -2304,7 +2308,8 @@ void gfx10_ngg_gs_build_end(struct si_shader_context *ctx) for (unsigned j = 0; j < 4; j++, out_idx++) { tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx); - tmp = LLVMBuildLoad(builder, tmp, ""); + tmp = LLVMBuildLoad2(builder, ctx->ac.i32, tmp, ""); + assert(LLVMGetTypeKind(LLVMTypeOf(tmp)) != LLVMPointerTypeKind); outputs[i].values[j] = ac_to_float(&ctx->ac, tmp); outputs[i].vertex_streams = info->output_streams[i]; } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index ada45fff5a2..8d4121afec9 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -210,7 +210,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM ((info->output_streams[i] >> (2 * chan)) & 3) != stream) continue; - LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""); + LLVMValueRef out_val = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + chan], ""); LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, offset * shader->selector->info.base.gs.vertices_out, 0); offset++; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 196e83adcaf..4c0ea6476ba 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -523,7 +523,7 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx) for (unsigned i = 0; i < 6; i++) { int loc = i < 4 ? outer_loc : inner_loc; LLVMValueRef value = loc < 0 ? LLVMGetUndef(ctx->ac.f32) : - LLVMBuildLoad(builder, ctx->abi.outputs[loc * 4 + i % 4], ""); + LLVMBuildLoad2(builder, ctx->ac.f32, ctx->abi.outputs[loc * 4 + i % 4], ""); value = ac_to_float(&ctx->ac, value); ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, ""); } @@ -585,7 +585,7 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx) if (!(info->output_usagemask[i] & (1 << chan))) continue; - LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""); + LLVMValueRef value = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + chan], ""); ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value, value, ret_offset + param * 4 + chan, ""); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 76a7c8e1015..ac45d41a16b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -499,7 +499,7 @@ static void si_vertex_color_clamping(struct si_shader_context *ctx, continue; for (unsigned j = 0; j < 4; j++) { - outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addr[i][j], ""); + outputs[i].values[j] = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addr[i][j], ""); } } } @@ -794,7 +794,7 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, LLVMValueRef num_ex for (unsigned i = 0; i < shader->info.nr_param_exports; i++) { for (unsigned chan = 0; chan < 4; chan++) { param_exports[i].out[chan] = - LLVMBuildLoad(ctx->ac.builder, param_exports[i].out[chan], ""); + LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, param_exports[i].out[chan], ""); } LLVMValueRef vdata = ac_build_gather_values_extended(&ctx->ac, param_exports[i].out, @@ -826,7 +826,7 @@ void si_llvm_vs_build_end(struct si_shader_context *ctx) outputs[i].semantic = info->output_semantic[i]; for (j = 0; j < 4; j++) { - outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], ""); + outputs[i].values[j] = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + j], ""); outputs[i].vertex_streams = info->output_streams[i]; } }