nir,vtn,aco,ac/llvm: make cube_face_coord_amd more direct

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22636>
This commit is contained in:
@@ -2503,14 +2503,9 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
 Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1),
 emit_extract_vector(ctx, in, 2, v1)};
 Temp ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), src[0], src[1], src[2]);
-ma = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), ma);
 Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]);
 Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]);
-sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000u /*0.5*/),
-bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma));
-tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000u /*0.5*/),
-bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma));
-bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc);
+bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc, ma);
 break;
 }
 case nir_op_cube_face_index_amd: {
@@ -1183,21 +1183,14 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 
 case nir_op_cube_face_coord_amd: {
 src[0] = ac_to_float(&ctx->ac, src[0]);
-LLVMValueRef results[2];
+LLVMValueRef results[3];
 LLVMValueRef in[3];
 for (unsigned chan = 0; chan < 3; chan++)
 in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
-results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3,
-0);
-results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3,
-0);
-LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3, 0);
-results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
-results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
-LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
-results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
-results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
-result = ac_build_gather_values(&ctx->ac, results, 2);
+results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3, 0);
+results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3, 0);
+results[2] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3, 0);
+result = ac_build_gather_values(&ctx->ac, results, 3);
 break;
 }
 
@@ -537,16 +537,15 @@ for (unsigned bit = 0; bit < bit_size; bit++) {
 """)
 
 # AMD_gcn_shader extended instructions
-unop_horiz("cube_face_coord_amd", 2, tfloat32, 3, tfloat32, """
-dst.x = dst.y = 0.0;
+unop_horiz("cube_face_coord_amd", 3, tfloat32, 3, tfloat32, """
+dst.x = dst.y = dst.z = 0.0;
 float absX = fabsf(src0.x);
 float absY = fabsf(src0.y);
 float absZ = fabsf(src0.z);
 
-float ma = 0.0;
-if (absX >= absY && absX >= absZ) { ma = 2 * src0.x; }
-if (absY >= absX && absY >= absZ) { ma = 2 * src0.y; }
-if (absZ >= absX && absZ >= absY) { ma = 2 * src0.z; }
+if (absX >= absY && absX >= absZ) { dst.z = 2 * src0.x; }
+if (absY >= absX && absY >= absZ) { dst.z = 2 * src0.y; }
+if (absZ >= absX && absZ >= absY) { dst.z = 2 * src0.z; }
 
 if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; }
 if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; }
@@ -554,9 +553,6 @@ if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = src0.
 if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; }
 if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; }
 if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; }
-
-dst.x = dst.x * (1.0f / ma) + 0.5f;
-dst.y = dst.y * (1.0f / ma) + 0.5f;
 """)
 
 unop_horiz("cube_face_index_amd", 1, tfloat32, 3, tfloat32, """
@@ -35,9 +35,13 @@ vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode,
 case CubeFaceIndexAMD:
 def = nir_cube_face_index_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
 break;
-case CubeFaceCoordAMD:
+case CubeFaceCoordAMD: {
 def = nir_cube_face_coord_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
+nir_ssa_def *st = nir_channels(&b->nb, def, 0x3);
+nir_ssa_def *invma = nir_frcp(&b->nb, nir_channel(&b->nb, def, 2));
+def = nir_ffma_imm2(&b->nb, st, invma, 0.5);
 break;
+}
 case TimeAMD: {
 def = nir_pack_64_2x32(&b->nb, nir_shader_clock(&b->nb, NIR_SCOPE_SUBGROUP));
 break;
Reference in New Issue
Block a user