amd,nir: remove byte_permute_amd intrinsic

It's unused, and if we ever want to use it again, we should make it an
ALU opcode instead.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21445>
This commit is contained in:
Georg Lehmann
2023-02-21 14:06:46 +01:00
committed by Marge Bot
parent e316416dd0
commit ee47cc8256
5 changed files with 0 additions and 23 deletions

View File

@@ -8796,15 +8796,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
emit_wqm(bld, wqm_tmp, dst); emit_wqm(bld, wqm_tmp, dst);
break; break;
} }
case nir_intrinsic_byte_permute_amd: {
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
assert(dst.regClass() == v1);
assert(ctx->program->gfx_level >= GFX8);
bld.vop3(aco_opcode::v_perm_b32, Definition(dst), get_ssa_temp(ctx, instr->src[0].ssa),
as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa)),
as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa)));
break;
}
case nir_intrinsic_lane_permute_16_amd: { case nir_intrinsic_lane_permute_16_amd: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa); Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);

View File

@@ -619,7 +619,6 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_intrinsic_load_tess_coord: case nir_intrinsic_load_tess_coord:
case nir_intrinsic_write_invocation_amd: case nir_intrinsic_write_invocation_amd:
case nir_intrinsic_mbcnt_amd: case nir_intrinsic_mbcnt_amd:
case nir_intrinsic_byte_permute_amd:
case nir_intrinsic_lane_permute_16_amd: case nir_intrinsic_lane_permute_16_amd:
case nir_intrinsic_load_instance_id: case nir_intrinsic_load_instance_id:
case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_ssbo_atomic_add:

View File

@@ -4168,16 +4168,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
result = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, visit_first_invocation(ctx), result = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, visit_first_invocation(ctx),
ac_get_thread_id(&ctx->ac), ""); ac_get_thread_id(&ctx->ac), "");
break; break;
case nir_intrinsic_byte_permute_amd:
if (LLVM_VERSION_MAJOR < 13) {
assert("unimplemented byte_permute, LLVM 12 doesn't have amdgcn.perm");
break;
}
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.perm", ctx->ac.i32,
(LLVMValueRef[]){get_src(ctx, instr->src[0]),
get_src(ctx, instr->src[1]),
get_src(ctx, instr->src[2])}, 3, 0);
break;
case nir_intrinsic_lane_permute_16_amd: case nir_intrinsic_lane_permute_16_amd:
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.permlane16", ctx->ac.i32, result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.permlane16", ctx->ac.i32,
(LLVMValueRef[]){get_src(ctx, instr->src[0]), (LLVMValueRef[]){get_src(ctx, instr->src[0]),

View File

@@ -392,7 +392,6 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
case nir_intrinsic_quad_swap_horizontal: case nir_intrinsic_quad_swap_horizontal:
case nir_intrinsic_quad_swap_vertical: case nir_intrinsic_quad_swap_vertical:
case nir_intrinsic_quad_swap_diagonal: case nir_intrinsic_quad_swap_diagonal:
case nir_intrinsic_byte_permute_amd:
case nir_intrinsic_load_deref: case nir_intrinsic_load_deref:
case nir_intrinsic_load_shared: case nir_intrinsic_load_shared:
case nir_intrinsic_load_shared2_amd: case nir_intrinsic_load_shared2_amd:

View File

@@ -465,8 +465,6 @@ intrinsic("write_invocation_amd", src_comp=[0, 0, 1], dest_comp=0, bit_sizes=src
flags=[CAN_ELIMINATE]) flags=[CAN_ELIMINATE])
# src = [ mask, addition ] # src = [ mask, addition ]
intrinsic("mbcnt_amd", src_comp=[1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE]) intrinsic("mbcnt_amd", src_comp=[1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
# Compiled to v_perm_b32. src = [ in_bytes_hi, in_bytes_lo, selector ]
intrinsic("byte_permute_amd", src_comp=[1, 1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER])
# Compiled to v_permlane16_b32. src = [ value, lanesel_lo, lanesel_hi ] # Compiled to v_permlane16_b32. src = [ value, lanesel_lo, lanesel_hi ]
intrinsic("lane_permute_16_amd", src_comp=[1, 1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE]) intrinsic("lane_permute_16_amd", src_comp=[1, 1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])