aco,nir: add dpp16_shift_amd intrinsic

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24650>
This commit is contained in:
Georg Lehmann
2023-08-12 16:49:00 +02:00
committed by Marge Bot
parent 1f430b1111
commit 2d3f536174
4 changed files with 21 additions and 0 deletions

View File

@@ -8594,6 +8594,20 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
set_wqm(ctx);
break;
}
case nir_intrinsic_dpp16_shift_amd: {
   /* The shift amount is a compile-time constant stored in the intrinsic's
    * BASE index. It must be non-zero and lie in [-15, 15]. */
   const int delta = nir_intrinsic_base(instr);
   assert(delta >= -15 && delta <= 15 && delta != 0);
   assert(instr->def.bit_size != 1 && instr->def.bit_size < 64);
   assert(ctx->options->gfx_level >= GFX8);

   /* Negative amounts use the DPP row_sr control, positive amounts row_sl;
    * in both cases the control is built from the magnitude of the shift. */
   uint16_t row_shift_ctrl;
   if (delta < 0)
      row_shift_ctrl = dpp_row_sr(-delta);
   else
      row_shift_ctrl = dpp_row_sl(delta);

   /* The source is forced into a VGPR before being used as the DPP operand. */
   Temp value = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
   Temp result = get_ssa_temp(ctx, &instr->def);
   bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(result), value, row_shift_ctrl);

   set_wqm(ctx);
   break;
}
case nir_intrinsic_quad_broadcast:
case nir_intrinsic_quad_swap_horizontal:
case nir_intrinsic_quad_swap_vertical:

View File

@@ -484,6 +484,7 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_intrinsic_write_invocation_amd:
case nir_intrinsic_mbcnt_amd:
case nir_intrinsic_lane_permute_16_amd:
case nir_intrinsic_dpp16_shift_amd:
case nir_intrinsic_load_instance_id:
case nir_intrinsic_ssbo_atomic:
case nir_intrinsic_ssbo_atomic_swap:

View File

@@ -705,6 +705,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_write_invocation_amd:
case nir_intrinsic_mbcnt_amd:
case nir_intrinsic_lane_permute_16_amd:
case nir_intrinsic_dpp16_shift_amd:
case nir_intrinsic_elect:
case nir_intrinsic_elect_any_ir3:
case nir_intrinsic_load_tlb_color_brcm:

View File

@@ -545,6 +545,11 @@ intrinsic("write_invocation_amd", src_comp=[0, 0, 1], dest_comp=0, bit_sizes=src
intrinsic("mbcnt_amd", src_comp=[1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
# Compiled to v_permlane16_b32. src = [ value, lanesel_lo, lanesel_hi ]
intrinsic("lane_permute_16_amd", src_comp=[1, 1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
# Subgroup shuffle up/down with a cluster size of 16.
# The constant shift amount is passed in BASE:
#   base in [-15, -1]: DPP_ROW_SR
#   base in [  1, 15]: DPP_ROW_SL
# Any other base value (including 0) is invalid.
# Returns zero for invocations that try to read out of bounds.
intrinsic("dpp16_shift_amd", src_comp=[0], dest_comp=0, bit_sizes=src0, indices=[BASE], flags=[CAN_ELIMINATE])
# Basic Geometry Shader intrinsics.
#