aco,nir: add dpp16_shift_amd intrinsic
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24650>
This commit is contained in:
@@ -8594,6 +8594,20 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
set_wqm(ctx);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_dpp16_shift_amd: {
|
||||
Temp src = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
|
||||
Temp dst = get_ssa_temp(ctx, &instr->def);
|
||||
int delta = nir_intrinsic_base(instr);
|
||||
assert(delta >= -15 && delta <= 15 && delta != 0);
|
||||
assert(instr->def.bit_size != 1 && instr->def.bit_size < 64);
|
||||
assert(ctx->options->gfx_level >= GFX8);
|
||||
|
||||
uint16_t dpp_ctrl = delta < 0 ? dpp_row_sr(-delta) : dpp_row_sl(delta);
|
||||
bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), src, dpp_ctrl);
|
||||
|
||||
set_wqm(ctx);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_quad_broadcast:
|
||||
case nir_intrinsic_quad_swap_horizontal:
|
||||
case nir_intrinsic_quad_swap_vertical:
|
||||
|
@@ -484,6 +484,7 @@ init_context(isel_context* ctx, nir_shader* shader)
|
||||
case nir_intrinsic_write_invocation_amd:
|
||||
case nir_intrinsic_mbcnt_amd:
|
||||
case nir_intrinsic_lane_permute_16_amd:
|
||||
case nir_intrinsic_dpp16_shift_amd:
|
||||
case nir_intrinsic_load_instance_id:
|
||||
case nir_intrinsic_ssbo_atomic:
|
||||
case nir_intrinsic_ssbo_atomic_swap:
|
||||
|
@@ -705,6 +705,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
||||
case nir_intrinsic_write_invocation_amd:
|
||||
case nir_intrinsic_mbcnt_amd:
|
||||
case nir_intrinsic_lane_permute_16_amd:
|
||||
case nir_intrinsic_dpp16_shift_amd:
|
||||
case nir_intrinsic_elect:
|
||||
case nir_intrinsic_elect_any_ir3:
|
||||
case nir_intrinsic_load_tlb_color_brcm:
|
||||
|
@@ -545,6 +545,11 @@ intrinsic("write_invocation_amd", src_comp=[0, 0, 1], dest_comp=0, bit_sizes=src
|
||||
intrinsic("mbcnt_amd", src_comp=[1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
|
||||
# Compiled to v_permlane16_b32. src = [ value, lanesel_lo, lanesel_hi ]
|
||||
intrinsic("lane_permute_16_amd", src_comp=[1, 1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
|
||||
# Subgroup shuffle up/down with a cluster size of 16.
|
||||
# base in [-15, -1]: DPP_ROW_SR
|
||||
# base in [ 1, 15]: DPP_ROW_SL. Any other base (including 0) is invalid.
|
||||
# Returns zero for invocations that try to read out of bounds.
|
||||
intrinsic("dpp16_shift_amd", src_comp=[0], dest_comp=0, bit_sizes=src0, indices=[BASE], flags=[CAN_ELIMINATE])
|
||||
|
||||
# Basic Geometry Shader intrinsics.
|
||||
#
|
||||
|
Reference in New Issue
Block a user