From 8fabde3be4dcdec744a007c521e72664cf5fbed4 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sun, 19 Feb 2023 22:09:01 +0100 Subject: [PATCH] aco/gfx11: use dpp_row_xmask and dpp_row_share Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_builder_h.py | 18 +++++++++++++++++- src/amd/compiler/aco_instruction_selection.cpp | 6 ++++++ src/amd/compiler/aco_print_ir.cpp | 4 ++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index 816bc33aa02..0cc869b1073 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -43,7 +43,9 @@ enum dpp_ctrl { dpp_row_mirror = 0x140, dpp_row_half_mirror = 0x141, dpp_row_bcast15 = 0x142, - dpp_row_bcast31 = 0x143 + dpp_row_bcast31 = 0x143, + _dpp_row_share = 0x150, + _dpp_row_xmask = 0x160, }; inline dpp_ctrl @@ -74,6 +76,20 @@ dpp_row_rr(unsigned amount) return (dpp_ctrl)(((unsigned) _dpp_row_rr) | amount); } +inline dpp_ctrl +dpp_row_share(unsigned lane) +{ + assert(lane < 16); + return (dpp_ctrl)(((unsigned) _dpp_row_share) | lane); +} + +inline dpp_ctrl +dpp_row_xmask(unsigned mask) +{ + assert(mask < 16); + return (dpp_ctrl)(((unsigned) _dpp_row_xmask) | mask); +} + inline unsigned ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask) { diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 1de943bde4f..9a47f3d573f 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -264,6 +264,12 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask) dpp_ctrl = dpp_row_mirror; } else if (and_mask == 0x1f && !or_mask && xor_mask == 0x7) { dpp_ctrl = dpp_row_half_mirror; + } else if (ctx->options->gfx_level >= GFX11 && and_mask == 0x10 && or_mask < 0x10 && + xor_mask < 0x10) { + dpp_ctrl = dpp_row_share(or_mask ^ xor_mask); + } else if (ctx->options->gfx_level >= GFX11 && and_mask == 0x1f && !or_mask && + xor_mask < 0x10) { + dpp_ctrl = dpp_row_xmask(xor_mask); } else if (ctx->options->gfx_level >= GFX10 && (and_mask & 0x18) == 0x18 && or_mask < 8 && xor_mask < 8) { Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src); diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index b0f4ae471c1..c1f5a11bf06 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -667,6 +667,10 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins fprintf(output, " row_bcast:15"); } else if (dpp.dpp_ctrl == dpp_row_bcast31) { fprintf(output, " row_bcast:31"); + } else if (dpp.dpp_ctrl >= dpp_row_share(0) && dpp.dpp_ctrl <= dpp_row_share(15)) { + fprintf(output, " row_share:%d", dpp.dpp_ctrl & 0xf); + } else if (dpp.dpp_ctrl >= dpp_row_xmask(0) && dpp.dpp_ctrl <= dpp_row_xmask(15)) { + fprintf(output, " row_xmask:%d", dpp.dpp_ctrl & 0xf); } else { fprintf(output, " dpp_ctrl:0x%.3x", dpp.dpp_ctrl); }