diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 563c6055992..fa7eafda0a9 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4184,10 +4184,13 @@ fs_visitor::swizzle_nir_scratch_addr(const brw::fs_builder &bld, } static unsigned -choose_oword_block_size_dwords(unsigned dwords) +choose_oword_block_size_dwords(const struct intel_device_info *devinfo, + unsigned dwords) { unsigned block; - if (dwords >= 32) { + if (devinfo->has_lsc && dwords >= 64) { + block = 64; + } else if (dwords >= 32) { block = 32; } else if (dwords >= 16) { block = 16; @@ -5670,7 +5673,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr while (loaded < total) { const unsigned block = - choose_oword_block_size_dwords(total - loaded); + choose_oword_block_size_dwords(devinfo, total - loaded); const unsigned block_bytes = block * 4; const fs_builder &ubld = block == 8 ? ubld8 : ubld16; @@ -5707,7 +5710,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr while (written < total) { const unsigned block = - choose_oword_block_size_dwords(total - written); + choose_oword_block_size_dwords(devinfo, total - written); fs_reg srcs[A64_LOGICAL_NUM_SRCS]; srcs[A64_LOGICAL_ADDRESS] = address; @@ -5751,7 +5754,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr while (loaded < total) { const unsigned block = - choose_oword_block_size_dwords(total - loaded); + choose_oword_block_size_dwords(devinfo, total - loaded); const unsigned block_bytes = block * 4; srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(block); @@ -5793,7 +5796,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr while (written < total) { const unsigned block = - choose_oword_block_size_dwords(total - written); + choose_oword_block_size_dwords(devinfo, total - written); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(block); srcs[SURFACE_LOGICAL_SRC_DATA] = diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 9f460bb9294..cfcf9a0e8a5 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -1829,6 +1829,79 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) inst->src[3] = payload2; } +static void +lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst) +{ + const intel_device_info *devinfo = bld.shader->devinfo; + assert(devinfo->has_lsc); + + /* Get the logical send arguments. */ + const fs_reg &addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS]; + const fs_reg &src = inst->src[SURFACE_LOGICAL_SRC_DATA]; + const fs_reg &surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE]; + const fs_reg &surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE]; + const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG]; + assert(arg.file == IMM); + assert(inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == BAD_FILE); + assert(inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK].file == BAD_FILE); + + const bool is_stateless = + surface.file == IMM && (surface.ud == BRW_BTI_STATELESS || + surface.ud == GFX8_BTI_STATELESS_NON_COHERENT); + + const bool has_side_effects = inst->has_side_effects(); + + const bool write = inst->opcode == SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL; + + fs_builder ubld = bld.exec_all().group(1, 0); + fs_reg ex_desc = ubld.vgrf(BRW_REGISTER_TYPE_UD); + if (is_stateless) { + ubld.AND(ex_desc, retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), + brw_imm_ud(INTEL_MASK(31, 10))); + } else { + ubld.MOV(ex_desc, surface_handle); + } + + fs_reg data; + if (write) { + const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA); + data = retype(bld.move_to_vgrf(src, src_sz), BRW_REGISTER_TYPE_UD); + } + + inst->opcode = SHADER_OPCODE_SEND; + if (surface.file == IMM && surface.ud == GFX7_BTI_SLM) + inst->sfid = GFX12_SFID_SLM; + else + inst->sfid = GFX12_SFID_UGM; + inst->desc = lsc_msg_desc(devinfo, + write ? LSC_OP_STORE : LSC_OP_LOAD, + 1 /* exec_size */, + inst->sfid == GFX12_SFID_SLM ? + LSC_ADDR_SURFTYPE_FLAT : LSC_ADDR_SURFTYPE_BSS, + LSC_ADDR_SIZE_A32, + 1 /* num_coordinates */, + LSC_DATA_SIZE_D32, + arg.ud /* num_channels */, + true /* transpose */, + LSC_CACHE_LOAD_L1STATE_L3MOCS, + !write /* has_dest */); + + inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE; + inst->exec_size = 1; + inst->ex_mlen = write ? DIV_ROUND_UP(arg.ud, 8) : 0; + inst->header_size = 0; + inst->send_has_side_effects = has_side_effects; + inst->send_is_volatile = !has_side_effects; + + inst->resize_sources(4); + + inst->src[0] = brw_imm_ud(0); /* desc */ + inst->src[1] = ex_desc; /* ex_desc */ + inst->src[2] = addr; /* payload */ + inst->src[3] = data; /* payload2 */ +} + static void lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst) { @@ -2031,6 +2104,36 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) !inst->dst.is_null()); break; } + case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: + case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: + inst->exec_size = 1; + inst->desc = lsc_msg_desc(devinfo, + LSC_OP_LOAD, + 1 /* exec_size */, + LSC_ADDR_SURFTYPE_FLAT, + LSC_ADDR_SIZE_A64, + 1 /* num_coordinates */, + LSC_DATA_SIZE_D32, + arg /* num_channels */, + true /* transpose */, + LSC_CACHE_LOAD_L1STATE_L3MOCS, + true /* has_dest */); + break; + case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: + inst->exec_size = 1; + inst->desc = lsc_msg_desc(devinfo, + LSC_OP_STORE, + 1 /* exec_size */, + LSC_ADDR_SURFTYPE_FLAT, + LSC_ADDR_SIZE_A64, + 1 /* num_coordinates */, + LSC_DATA_SIZE_D32, + arg /* num_channels */, + true /* transpose */, + LSC_CACHE_LOAD_L1STATE_L3MOCS, + false /* has_dest */); + + break; default: unreachable("Unknown A64 logical instruction"); } @@ -2662,6 +2765,10 @@ fs_visitor::lower_logical_sends() case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: + if (devinfo->has_lsc) { + lower_lsc_block_logical_send(ibld, inst); + break; + } lower_surface_block_logical_send(ibld, inst); break; @@ -2675,13 +2782,13 @@ fs_visitor::lower_logical_sends() case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL: + case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: + case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: + case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: if (devinfo->has_lsc) { lower_lsc_a64_logical_send(ibld, inst); break; } - case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: - case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: - case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: lower_a64_logical_send(ibld, inst); break;