From e09cfda0de500a0c99948a802023832e9659f1a1 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin
Date: Tue, 27 Dec 2022 15:32:24 +0200
Subject: [PATCH] intel/fs: lower get_buffer_size like other logical sends

This will also enable the use of the bindless heap.

Signed-off-by: Lionel Landwerlin
Reviewed-by: Kenneth Graunke
Part-of:
---
/* Reviewer notes (free-text area after the three-dash line; not part of
 * the commit message and not part of any hunk):
 *
 *  - brw_eu_defines.h: adds enum get_buffer_size_srcs, which names the two
 *    sources of SHADER_OPCODE_GET_BUFFER_SIZE (surface first, LOD second)
 *    plus a trailing GET_BUFFER_SIZE_SRCS count.
 *  - brw_fs_nir.cpp: the instruction is now emitted from a srcs[] array
 *    indexed by that enum. The surface index is therefore source 0 and the
 *    LOD payload source 1, the reverse of the previous
 *    (src_payload, brw_imm_ud(index)) operand order.
 *  - brw_fs.cpp: removes the opcode from the case lists in
 *    fs_inst::is_control_source() and get_lowered_simd_width().
 *  - brw_fs.h / brw_fs_generator.cpp: removes generate_get_buffer_size()
 *    and its case in fs_generator::generate_code().
 *  - brw_lower_logical_sends.cpp: adds lower_get_buffer_size(), which
 *    rewrites the instruction in place into a SHADER_OPCODE_SEND with
 *    sfid BRW_SFID_SAMPLER and a RESINFO / SIMD8 sampler descriptor, then
 *    calls setup_surface_descriptors(). It asserts exec_size == 8, whereas
 *    the removed generator code also handled exec_size 16.
 *
 * NOTE(review): leading whitespace inside the hunks below was reconstructed
 * (line splits were checked against each hunk's @@ line counts) -- confirm
 * against the upstream tree before applying without --ignore-whitespace.
 */
 src/intel/compiler/brw_eu_defines.h           |  9 ++++
 src/intel/compiler/brw_fs.cpp                 |  2 -
 src/intel/compiler/brw_fs.h                   |  3 --
 src/intel/compiler/brw_fs_generator.cpp       | 49 -------------------
 src/intel/compiler/brw_fs_nir.cpp             |  6 ++-
 .../compiler/brw_lower_logical_sends.cpp      | 39 +++++++++++++++
 6 files changed, 52 insertions(+), 56 deletions(-)

diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index f40e87be456..824e54acf22 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -898,6 +898,15 @@ enum pull_uniform_constant_srcs {
    PULL_UNIFORM_CONSTANT_SRCS,
 };
 
+enum get_buffer_size_srcs {
+   /** Surface binding table index */
+   GET_BUFFER_SIZE_SRC_SURFACE,
+   /** LOD */
+   GET_BUFFER_SIZE_SRC_LOD,
+
+   GET_BUFFER_SIZE_SRCS
+};
+
 enum surface_logical_srcs {
    /** Surface binding table index */
    SURFACE_LOGICAL_SRC_SURFACE,
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 85f7d881bc7..ff89c39ff69 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -258,7 +258,6 @@ fs_inst::is_control_source(unsigned arg) const
    case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
    case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
    case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
-   case SHADER_OPCODE_GET_BUFFER_SIZE:
       return arg == 1;
 
    case SHADER_OPCODE_MOV_INDIRECT:
@@ -4998,7 +4997,6 @@ get_lowered_simd_width(const struct brw_compiler *compiler,
       return MIN2(8, inst->exec_size);
 
    case FS_OPCODE_LINTERP:
-   case SHADER_OPCODE_GET_BUFFER_SIZE:
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
    case FS_OPCODE_PACK_HALF_2x16_SPLIT:
    case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 1b7b243727e..51e49e505cd 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -615,9 +615,6 @@ private:
    void generate_tex(fs_inst *inst, struct brw_reg dst,
                      struct brw_reg surface_index,
                      struct brw_reg sampler_index);
-   void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
-                                 struct brw_reg src,
-                                 struct brw_reg surf_index);
    void generate_ddx(const fs_inst *inst,
                      struct brw_reg dst, struct brw_reg src);
    void generate_ddy(const fs_inst *inst,
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 56f7815fa5d..2167cbe8706 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -967,51 +967,6 @@ fs_generator::generate_linterp(fs_inst *inst,
    }
 }
 
-void
-fs_generator::generate_get_buffer_size(fs_inst *inst,
-                                       struct brw_reg dst,
-                                       struct brw_reg src,
-                                       struct brw_reg surf_index)
-{
-   assert(devinfo->ver >= 7);
-   assert(surf_index.file == BRW_IMMEDIATE_VALUE);
-
-   uint32_t simd_mode;
-   int rlen = 4;
-
-   switch (inst->exec_size) {
-   case 8:
-      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
-      break;
-   case 16:
-      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
-      break;
-   default:
-      unreachable("Invalid width for texture instruction");
-   }
-
-   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
-      rlen = 8;
-      dst = vec16(dst);
-   }
-
-   uint32_t return_format =
-      devinfo->ver >= 8 ? GFX8_SAMPLER_RETURN_FORMAT_32BITS :
-                          BRW_SAMPLER_RETURN_FORMAT_SINT32;
-   brw_SAMPLE(p,
-              retype(dst, BRW_REGISTER_TYPE_UW),
-              inst->base_mrf,
-              src,
-              surf_index.ud,
-              0,
-              GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
-              rlen, /* response length */
-              inst->mlen,
-              inst->header_size > 0,
-              simd_mode,
-              return_format);
-}
-
 void
 fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst,
                            struct brw_reg surface_index,
@@ -2097,10 +2052,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
          send_count++;
          break;
 
-      case SHADER_OPCODE_GET_BUFFER_SIZE:
-         generate_get_buffer_size(inst, dst, src[0], src[1]);
-         send_count++;
-         break;
       case SHADER_OPCODE_TEX:
       case FS_OPCODE_TXB:
       case SHADER_OPCODE_TXD:
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index be24207d49c..00a095c997f 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -5008,9 +5008,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       /* Set LOD = 0 */
       ubld.MOV(src_payload, brw_imm_d(0));
 
-      const unsigned index = ssbo_index;
+      fs_reg srcs[GET_BUFFER_SIZE_SRCS];
+      srcs[GET_BUFFER_SIZE_SRC_SURFACE] = brw_imm_ud(ssbo_index);
+      srcs[GET_BUFFER_SIZE_SRC_LOD] = src_payload;
       fs_inst *inst = ubld.emit(SHADER_OPCODE_GET_BUFFER_SIZE, ret_payload,
-                                src_payload, brw_imm_ud(index));
+                                srcs, GET_BUFFER_SIZE_SRCS);
       inst->header_size = 0;
       inst->mlen = 1;
       inst->size_written = 4 * REG_SIZE;
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
index 08a79feb34c..766b96a6d8f 100644
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -2703,6 +2703,41 @@ lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst)
    inst->src[3] = payload;
 }
 
+static void
+lower_get_buffer_size(const fs_builder &bld, fs_inst *inst)
+{
+   const intel_device_info *devinfo = bld.shader->devinfo;
+   assert(devinfo->ver >= 7);
+   /* Since we can only execute this instruction on uniform bti/surface
+    * handles, brw_fs_nir.cpp should already have limited this to SIMD8.
+    */
+   assert(inst->exec_size == 8);
+
+   fs_reg surface = inst->src[GET_BUFFER_SIZE_SRC_SURFACE];
+   fs_reg lod = inst->src[GET_BUFFER_SIZE_SRC_LOD];
+
+   inst->opcode = SHADER_OPCODE_SEND;
+   inst->mlen = inst->exec_size / 8;
+   inst->resize_sources(3);
+   inst->ex_mlen = 0;
+   inst->ex_desc = 0;
+
+   /* src[0] & src[1] are filled by setup_surface_descriptors() */
+   inst->src[2] = lod;
+
+   const uint32_t return_format = devinfo->ver >= 8 ?
+      GFX8_SAMPLER_RETURN_FORMAT_32BITS : BRW_SAMPLER_RETURN_FORMAT_SINT32;
+
+   const uint32_t desc = brw_sampler_desc(devinfo, 0, 0,
+                                          GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+                                          BRW_SAMPLER_SIMD_MODE_SIMD8,
+                                          return_format);
+
+   inst->dst = retype(inst->dst, BRW_REGISTER_TYPE_UW);
+   inst->sfid = BRW_SFID_SAMPLER;
+   setup_surface_descriptors(bld, inst, desc, surface, fs_reg() /* surface_handle */);
+}
+
 bool
 fs_visitor::lower_logical_sends()
 {
@@ -2786,6 +2821,10 @@ fs_visitor::lower_logical_sends()
          lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_SAMPLEINFO);
          break;
 
+      case SHADER_OPCODE_GET_BUFFER_SIZE:
+         lower_get_buffer_size(ibld, inst);
+         break;
+
       case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
       case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: