intel/fs: lower get_buffer_size like other logical sends

This will also enable the use of the bindless heap.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>
This commit is contained in:
Lionel Landwerlin
2022-12-27 15:32:24 +02:00
committed by Marge Bot
parent a66944dfbc
commit e09cfda0de
6 changed files with 52 additions and 56 deletions

View File

@@ -898,6 +898,15 @@ enum pull_uniform_constant_srcs {
PULL_UNIFORM_CONSTANT_SRCS,
};
/**
 * Source slots of the logical SHADER_OPCODE_GET_BUFFER_SIZE instruction.
 *
 * The final enumerator is not a source; it is the number of slots and is
 * used to size the source array handed to the instruction.
 */
enum get_buffer_size_srcs {
   /** Binding table index of the surface */
   GET_BUFFER_SIZE_SRC_SURFACE = 0,

   /** Level of detail */
   GET_BUFFER_SIZE_SRC_LOD,

   /** Slot count (keep last) */
   GET_BUFFER_SIZE_SRCS,
};
enum surface_logical_srcs {
/** Surface binding table index */
SURFACE_LOGICAL_SRC_SURFACE,

View File

@@ -258,7 +258,6 @@ fs_inst::is_control_source(unsigned arg) const
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
case SHADER_OPCODE_GET_BUFFER_SIZE:
return arg == 1;
case SHADER_OPCODE_MOV_INDIRECT:
@@ -4998,7 +4997,6 @@ get_lowered_simd_width(const struct brw_compiler *compiler,
return MIN2(8, inst->exec_size);
case FS_OPCODE_LINTERP:
case SHADER_OPCODE_GET_BUFFER_SIZE:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:

View File

@@ -615,9 +615,6 @@ private:
void generate_tex(fs_inst *inst, struct brw_reg dst,
struct brw_reg surface_index,
struct brw_reg sampler_index);
void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
struct brw_reg src,
struct brw_reg surf_index);
void generate_ddx(const fs_inst *inst,
struct brw_reg dst, struct brw_reg src);
void generate_ddy(const fs_inst *inst,

View File

@@ -967,51 +967,6 @@ fs_generator::generate_linterp(fs_inst *inst,
}
}
/**
 * Emit the brw_SAMPLE() instruction for SHADER_OPCODE_GET_BUFFER_SIZE.
 *
 * The message type is GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO.  The SIMD mode
 * and the response length are derived from inst->exec_size: a width of 8
 * asks for 4 response registers, a width of 16 asks for 8 and widens the
 * destination with vec16().  Any other width is rejected.
 *
 * \param inst        instruction being generated; exec_size, base_mrf,
 *                    mlen and header_size are read from it
 * \param dst         destination register (retyped to UW for the send)
 * \param src         register forwarded to brw_SAMPLE() as its source
 * \param surf_index  surface index; must be an immediate
 */
void
fs_generator::generate_get_buffer_size(fs_inst *inst,
                                       struct brw_reg dst,
                                       struct brw_reg src,
                                       struct brw_reg surf_index)
{
   assert(devinfo->ver >= 7);
   assert(surf_index.file == BRW_IMMEDIATE_VALUE);

   /* Map the execution width onto the sampler SIMD mode. */
   uint32_t simd_mode;
   if (inst->exec_size == 8) {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   } else if (inst->exec_size == 16) {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   } else {
      unreachable("Invalid width for texture instruction");
   }

   /* SIMD16 needs twice the response registers and a 16-wide destination. */
   const bool is_simd16 = (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16);
   const int response_length = is_simd16 ? 8 : 4;
   if (is_simd16)
      dst = vec16(dst);

   /* The return-format encoding differs from Gfx8 onwards. */
   uint32_t return_format;
   if (devinfo->ver >= 8)
      return_format = GFX8_SAMPLER_RETURN_FORMAT_32BITS;
   else
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
              src,
              surf_index.ud,
              0,
              GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
              response_length,
              inst->mlen,
              inst->header_size > 0,
              simd_mode,
              return_format);
}
void
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst,
struct brw_reg surface_index,
@@ -2097,10 +2052,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
send_count++;
break;
case SHADER_OPCODE_GET_BUFFER_SIZE:
generate_get_buffer_size(inst, dst, src[0], src[1]);
send_count++;
break;
case SHADER_OPCODE_TEX:
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:

View File

@@ -5008,9 +5008,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
/* Set LOD = 0 */
ubld.MOV(src_payload, brw_imm_d(0));
const unsigned index = ssbo_index;
fs_reg srcs[GET_BUFFER_SIZE_SRCS];
srcs[GET_BUFFER_SIZE_SRC_SURFACE] = brw_imm_ud(ssbo_index);
srcs[GET_BUFFER_SIZE_SRC_LOD] = src_payload;
fs_inst *inst = ubld.emit(SHADER_OPCODE_GET_BUFFER_SIZE, ret_payload,
src_payload, brw_imm_ud(index));
srcs, GET_BUFFER_SIZE_SRCS);
inst->header_size = 0;
inst->mlen = 1;
inst->size_written = 4 * REG_SIZE;

View File

@@ -2703,6 +2703,41 @@ lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst)
inst->src[3] = payload;
}
static void
lower_get_buffer_size(const fs_builder &bld, fs_inst *inst)
{
const intel_device_info *devinfo = bld.shader->devinfo;
assert(devinfo->ver >= 7);
/* Since we can only execute this instruction on uniform bti/surface
* handles, brw_fs_nir.cpp should already have limited this to SIMD8.
*/
assert(inst->exec_size == 8);
fs_reg surface = inst->src[GET_BUFFER_SIZE_SRC_SURFACE];
fs_reg lod = inst->src[GET_BUFFER_SIZE_SRC_LOD];
inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = inst->exec_size / 8;
inst->resize_sources(3);
inst->ex_mlen = 0;
inst->ex_desc = 0;
/* src[0] & src[1] are filled by setup_surface_descriptors() */
inst->src[2] = lod;
const uint32_t return_format = devinfo->ver >= 8 ?
GFX8_SAMPLER_RETURN_FORMAT_32BITS : BRW_SAMPLER_RETURN_FORMAT_SINT32;
const uint32_t desc = brw_sampler_desc(devinfo, 0, 0,
GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
BRW_SAMPLER_SIMD_MODE_SIMD8,
return_format);
inst->dst = retype(inst->dst, BRW_REGISTER_TYPE_UW);
inst->sfid = BRW_SFID_SAMPLER;
setup_surface_descriptors(bld, inst, desc, surface, fs_reg() /* surface_handle */);
}
bool
fs_visitor::lower_logical_sends()
{
@@ -2786,6 +2821,10 @@ fs_visitor::lower_logical_sends()
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_SAMPLEINFO);
break;
case SHADER_OPCODE_GET_BUFFER_SIZE:
lower_get_buffer_size(ibld, inst);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: