intel/fs: enable extended bindless surface offset
Gives us 4GB of bindless surface state on Gfx12.5+ instead of 64MB. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>
This commit is contained in:

committed by
Marge Bot

parent
01fc9a06bd
commit
6d6877bf99
@@ -114,6 +114,13 @@ struct brw_compiler {
|
|||||||
*/
|
*/
|
||||||
bool indirect_ubos_use_sampler;
|
bool indirect_ubos_use_sampler;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gfx12.5+ has a bit in the SEND instruction extending the bindless
|
||||||
|
* surface offset range from 20 to 26 bits, effectively giving us 4Gb of
|
||||||
|
* bindless surface descriptors instead of 64Mb previously.
|
||||||
|
*/
|
||||||
|
bool extended_bindless_surface_offset;
|
||||||
|
|
||||||
struct nir_shader *clc_shader;
|
struct nir_shader *clc_shader;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -2469,6 +2469,8 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
|
|||||||
if (space)
|
if (space)
|
||||||
string(file, " ");
|
string(file, " ");
|
||||||
}
|
}
|
||||||
|
if (devinfo->verx10 >= 125 && brw_inst_send_ex_bso(devinfo, inst))
|
||||||
|
format(file, " ex_bso");
|
||||||
if (brw_sfid_is_lsc(sfid)) {
|
if (brw_sfid_is_lsc(sfid)) {
|
||||||
lsc_disassemble_ex_desc(devinfo, imm_desc, imm_ex_desc, file);
|
lsc_disassemble_ex_desc(devinfo, imm_desc, imm_ex_desc, file);
|
||||||
} else {
|
} else {
|
||||||
|
@@ -1721,6 +1721,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||||||
struct brw_reg ex_desc,
|
struct brw_reg ex_desc,
|
||||||
unsigned ex_desc_imm,
|
unsigned ex_desc_imm,
|
||||||
bool ex_desc_scratch,
|
bool ex_desc_scratch,
|
||||||
|
bool ex_bso,
|
||||||
bool eot);
|
bool eot);
|
||||||
|
|
||||||
void brw_ff_sync(struct brw_codegen *p,
|
void brw_ff_sync(struct brw_codegen *p,
|
||||||
|
@@ -2739,6 +2739,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||||||
struct brw_reg ex_desc,
|
struct brw_reg ex_desc,
|
||||||
unsigned ex_desc_imm,
|
unsigned ex_desc_imm,
|
||||||
bool ex_desc_scratch,
|
bool ex_desc_scratch,
|
||||||
|
bool ex_bso,
|
||||||
bool eot)
|
bool eot)
|
||||||
{
|
{
|
||||||
const struct intel_device_info *devinfo = p->devinfo;
|
const struct intel_device_info *devinfo = p->devinfo;
|
||||||
@@ -2777,6 +2778,12 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||||||
!ex_desc_scratch &&
|
!ex_desc_scratch &&
|
||||||
(devinfo->ver >= 12 ||
|
(devinfo->ver >= 12 ||
|
||||||
((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
|
((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
|
||||||
|
/* ATS-M PRMs, Volume 2d: Command Reference: Structures,
|
||||||
|
* EU_INSTRUCTION_SEND instruction
|
||||||
|
*
|
||||||
|
* "ExBSO: Exists If: ([ExDesc.IsReg]==true)"
|
||||||
|
*/
|
||||||
|
assert(!ex_bso);
|
||||||
ex_desc.ud |= ex_desc_imm;
|
ex_desc.ud |= ex_desc_imm;
|
||||||
} else {
|
} else {
|
||||||
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
||||||
@@ -2799,7 +2806,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||||||
* descriptor which comes from the address register. If we don't OR
|
* descriptor which comes from the address register. If we don't OR
|
||||||
* those two bits in, the external unit may get confused and hang.
|
* those two bits in, the external unit may get confused and hang.
|
||||||
*/
|
*/
|
||||||
unsigned imm_part = ex_desc_imm | sfid | eot << 5;
|
unsigned imm_part = ex_bso ? 0 : (ex_desc_imm | sfid | eot << 5);
|
||||||
|
|
||||||
if (ex_desc_scratch) {
|
if (ex_desc_scratch) {
|
||||||
/* Or the scratch surface offset together with the immediate part of
|
/* Or the scratch surface offset together with the immediate part of
|
||||||
@@ -2852,6 +2859,10 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||||||
brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
|
brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ex_bso) {
|
||||||
|
brw_inst_set_send_ex_bso(devinfo, send, true);
|
||||||
|
brw_inst_set_send_src1_len(devinfo, send, GET_BITS(ex_desc_imm, 10, 6));
|
||||||
|
}
|
||||||
brw_inst_set_sfid(devinfo, send, sfid);
|
brw_inst_set_sfid(devinfo, send, sfid);
|
||||||
brw_inst_set_eot(devinfo, send, eot);
|
brw_inst_set_eot(devinfo, send, eot);
|
||||||
}
|
}
|
||||||
|
@@ -343,7 +343,8 @@ fs_generator::generate_send(fs_inst *inst,
|
|||||||
*/
|
*/
|
||||||
brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
|
brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
|
||||||
desc, desc_imm, ex_desc, ex_desc_imm,
|
desc, desc_imm, ex_desc, ex_desc_imm,
|
||||||
inst->send_ex_desc_scratch, inst->eot);
|
inst->send_ex_desc_scratch,
|
||||||
|
inst->send_ex_bso, inst->eot);
|
||||||
if (inst->check_tdr)
|
if (inst->check_tdr)
|
||||||
brw_inst_set_opcode(p->isa, brw_last_inst,
|
brw_inst_set_opcode(p->isa, brw_last_inst,
|
||||||
devinfo->ver >= 12 ? BRW_OPCODE_SENDC : BRW_OPCODE_SENDSC);
|
devinfo->ver >= 12 ? BRW_OPCODE_SENDC : BRW_OPCODE_SENDSC);
|
||||||
|
@@ -620,8 +620,10 @@ FC(send_sel_reg32_desc, /* 4+ */ 77, 77, /* 12+ */ 48, 48, devinfo->ver
|
|||||||
FC(send_sel_reg32_ex_desc, /* 4+ */ 61, 61, /* 12+ */ 49, 49, devinfo->ver >= 9)
|
FC(send_sel_reg32_ex_desc, /* 4+ */ 61, 61, /* 12+ */ 49, 49, devinfo->ver >= 9)
|
||||||
F8(send_src0_reg_file, /* 4+ */ 38, 37, /* 8+ */ 42, 41, /* 12+ */ 66, 66)
|
F8(send_src0_reg_file, /* 4+ */ 38, 37, /* 8+ */ 42, 41, /* 12+ */ 66, 66)
|
||||||
FC(send_src1_reg_nr, /* 4+ */ 51, 44, /* 12+ */ 111, 104, devinfo->ver >= 9)
|
FC(send_src1_reg_nr, /* 4+ */ 51, 44, /* 12+ */ 111, 104, devinfo->ver >= 9)
|
||||||
|
FC(send_src1_len, /* 4+ */ -1, -1, /* 12+ */ 103, 99, devinfo->verx10 >= 125)
|
||||||
FC(send_src1_reg_file, /* 4+ */ 36, 36, /* 12+ */ 98, 98, devinfo->ver >= 9)
|
FC(send_src1_reg_file, /* 4+ */ 36, 36, /* 12+ */ 98, 98, devinfo->ver >= 9)
|
||||||
FC(send_dst_reg_file, /* 4+ */ 35, 35, /* 12+ */ 50, 50, devinfo->ver >= 9)
|
FC(send_dst_reg_file, /* 4+ */ 35, 35, /* 12+ */ 50, 50, devinfo->ver >= 9)
|
||||||
|
FC(send_ex_bso, /* 4+ */ -1, -1, /* 12+ */ 39, 39, devinfo->verx10 >= 125)
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/* Message descriptor bits */
|
/* Message descriptor bits */
|
||||||
|
@@ -177,6 +177,9 @@ struct backend_instruction {
|
|||||||
* the scratch surface offset to build
|
* the scratch surface offset to build
|
||||||
* extended descriptor
|
* extended descriptor
|
||||||
*/
|
*/
|
||||||
|
bool send_ex_bso:1; /**< Only for SHADER_OPCODE_SEND, use extended bindless
|
||||||
|
* surface offset (26bits instead of 20bits)
|
||||||
|
*/
|
||||||
bool predicate_trivial:1; /**< The predication mask applied to this
|
bool predicate_trivial:1; /**< The predication mask applied to this
|
||||||
* instruction is guaranteed to be uniform and
|
* instruction is guaranteed to be uniform and
|
||||||
* a superset of the execution mask of the
|
* a superset of the execution mask of the
|
||||||
|
@@ -808,6 +808,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
unsigned coord_components,
|
unsigned coord_components,
|
||||||
unsigned grad_components)
|
unsigned grad_components)
|
||||||
{
|
{
|
||||||
|
const brw_compiler *compiler = bld.shader->compiler;
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
const enum brw_reg_type payload_type =
|
const enum brw_reg_type payload_type =
|
||||||
brw_reg_type_from_bit_size(payload_type_bit_size, BRW_REGISTER_TYPE_F);
|
brw_reg_type_from_bit_size(payload_type_bit_size, BRW_REGISTER_TYPE_F);
|
||||||
@@ -1153,6 +1154,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
* we can use the surface handle directly as the extended descriptor.
|
* we can use the surface handle directly as the extended descriptor.
|
||||||
*/
|
*/
|
||||||
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
|
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
|
||||||
|
inst->send_ex_bso = compiler->extended_bindless_surface_offset;
|
||||||
} else {
|
} else {
|
||||||
/* Immediate portion of the descriptor */
|
/* Immediate portion of the descriptor */
|
||||||
inst->desc = brw_sampler_desc(devinfo,
|
inst->desc = brw_sampler_desc(devinfo,
|
||||||
@@ -1344,6 +1346,7 @@ setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
|
|||||||
const fs_reg &surface, const fs_reg &surface_handle)
|
const fs_reg &surface, const fs_reg &surface_handle)
|
||||||
{
|
{
|
||||||
const ASSERTED intel_device_info *devinfo = bld.shader->devinfo;
|
const ASSERTED intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
const brw_compiler *compiler = bld.shader->compiler;
|
||||||
|
|
||||||
/* We must have exactly one of surface and surface_handle */
|
/* We must have exactly one of surface and surface_handle */
|
||||||
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
|
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
|
||||||
@@ -1362,6 +1365,7 @@ setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
|
|||||||
* we can use the surface handle directly as the extended descriptor.
|
* we can use the surface handle directly as the extended descriptor.
|
||||||
*/
|
*/
|
||||||
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
|
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
|
||||||
|
inst->send_ex_bso = compiler->extended_bindless_surface_offset;
|
||||||
} else {
|
} else {
|
||||||
inst->desc = desc;
|
inst->desc = desc;
|
||||||
const fs_builder ubld = bld.exec_all().group(1, 0);
|
const fs_builder ubld = bld.exec_all().group(1, 0);
|
||||||
@@ -1377,12 +1381,15 @@ setup_lsc_surface_descriptors(const fs_builder &bld, fs_inst *inst,
|
|||||||
uint32_t desc, const fs_reg &surface)
|
uint32_t desc, const fs_reg &surface)
|
||||||
{
|
{
|
||||||
const ASSERTED intel_device_info *devinfo = bld.shader->devinfo;
|
const ASSERTED intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
const brw_compiler *compiler = bld.shader->compiler;
|
||||||
|
|
||||||
inst->src[0] = brw_imm_ud(0); /* desc */
|
inst->src[0] = brw_imm_ud(0); /* desc */
|
||||||
|
|
||||||
enum lsc_addr_surface_type surf_type = lsc_msg_desc_addr_type(devinfo, desc);
|
enum lsc_addr_surface_type surf_type = lsc_msg_desc_addr_type(devinfo, desc);
|
||||||
switch (surf_type) {
|
switch (surf_type) {
|
||||||
case LSC_ADDR_SURFTYPE_BSS:
|
case LSC_ADDR_SURFTYPE_BSS:
|
||||||
|
inst->send_ex_bso = compiler->extended_bindless_surface_offset;
|
||||||
|
/* fall-through */
|
||||||
case LSC_ADDR_SURFTYPE_SS:
|
case LSC_ADDR_SURFTYPE_SS:
|
||||||
assert(surface.file != BAD_FILE);
|
assert(surface.file != BAD_FILE);
|
||||||
/* We assume that the driver provided the handle in the top 20 bits so
|
/* We assume that the driver provided the handle in the top 20 bits so
|
||||||
@@ -1415,6 +1422,7 @@ setup_lsc_surface_descriptors(const fs_builder &bld, fs_inst *inst,
|
|||||||
static void
|
static void
|
||||||
lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
{
|
{
|
||||||
|
const brw_compiler *compiler = bld.shader->compiler;
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
|
||||||
/* Get the logical send arguments. */
|
/* Get the logical send arguments. */
|
||||||
@@ -1646,6 +1654,8 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
inst->header_size = header_sz;
|
inst->header_size = header_sz;
|
||||||
inst->send_has_side_effects = has_side_effects;
|
inst->send_has_side_effects = has_side_effects;
|
||||||
inst->send_is_volatile = !has_side_effects;
|
inst->send_is_volatile = !has_side_effects;
|
||||||
|
inst->send_ex_bso = surface_handle.file != BAD_FILE &&
|
||||||
|
compiler->extended_bindless_surface_offset;
|
||||||
|
|
||||||
/* Set up SFID and descriptors */
|
/* Set up SFID and descriptors */
|
||||||
inst->sfid = sfid;
|
inst->sfid = sfid;
|
||||||
@@ -1674,6 +1684,7 @@ lsc_bits_to_data_size(unsigned bit_size)
|
|||||||
static void
|
static void
|
||||||
lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
{
|
{
|
||||||
|
const brw_compiler *compiler = bld.shader->compiler;
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
assert(devinfo->has_lsc);
|
assert(devinfo->has_lsc);
|
||||||
|
|
||||||
@@ -1808,6 +1819,8 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
inst->header_size = 0;
|
inst->header_size = 0;
|
||||||
inst->send_has_side_effects = has_side_effects;
|
inst->send_has_side_effects = has_side_effects;
|
||||||
inst->send_is_volatile = !has_side_effects;
|
inst->send_is_volatile = !has_side_effects;
|
||||||
|
inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS &&
|
||||||
|
compiler->extended_bindless_surface_offset;
|
||||||
|
|
||||||
inst->resize_sources(4);
|
inst->resize_sources(4);
|
||||||
|
|
||||||
@@ -1828,6 +1841,7 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
static void
|
static void
|
||||||
lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
{
|
{
|
||||||
|
const brw_compiler *compiler = bld.shader->compiler;
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
assert(devinfo->has_lsc);
|
assert(devinfo->has_lsc);
|
||||||
|
|
||||||
@@ -1893,6 +1907,8 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
inst->header_size = 0;
|
inst->header_size = 0;
|
||||||
inst->send_has_side_effects = has_side_effects;
|
inst->send_has_side_effects = has_side_effects;
|
||||||
inst->send_is_volatile = !has_side_effects;
|
inst->send_is_volatile = !has_side_effects;
|
||||||
|
inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS &&
|
||||||
|
compiler->extended_bindless_surface_offset;
|
||||||
|
|
||||||
inst->resize_sources(4);
|
inst->resize_sources(4);
|
||||||
|
|
||||||
@@ -2322,6 +2338,8 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld,
|
|||||||
inst->opcode = SHADER_OPCODE_SEND;
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
inst->sfid = GFX12_SFID_UGM;
|
inst->sfid = GFX12_SFID_UGM;
|
||||||
inst->resize_sources(3);
|
inst->resize_sources(3);
|
||||||
|
inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS &&
|
||||||
|
compiler->extended_bindless_surface_offset;
|
||||||
|
|
||||||
assert(!compiler->indirect_ubos_use_sampler);
|
assert(!compiler->indirect_ubos_use_sampler);
|
||||||
|
|
||||||
@@ -3019,6 +3037,8 @@ fs_visitor::lower_uniform_pull_constant_loads()
|
|||||||
/* Update the original instruction. */
|
/* Update the original instruction. */
|
||||||
inst->opcode = SHADER_OPCODE_SEND;
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
|
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
|
||||||
|
inst->send_ex_bso = surface_handle.file != BAD_FILE &&
|
||||||
|
compiler->extended_bindless_surface_offset;
|
||||||
inst->ex_mlen = 0;
|
inst->ex_mlen = 0;
|
||||||
inst->header_size = 0;
|
inst->header_size = 0;
|
||||||
inst->send_has_side_effects = false;
|
inst->send_has_side_effects = false;
|
||||||
|
Reference in New Issue
Block a user