intel/fs: enable UBO accesses through bindless heap
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>
This commit is contained in:

committed by Marge Bot

parent 068bf1378d
commit ad9bc1ffb5
@@ -890,6 +890,8 @@ enum tex_logical_srcs {
|
|||||||
enum pull_uniform_constant_srcs {
|
enum pull_uniform_constant_srcs {
|
||||||
/** Surface binding table index */
|
/** Surface binding table index */
|
||||||
PULL_UNIFORM_CONSTANT_SRC_SURFACE,
|
PULL_UNIFORM_CONSTANT_SRC_SURFACE,
|
||||||
|
/** Surface bindless handle */
|
||||||
|
PULL_UNIFORM_CONSTANT_SRC_SURFACE_HANDLE,
|
||||||
/** Surface offset */
|
/** Surface offset */
|
||||||
PULL_UNIFORM_CONSTANT_SRC_OFFSET,
|
PULL_UNIFORM_CONSTANT_SRC_OFFSET,
|
||||||
/** Pull size */
|
/** Pull size */
|
||||||
@@ -898,6 +900,19 @@ enum pull_uniform_constant_srcs {
|
|||||||
PULL_UNIFORM_CONSTANT_SRCS,
|
PULL_UNIFORM_CONSTANT_SRCS,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum pull_varying_constant_srcs {
|
||||||
|
/** Surface binding table index */
|
||||||
|
PULL_VARYING_CONSTANT_SRC_SURFACE,
|
||||||
|
/** Surface bindless handle */
|
||||||
|
PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE,
|
||||||
|
/** Surface offset */
|
||||||
|
PULL_VARYING_CONSTANT_SRC_OFFSET,
|
||||||
|
/** Pull alignment */
|
||||||
|
PULL_VARYING_CONSTANT_SRC_ALIGNMENT,
|
||||||
|
|
||||||
|
PULL_VARYING_CONSTANT_SRCS,
|
||||||
|
};
|
||||||
|
|
||||||
enum get_buffer_size_srcs {
|
enum get_buffer_size_srcs {
|
||||||
/** Surface binding table index */
|
/** Surface binding table index */
|
||||||
GET_BUFFER_SIZE_SRC_SURFACE,
|
GET_BUFFER_SIZE_SRC_SURFACE,
|
||||||
|
@@ -168,7 +168,8 @@ fs_inst::resize_sources(uint8_t num_sources)
|
|||||||
void
|
void
|
||||||
fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
|
fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
|
||||||
const fs_reg &dst,
|
const fs_reg &dst,
|
||||||
const fs_reg &surf_index,
|
const fs_reg &surface,
|
||||||
|
const fs_reg &surface_handle,
|
||||||
const fs_reg &varying_offset,
|
const fs_reg &varying_offset,
|
||||||
uint32_t const_offset,
|
uint32_t const_offset,
|
||||||
uint8_t alignment)
|
uint8_t alignment)
|
||||||
@@ -194,9 +195,15 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
|
|||||||
* result.
|
* result.
|
||||||
*/
|
*/
|
||||||
fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
|
fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
|
||||||
|
|
||||||
|
fs_reg srcs[PULL_VARYING_CONSTANT_SRCS];
|
||||||
|
srcs[PULL_VARYING_CONSTANT_SRC_SURFACE] = surface;
|
||||||
|
srcs[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE] = surface_handle;
|
||||||
|
srcs[PULL_VARYING_CONSTANT_SRC_OFFSET] = vec4_offset;
|
||||||
|
srcs[PULL_VARYING_CONSTANT_SRC_ALIGNMENT] = brw_imm_ud(alignment);
|
||||||
|
|
||||||
fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
|
fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
|
||||||
vec4_result, surf_index, vec4_offset,
|
vec4_result, srcs, PULL_VARYING_CONSTANT_SRCS);
|
||||||
brw_imm_ud(alignment));
|
|
||||||
inst->size_written = 4 * vec4_result.component_size(inst->exec_size);
|
inst->size_written = 4 * vec4_result.component_size(inst->exec_size);
|
||||||
|
|
||||||
shuffle_from_32bit_read(bld, dst, vec4_result,
|
shuffle_from_32bit_read(bld, dst, vec4_result,
|
||||||
@@ -2513,6 +2520,7 @@ fs_visitor::lower_constant_loads()
|
|||||||
|
|
||||||
VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
|
VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
|
||||||
brw_imm_ud(index),
|
brw_imm_ud(index),
|
||||||
|
fs_reg() /* surface_handle */,
|
||||||
inst->src[1],
|
inst->src[1],
|
||||||
pull_index * 4, 4);
|
pull_index * 4, 4);
|
||||||
inst->remove(block);
|
inst->remove(block);
|
||||||
|
@@ -216,7 +216,8 @@ public:
|
|||||||
|
|
||||||
void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
|
void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
|
||||||
const fs_reg &dst,
|
const fs_reg &dst,
|
||||||
const fs_reg &surf_index,
|
const fs_reg &surface,
|
||||||
|
const fs_reg &surface_handle,
|
||||||
const fs_reg &varying_offset,
|
const fs_reg &varying_offset,
|
||||||
uint32_t const_offset,
|
uint32_t const_offset,
|
||||||
uint8_t alignment);
|
uint8_t alignment);
|
||||||
|
@@ -4643,26 +4643,20 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||||||
}
|
}
|
||||||
|
|
||||||
case nir_intrinsic_load_ubo: {
|
case nir_intrinsic_load_ubo: {
|
||||||
fs_reg surf_index;
|
fs_reg surface, surface_handle;
|
||||||
if (nir_src_is_const(instr->src[0])) {
|
|
||||||
const unsigned index = nir_src_as_uint(instr->src[0]);
|
if (get_nir_src_bindless(instr->src[0]))
|
||||||
surf_index = brw_imm_ud(index);
|
surface_handle = get_nir_buffer_intrinsic_index(bld, instr);
|
||||||
} else {
|
else
|
||||||
/* The block index is not a constant. Evaluate the index expression
|
surface = get_nir_buffer_intrinsic_index(bld, instr);
|
||||||
* per-channel and add the base UBO index; we have to select a value
|
|
||||||
* from any live channel.
|
|
||||||
*/
|
|
||||||
surf_index = vgrf(glsl_type::uint_type);
|
|
||||||
bld.MOV(surf_index, get_nir_src(instr->src[0]));
|
|
||||||
surf_index = bld.emit_uniformize(surf_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!nir_src_is_const(instr->src[1])) {
|
if (!nir_src_is_const(instr->src[1])) {
|
||||||
fs_reg base_offset = retype(get_nir_src(instr->src[1]),
|
fs_reg base_offset = retype(get_nir_src(instr->src[1]),
|
||||||
BRW_REGISTER_TYPE_UD);
|
BRW_REGISTER_TYPE_UD);
|
||||||
|
|
||||||
for (int i = 0; i < instr->num_components; i++)
|
for (int i = 0; i < instr->num_components; i++)
|
||||||
VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
|
VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i),
|
||||||
|
surface, surface_handle,
|
||||||
base_offset, i * type_sz(dest.type),
|
base_offset, i * type_sz(dest.type),
|
||||||
nir_dest_bit_size(instr->dest) / 8);
|
nir_dest_bit_size(instr->dest) / 8);
|
||||||
|
|
||||||
@@ -4717,9 +4711,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||||||
(block_sz - base % block_sz) / type_size);
|
(block_sz - base % block_sz) / type_size);
|
||||||
|
|
||||||
fs_reg srcs[PULL_UNIFORM_CONSTANT_SRCS];
|
fs_reg srcs[PULL_UNIFORM_CONSTANT_SRCS];
|
||||||
srcs[PULL_UNIFORM_CONSTANT_SRC_SURFACE] = surf_index;
|
srcs[PULL_UNIFORM_CONSTANT_SRC_SURFACE] = surface;
|
||||||
srcs[PULL_UNIFORM_CONSTANT_SRC_OFFSET] = brw_imm_ud(base & ~(block_sz - 1));
|
srcs[PULL_UNIFORM_CONSTANT_SRC_SURFACE_HANDLE] = surface_handle;
|
||||||
srcs[PULL_UNIFORM_CONSTANT_SRC_SIZE] = brw_imm_ud(block_sz);
|
srcs[PULL_UNIFORM_CONSTANT_SRC_OFFSET] = brw_imm_ud(base & ~(block_sz - 1));
|
||||||
|
srcs[PULL_UNIFORM_CONSTANT_SRC_SIZE] = brw_imm_ud(block_sz);
|
||||||
|
|
||||||
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
|
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
|
||||||
srcs, PULL_UNIFORM_CONSTANT_SRCS);
|
srcs, PULL_UNIFORM_CONSTANT_SRCS);
|
||||||
|
@@ -2301,16 +2301,23 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld,
|
|||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
ASSERTED const brw_compiler *compiler = bld.shader->compiler;
|
ASSERTED const brw_compiler *compiler = bld.shader->compiler;
|
||||||
|
|
||||||
fs_reg surface = inst->src[0];
|
fs_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
|
||||||
|
fs_reg surface_handle = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE];
|
||||||
|
fs_reg offset_B = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
|
||||||
|
fs_reg alignment_B = inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT];
|
||||||
|
|
||||||
/* We are switching the instruction from an ALU-like instruction to a
|
/* We are switching the instruction from an ALU-like instruction to a
|
||||||
* send-from-grf instruction. Since sends can't handle strides or
|
* send-from-grf instruction. Since sends can't handle strides or
|
||||||
* source modifiers, we have to make a copy of the offset source.
|
* source modifiers, we have to make a copy of the offset source.
|
||||||
*/
|
*/
|
||||||
fs_reg ubo_offset = bld.move_to_vgrf(inst->src[1], 1);
|
fs_reg ubo_offset = bld.move_to_vgrf(offset_B, 1);
|
||||||
|
|
||||||
assert(inst->src[2].file == BRW_IMMEDIATE_VALUE);
|
enum lsc_addr_surface_type surf_type =
|
||||||
unsigned alignment = inst->src[2].ud;
|
surface_handle.file == BAD_FILE ?
|
||||||
|
LSC_ADDR_SURFTYPE_BTI : LSC_ADDR_SURFTYPE_BSS;
|
||||||
|
|
||||||
|
assert(alignment_B.file == BRW_IMMEDIATE_VALUE);
|
||||||
|
unsigned alignment = alignment_B.ud;
|
||||||
|
|
||||||
inst->opcode = SHADER_OPCODE_SEND;
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
inst->sfid = GFX12_SFID_UGM;
|
inst->sfid = GFX12_SFID_UGM;
|
||||||
@@ -2318,31 +2325,39 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld,
|
|||||||
|
|
||||||
assert(!compiler->indirect_ubos_use_sampler);
|
assert(!compiler->indirect_ubos_use_sampler);
|
||||||
|
|
||||||
|
inst->src[0] = brw_imm_ud(0);
|
||||||
inst->src[2] = ubo_offset; /* payload */
|
inst->src[2] = ubo_offset; /* payload */
|
||||||
|
|
||||||
if (alignment >= 4) {
|
if (alignment >= 4) {
|
||||||
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size,
|
inst->desc =
|
||||||
LSC_ADDR_SURFTYPE_BTI, LSC_ADDR_SIZE_A32,
|
lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size,
|
||||||
1 /* num_coordinates */,
|
surf_type, LSC_ADDR_SIZE_A32,
|
||||||
LSC_DATA_SIZE_D32,
|
1 /* num_coordinates */,
|
||||||
4 /* num_channels */,
|
LSC_DATA_SIZE_D32,
|
||||||
false /* transpose */,
|
4 /* num_channels */,
|
||||||
LSC_CACHE_LOAD_L1STATE_L3MOCS,
|
false /* transpose */,
|
||||||
true /* has_dest */);
|
LSC_CACHE_LOAD_L1STATE_L3MOCS,
|
||||||
|
true /* has_dest */);
|
||||||
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
|
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
|
||||||
|
|
||||||
setup_lsc_surface_descriptors(bld, inst, inst->desc, surface);
|
setup_lsc_surface_descriptors(bld, inst, inst->desc,
|
||||||
|
surface.file != BAD_FILE ?
|
||||||
|
surface : surface_handle);
|
||||||
} else {
|
} else {
|
||||||
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size,
|
inst->desc =
|
||||||
LSC_ADDR_SURFTYPE_BTI, LSC_ADDR_SIZE_A32,
|
lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size,
|
||||||
1 /* num_coordinates */,
|
surf_type, LSC_ADDR_SIZE_A32,
|
||||||
LSC_DATA_SIZE_D32,
|
1 /* num_coordinates */,
|
||||||
1 /* num_channels */,
|
LSC_DATA_SIZE_D32,
|
||||||
false /* transpose */,
|
1 /* num_channels */,
|
||||||
LSC_CACHE_LOAD_L1STATE_L3MOCS,
|
false /* transpose */,
|
||||||
true /* has_dest */);
|
LSC_CACHE_LOAD_L1STATE_L3MOCS,
|
||||||
|
true /* has_dest */);
|
||||||
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
|
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
|
||||||
|
|
||||||
setup_lsc_surface_descriptors(bld, inst, inst->desc, surface);
|
setup_lsc_surface_descriptors(bld, inst, inst->desc,
|
||||||
|
surface.file != BAD_FILE ?
|
||||||
|
surface : surface_handle);
|
||||||
|
|
||||||
/* The byte scattered messages can only read one dword at a time so
|
/* The byte scattered messages can only read one dword at a time so
|
||||||
* we have to duplicate the message 4 times to read the full vec4.
|
* we have to duplicate the message 4 times to read the full vec4.
|
||||||
@@ -2375,55 +2390,56 @@ lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
const brw_compiler *compiler = bld.shader->compiler;
|
const brw_compiler *compiler = bld.shader->compiler;
|
||||||
|
|
||||||
if (devinfo->ver >= 7) {
|
if (devinfo->ver >= 7) {
|
||||||
fs_reg index = inst->src[0];
|
fs_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
|
||||||
|
fs_reg surface_handle = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE];
|
||||||
|
fs_reg offset_B = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
|
||||||
|
|
||||||
/* We are switching the instruction from an ALU-like instruction to a
|
/* We are switching the instruction from an ALU-like instruction to a
|
||||||
* send-from-grf instruction. Since sends can't handle strides or
|
* send-from-grf instruction. Since sends can't handle strides or
|
||||||
* source modifiers, we have to make a copy of the offset source.
|
* source modifiers, we have to make a copy of the offset source.
|
||||||
*/
|
*/
|
||||||
fs_reg ubo_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
fs_reg ubo_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
bld.MOV(ubo_offset, inst->src[1]);
|
bld.MOV(ubo_offset, offset_B);
|
||||||
|
|
||||||
assert(inst->src[2].file == BRW_IMMEDIATE_VALUE);
|
assert(inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT].file == BRW_IMMEDIATE_VALUE);
|
||||||
unsigned alignment = inst->src[2].ud;
|
unsigned alignment = inst->src[PULL_VARYING_CONSTANT_SRC_ALIGNMENT].ud;
|
||||||
|
|
||||||
inst->opcode = SHADER_OPCODE_SEND;
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
inst->mlen = inst->exec_size / 8;
|
inst->mlen = inst->exec_size / 8;
|
||||||
inst->resize_sources(3);
|
inst->resize_sources(3);
|
||||||
|
|
||||||
if (index.file == IMM) {
|
/* src[0] & src[1] are filled by setup_surface_descriptors() */
|
||||||
inst->desc = index.ud & 0xff;
|
|
||||||
inst->src[0] = brw_imm_ud(0);
|
|
||||||
} else {
|
|
||||||
inst->desc = 0;
|
|
||||||
const fs_builder ubld = bld.exec_all().group(1, 0);
|
|
||||||
fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
|
||||||
ubld.AND(tmp, index, brw_imm_ud(0xff));
|
|
||||||
inst->src[0] = component(tmp, 0);
|
|
||||||
}
|
|
||||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
|
||||||
inst->src[2] = ubo_offset; /* payload */
|
inst->src[2] = ubo_offset; /* payload */
|
||||||
|
|
||||||
if (compiler->indirect_ubos_use_sampler) {
|
if (compiler->indirect_ubos_use_sampler) {
|
||||||
const unsigned simd_mode =
|
const unsigned simd_mode =
|
||||||
inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
|
inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
|
||||||
BRW_SAMPLER_SIMD_MODE_SIMD16;
|
BRW_SAMPLER_SIMD_MODE_SIMD16;
|
||||||
|
const uint32_t desc = brw_sampler_desc(devinfo, 0, 0,
|
||||||
|
GFX5_SAMPLER_MESSAGE_SAMPLE_LD,
|
||||||
|
simd_mode, 0);
|
||||||
|
|
||||||
inst->sfid = BRW_SFID_SAMPLER;
|
inst->sfid = BRW_SFID_SAMPLER;
|
||||||
inst->desc |= brw_sampler_desc(devinfo, 0, 0,
|
setup_surface_descriptors(bld, inst, desc, surface, surface_handle);
|
||||||
GFX5_SAMPLER_MESSAGE_SAMPLE_LD,
|
|
||||||
simd_mode, 0);
|
|
||||||
} else if (alignment >= 4) {
|
} else if (alignment >= 4) {
|
||||||
|
const uint32_t desc =
|
||||||
|
brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
|
||||||
|
4, /* num_channels */
|
||||||
|
false /* write */);
|
||||||
|
|
||||||
inst->sfid = (devinfo->verx10 >= 75 ?
|
inst->sfid = (devinfo->verx10 >= 75 ?
|
||||||
HSW_SFID_DATAPORT_DATA_CACHE_1 :
|
HSW_SFID_DATAPORT_DATA_CACHE_1 :
|
||||||
GFX7_SFID_DATAPORT_DATA_CACHE);
|
GFX7_SFID_DATAPORT_DATA_CACHE);
|
||||||
inst->desc |= brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
|
setup_surface_descriptors(bld, inst, desc, surface, surface_handle);
|
||||||
4, /* num_channels */
|
|
||||||
false /* write */);
|
|
||||||
} else {
|
} else {
|
||||||
|
const uint32_t desc =
|
||||||
|
brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
|
||||||
|
32, /* bit_size */
|
||||||
|
false /* write */);
|
||||||
|
|
||||||
inst->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
|
inst->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
|
||||||
inst->desc |= brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
|
setup_surface_descriptors(bld, inst, desc, surface, surface_handle);
|
||||||
32, /* bit_size */
|
|
||||||
false /* write */);
|
|
||||||
/* The byte scattered messages can only read one dword at a time so
|
/* The byte scattered messages can only read one dword at a time so
|
||||||
* we have to duplicate the message 4 times to read the full vec4.
|
* we have to duplicate the message 4 times to read the full vec4.
|
||||||
* Hopefully, dead code will clean up the mess if some of them aren't
|
* Hopefully, dead code will clean up the mess if some of them aren't
|
||||||
@@ -2447,16 +2463,22 @@ lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
fs_reg surface = inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE];
|
||||||
|
fs_reg offset = inst->src[PULL_VARYING_CONSTANT_SRC_OFFSET];
|
||||||
|
assert(inst->src[PULL_VARYING_CONSTANT_SRC_SURFACE_HANDLE].file == BAD_FILE);
|
||||||
|
|
||||||
const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->ver),
|
const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->ver),
|
||||||
BRW_REGISTER_TYPE_UD);
|
BRW_REGISTER_TYPE_UD);
|
||||||
|
|
||||||
bld.MOV(byte_offset(payload, REG_SIZE), inst->src[1]);
|
bld.MOV(byte_offset(payload, REG_SIZE), offset);
|
||||||
|
|
||||||
inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4;
|
inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4;
|
||||||
inst->resize_sources(1);
|
|
||||||
inst->base_mrf = payload.nr;
|
inst->base_mrf = payload.nr;
|
||||||
inst->header_size = 1;
|
inst->header_size = 1;
|
||||||
inst->mlen = 1 + inst->exec_size / 8;
|
inst->mlen = 1 + inst->exec_size / 8;
|
||||||
|
|
||||||
|
inst->resize_sources(1);
|
||||||
|
inst->src[0] = surface;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2965,8 +2987,10 @@ fs_visitor::lower_uniform_pull_constant_loads()
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
const fs_reg surface = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE];
|
const fs_reg surface = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE];
|
||||||
|
const fs_reg surface_handle = inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE_HANDLE];
|
||||||
const fs_reg offset_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_OFFSET];
|
const fs_reg offset_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_OFFSET];
|
||||||
const fs_reg size_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_SIZE];
|
const fs_reg size_B = inst->src[PULL_UNIFORM_CONSTANT_SRC_SIZE];
|
||||||
|
assert(surface.file == BAD_FILE || surface_handle.file == BAD_FILE);
|
||||||
assert(offset_B.file == IMM);
|
assert(offset_B.file == IMM);
|
||||||
assert(size_B.file == IMM);
|
assert(size_B.file == IMM);
|
||||||
|
|
||||||
@@ -2980,7 +3004,9 @@ fs_visitor::lower_uniform_pull_constant_loads()
|
|||||||
inst->sfid = GFX12_SFID_UGM;
|
inst->sfid = GFX12_SFID_UGM;
|
||||||
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD,
|
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD,
|
||||||
1 /* simd_size */,
|
1 /* simd_size */,
|
||||||
LSC_ADDR_SURFTYPE_BTI,
|
surface_handle.file == BAD_FILE ?
|
||||||
|
LSC_ADDR_SURFTYPE_BTI :
|
||||||
|
LSC_ADDR_SURFTYPE_BSS,
|
||||||
LSC_ADDR_SIZE_A32,
|
LSC_ADDR_SIZE_A32,
|
||||||
1 /* num_coordinates */,
|
1 /* num_coordinates */,
|
||||||
LSC_DATA_SIZE_D32,
|
LSC_DATA_SIZE_D32,
|
||||||
@@ -3001,7 +3027,9 @@ fs_visitor::lower_uniform_pull_constant_loads()
|
|||||||
/* Finally, the payload */
|
/* Finally, the payload */
|
||||||
|
|
||||||
inst->resize_sources(3);
|
inst->resize_sources(3);
|
||||||
setup_lsc_surface_descriptors(ubld, inst, inst->desc, surface);
|
setup_lsc_surface_descriptors(ubld, inst, inst->desc,
|
||||||
|
surface.file != BAD_FILE ?
|
||||||
|
surface : surface_handle);
|
||||||
inst->src[2] = payload;
|
inst->src[2] = payload;
|
||||||
|
|
||||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
||||||
@@ -3025,9 +3053,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
|
|||||||
|
|
||||||
inst->resize_sources(4);
|
inst->resize_sources(4);
|
||||||
|
|
||||||
setup_surface_descriptors(ubld, inst, desc,
|
setup_surface_descriptors(ubld, inst, desc, surface, surface_handle);
|
||||||
inst->src[PULL_UNIFORM_CONSTANT_SRC_SURFACE],
|
|
||||||
fs_reg() /* surface_handle */);
|
|
||||||
|
|
||||||
inst->src[2] = header;
|
inst->src[2] = header;
|
||||||
inst->src[3] = fs_reg(); /* unused for reads */
|
inst->src[3] = fs_reg(); /* unused for reads */
|
||||||
|
Reference in New Issue
Block a user