intel/fs: switch register allocation spilling to use LSC on Gfx12.5+
v2: Drop the hardcoded inst->mlen=1 (Rohan)

v3: Move back to LOAD/STORE messages (limited to SIMD16 for LSC)

v4: Also use 4 GRFs transpose loads for fills (Curro)

v5: Reduce the number of registers needed to build per-lane offsets (Curro)
    Drop some now useless SIMD32 code
    Unify unspill code

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17555>
This commit is contained in:

committed by
Marge Bot

parent
3c6fa2703d
commit
37b3601052
@@ -1587,6 +1587,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||||||
unsigned desc_imm,
|
unsigned desc_imm,
|
||||||
struct brw_reg ex_desc,
|
struct brw_reg ex_desc,
|
||||||
unsigned ex_desc_imm,
|
unsigned ex_desc_imm,
|
||||||
|
bool ex_desc_scratch,
|
||||||
bool eot);
|
bool eot);
|
||||||
|
|
||||||
void brw_ff_sync(struct brw_codegen *p,
|
void brw_ff_sync(struct brw_codegen *p,
|
||||||
|
@@ -2746,6 +2746,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||||||
unsigned desc_imm,
|
unsigned desc_imm,
|
||||||
struct brw_reg ex_desc,
|
struct brw_reg ex_desc,
|
||||||
unsigned ex_desc_imm,
|
unsigned ex_desc_imm,
|
||||||
|
bool ex_desc_scratch,
|
||||||
bool eot)
|
bool eot)
|
||||||
{
|
{
|
||||||
const struct intel_device_info *devinfo = p->devinfo;
|
const struct intel_device_info *devinfo = p->devinfo;
|
||||||
@@ -2781,6 +2782,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
|
if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
|
||||||
|
!ex_desc_scratch &&
|
||||||
(devinfo->ver >= 12 ||
|
(devinfo->ver >= 12 ||
|
||||||
((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
|
((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
|
||||||
ex_desc.ud |= ex_desc_imm;
|
ex_desc.ud |= ex_desc_imm;
|
||||||
@@ -2807,7 +2809,16 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||||||
*/
|
*/
|
||||||
unsigned imm_part = ex_desc_imm | sfid | eot << 5;
|
unsigned imm_part = ex_desc_imm | sfid | eot << 5;
|
||||||
|
|
||||||
if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
|
if (ex_desc_scratch) {
|
||||||
|
/* Or the scratch surface offset together with the immediate part of
|
||||||
|
* the extended descriptor.
|
||||||
|
*/
|
||||||
|
assert(devinfo->verx10 >= 125);
|
||||||
|
brw_AND(p, addr,
|
||||||
|
retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
|
||||||
|
brw_imm_ud(INTEL_MASK(31, 10)));
|
||||||
|
brw_OR(p, addr, addr, brw_imm_ud(imm_part));
|
||||||
|
} else if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
|
||||||
/* ex_desc bits 15:12 don't exist in the instruction encoding prior
|
/* ex_desc bits 15:12 don't exist in the instruction encoding prior
|
||||||
* to Gfx12, so we may have fallen back to an indirect extended
|
* to Gfx12, so we may have fallen back to an indirect extended
|
||||||
* descriptor.
|
* descriptor.
|
||||||
|
@@ -335,13 +335,14 @@ fs_generator::generate_send(fs_inst *inst,
|
|||||||
uint32_t ex_desc_imm = inst->ex_desc |
|
uint32_t ex_desc_imm = inst->ex_desc |
|
||||||
brw_message_ex_desc(devinfo, inst->ex_mlen);
|
brw_message_ex_desc(devinfo, inst->ex_mlen);
|
||||||
|
|
||||||
if (ex_desc.file != BRW_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm) {
|
if (ex_desc.file != BRW_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm ||
|
||||||
|
inst->send_ex_desc_scratch) {
|
||||||
/* If we have any sort of extended descriptor, then we need SENDS. This
|
/* If we have any sort of extended descriptor, then we need SENDS. This
|
||||||
* also covers the dual-payload case because ex_mlen goes in ex_desc.
|
* also covers the dual-payload case because ex_mlen goes in ex_desc.
|
||||||
*/
|
*/
|
||||||
brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
|
brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
|
||||||
desc, desc_imm, ex_desc, ex_desc_imm,
|
desc, desc_imm, ex_desc, ex_desc_imm,
|
||||||
inst->eot);
|
inst->send_ex_desc_scratch, inst->eot);
|
||||||
if (inst->check_tdr)
|
if (inst->check_tdr)
|
||||||
brw_inst_set_opcode(p->isa, brw_last_inst,
|
brw_inst_set_opcode(p->isa, brw_last_inst,
|
||||||
devinfo->ver >= 12 ? BRW_OPCODE_SENDC : BRW_OPCODE_SENDSC);
|
devinfo->ver >= 12 ? BRW_OPCODE_SENDC : BRW_OPCODE_SENDSC);
|
||||||
|
@@ -348,10 +348,15 @@ private:
|
|||||||
void build_interference_graph(bool allow_spilling);
|
void build_interference_graph(bool allow_spilling);
|
||||||
void discard_interference_graph();
|
void discard_interference_graph();
|
||||||
|
|
||||||
|
fs_reg build_lane_offsets(const fs_builder &bld,
|
||||||
|
uint32_t spill_offset, int ip);
|
||||||
|
fs_reg build_single_offset(const fs_builder &bld,
|
||||||
|
uint32_t spill_offset, int ip);
|
||||||
|
|
||||||
void emit_unspill(const fs_builder &bld, struct shader_stats *stats,
|
void emit_unspill(const fs_builder &bld, struct shader_stats *stats,
|
||||||
fs_reg dst, uint32_t spill_offset, unsigned count);
|
fs_reg dst, uint32_t spill_offset, unsigned count, int ip);
|
||||||
void emit_spill(const fs_builder &bld, struct shader_stats *stats,
|
void emit_spill(const fs_builder &bld, struct shader_stats *stats,
|
||||||
fs_reg src, uint32_t spill_offset, unsigned count);
|
fs_reg src, uint32_t spill_offset, unsigned count, int ip);
|
||||||
|
|
||||||
void set_spill_costs();
|
void set_spill_costs();
|
||||||
int choose_spill_reg();
|
int choose_spill_reg();
|
||||||
@@ -448,6 +453,10 @@ namespace {
|
|||||||
unsigned
|
unsigned
|
||||||
spill_max_size(const backend_shader *s)
|
spill_max_size(const backend_shader *s)
|
||||||
{
|
{
|
||||||
|
/* LSC is limited to SIMD16 sends */
|
||||||
|
if (s->devinfo->has_lsc)
|
||||||
|
return 2;
|
||||||
|
|
||||||
/* FINISHME - On Gfx7+ it should be possible to avoid this limit
|
/* FINISHME - On Gfx7+ it should be possible to avoid this limit
|
||||||
* altogether by spilling directly from the temporary GRF
|
* altogether by spilling directly from the temporary GRF
|
||||||
* allocated to hold the result of the instruction (and the
|
* allocated to hold the result of the instruction (and the
|
||||||
@@ -661,7 +670,7 @@ fs_reg_alloc::build_interference_graph(bool allow_spilling)
|
|||||||
first_vgrf_node = node_count;
|
first_vgrf_node = node_count;
|
||||||
node_count += fs->alloc.count;
|
node_count += fs->alloc.count;
|
||||||
last_vgrf_node = node_count - 1;
|
last_vgrf_node = node_count - 1;
|
||||||
if (devinfo->ver >= 9 && allow_spilling) {
|
if ((devinfo->ver >= 9 && devinfo->verx10 < 125) && allow_spilling) {
|
||||||
scratch_header_node = node_count++;
|
scratch_header_node = node_count++;
|
||||||
} else {
|
} else {
|
||||||
scratch_header_node = -1;
|
scratch_header_node = -1;
|
||||||
@@ -742,11 +751,59 @@ fs_reg_alloc::discard_interference_graph()
|
|||||||
have_spill_costs = false;
|
have_spill_costs = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fs_reg
|
||||||
|
fs_reg_alloc::build_single_offset(const fs_builder &bld, uint32_t spill_offset, int ip)
|
||||||
|
{
|
||||||
|
fs_reg offset = retype(alloc_spill_reg(1, ip), BRW_REGISTER_TYPE_UD);
|
||||||
|
fs_inst *inst = bld.MOV(offset, brw_imm_ud(spill_offset));
|
||||||
|
_mesa_set_add(spill_insts, inst);
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
fs_reg
|
||||||
|
fs_reg_alloc::build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, int ip)
|
||||||
|
{
|
||||||
|
/* LSC messages are limited to SIMD16 */
|
||||||
|
assert(bld.dispatch_width() <= 16);
|
||||||
|
|
||||||
|
const fs_builder ubld = bld.exec_all();
|
||||||
|
const unsigned reg_count = ubld.dispatch_width() / 8;
|
||||||
|
|
||||||
|
fs_reg offset = retype(alloc_spill_reg(reg_count, ip), BRW_REGISTER_TYPE_UD);
|
||||||
|
fs_inst *inst;
|
||||||
|
|
||||||
|
/* Build an offset per lane in SIMD8 */
|
||||||
|
inst = ubld.group(8, 0).MOV(retype(offset, BRW_REGISTER_TYPE_UW),
|
||||||
|
brw_imm_uv(0x76543210));
|
||||||
|
_mesa_set_add(spill_insts, inst);
|
||||||
|
inst = ubld.group(8, 0).MOV(offset, retype(offset, BRW_REGISTER_TYPE_UW));
|
||||||
|
_mesa_set_add(spill_insts, inst);
|
||||||
|
|
||||||
|
/* Build offsets in the upper 8 lanes of SIMD16 */
|
||||||
|
if (ubld.dispatch_width() > 8) {
|
||||||
|
inst = ubld.group(8, 0).ADD(
|
||||||
|
byte_offset(offset, REG_SIZE),
|
||||||
|
byte_offset(offset, 0),
|
||||||
|
brw_imm_ud(8));
|
||||||
|
_mesa_set_add(spill_insts, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Scale the lane index to a byte offset (one dword per lane) */
|
||||||
|
inst = ubld.SHL(offset, offset, brw_imm_ud(2));
|
||||||
|
_mesa_set_add(spill_insts, inst);
|
||||||
|
|
||||||
|
/* Add the base offset */
|
||||||
|
inst = ubld.ADD(offset, offset, brw_imm_ud(spill_offset));
|
||||||
|
_mesa_set_add(spill_insts, inst);
|
||||||
|
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
||||||
struct shader_stats *stats,
|
struct shader_stats *stats,
|
||||||
fs_reg dst,
|
fs_reg dst,
|
||||||
uint32_t spill_offset, unsigned count)
|
uint32_t spill_offset, unsigned count, int ip)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
const unsigned reg_size = dst.component_size(bld.dispatch_width()) /
|
const unsigned reg_size = dst.component_size(bld.dispatch_width()) /
|
||||||
@@ -757,7 +814,53 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
|||||||
++stats->fill_count;
|
++stats->fill_count;
|
||||||
|
|
||||||
fs_inst *unspill_inst;
|
fs_inst *unspill_inst;
|
||||||
if (devinfo->ver >= 9) {
|
if (devinfo->verx10 >= 125) {
|
||||||
|
/* LSC is limited to SIMD16 load/store but we can load more using
|
||||||
|
* transpose messages.
|
||||||
|
*/
|
||||||
|
const bool use_transpose = bld.dispatch_width() > 16;
|
||||||
|
const fs_builder ubld = use_transpose ? bld.exec_all().group(1, 0) : bld;
|
||||||
|
fs_reg offset;
|
||||||
|
if (use_transpose) {
|
||||||
|
offset = build_single_offset(ubld, spill_offset, ip);
|
||||||
|
} else {
|
||||||
|
offset = build_lane_offsets(ubld, spill_offset, ip);
|
||||||
|
}
|
||||||
|
/* We leave the extended descriptor empty and flag the instruction to
|
||||||
|
* ask the generator to insert the extended descriptor in the address
|
||||||
|
* register. That way we don't need to burn an additional register
|
||||||
|
* for register allocation spill/fill.
|
||||||
|
*/
|
||||||
|
fs_reg srcs[] = {
|
||||||
|
brw_imm_ud(0), /* desc */
|
||||||
|
brw_imm_ud(0), /* ex_desc */
|
||||||
|
offset, /* payload */
|
||||||
|
fs_reg(), /* payload2 */
|
||||||
|
};
|
||||||
|
|
||||||
|
unspill_inst = ubld.emit(SHADER_OPCODE_SEND, dst,
|
||||||
|
srcs, ARRAY_SIZE(srcs));
|
||||||
|
unspill_inst->sfid = GFX12_SFID_UGM;
|
||||||
|
unspill_inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD,
|
||||||
|
unspill_inst->exec_size,
|
||||||
|
LSC_ADDR_SURFTYPE_BSS,
|
||||||
|
LSC_ADDR_SIZE_A32,
|
||||||
|
1 /* num_coordinates */,
|
||||||
|
LSC_DATA_SIZE_D32,
|
||||||
|
use_transpose ? reg_size * 8 : 1 /* num_channels */,
|
||||||
|
use_transpose,
|
||||||
|
LSC_CACHE_LOAD_L1STATE_L3MOCS,
|
||||||
|
true /* has_dest */);
|
||||||
|
unspill_inst->header_size = 0;
|
||||||
|
unspill_inst->mlen =
|
||||||
|
lsc_msg_desc_src0_len(devinfo, unspill_inst->desc);
|
||||||
|
unspill_inst->ex_mlen = 0;
|
||||||
|
unspill_inst->size_written =
|
||||||
|
lsc_msg_desc_dest_len(devinfo, unspill_inst->desc) * REG_SIZE;
|
||||||
|
unspill_inst->send_has_side_effects = false;
|
||||||
|
unspill_inst->send_is_volatile = true;
|
||||||
|
unspill_inst->send_ex_desc_scratch = true;
|
||||||
|
} else if (devinfo->ver >= 9) {
|
||||||
fs_reg header = this->scratch_header;
|
fs_reg header = this->scratch_header;
|
||||||
fs_builder ubld = bld.exec_all().group(1, 0);
|
fs_builder ubld = bld.exec_all().group(1, 0);
|
||||||
assert(spill_offset % 16 == 0);
|
assert(spill_offset % 16 == 0);
|
||||||
@@ -765,15 +868,8 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
|||||||
brw_imm_ud(spill_offset / 16));
|
brw_imm_ud(spill_offset / 16));
|
||||||
_mesa_set_add(spill_insts, unspill_inst);
|
_mesa_set_add(spill_insts, unspill_inst);
|
||||||
|
|
||||||
unsigned bti;
|
const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
||||||
fs_reg ex_desc;
|
const fs_reg ex_desc = brw_imm_ud(0);
|
||||||
if (devinfo->verx10 >= 125) {
|
|
||||||
bti = GFX9_BTI_BINDLESS;
|
|
||||||
ex_desc = component(this->scratch_header, 0);
|
|
||||||
} else {
|
|
||||||
bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
|
||||||
ex_desc = brw_imm_ud(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
fs_reg srcs[] = { brw_imm_ud(0), ex_desc, header };
|
fs_reg srcs[] = { brw_imm_ud(0), ex_desc, header };
|
||||||
unspill_inst = bld.emit(SHADER_OPCODE_SEND, dst,
|
unspill_inst = bld.emit(SHADER_OPCODE_SEND, dst,
|
||||||
@@ -815,7 +911,7 @@ void
|
|||||||
fs_reg_alloc::emit_spill(const fs_builder &bld,
|
fs_reg_alloc::emit_spill(const fs_builder &bld,
|
||||||
struct shader_stats *stats,
|
struct shader_stats *stats,
|
||||||
fs_reg src,
|
fs_reg src,
|
||||||
uint32_t spill_offset, unsigned count)
|
uint32_t spill_offset, unsigned count, int ip)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
const unsigned reg_size = src.component_size(bld.dispatch_width()) /
|
const unsigned reg_size = src.component_size(bld.dispatch_width()) /
|
||||||
@@ -826,7 +922,40 @@ fs_reg_alloc::emit_spill(const fs_builder &bld,
|
|||||||
++stats->spill_count;
|
++stats->spill_count;
|
||||||
|
|
||||||
fs_inst *spill_inst;
|
fs_inst *spill_inst;
|
||||||
if (devinfo->ver >= 9) {
|
if (devinfo->verx10 >= 125) {
|
||||||
|
fs_reg offset = build_lane_offsets(bld, spill_offset, ip);
|
||||||
|
/* We leave the extended descriptor empty and flag the instruction to
|
||||||
|
* relocate the extended descriptor. That way the surface offset is
|
||||||
|
* directly put into the instruction and we don't need to use a
|
||||||
|
* register to hold it.
|
||||||
|
*/
|
||||||
|
fs_reg srcs[] = {
|
||||||
|
brw_imm_ud(0), /* desc */
|
||||||
|
brw_imm_ud(0), /* ex_desc */
|
||||||
|
offset, /* payload */
|
||||||
|
src, /* payload2 */
|
||||||
|
};
|
||||||
|
spill_inst = bld.emit(SHADER_OPCODE_SEND, bld.null_reg_f(),
|
||||||
|
srcs, ARRAY_SIZE(srcs));
|
||||||
|
spill_inst->sfid = GFX12_SFID_UGM;
|
||||||
|
spill_inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE,
|
||||||
|
bld.dispatch_width(),
|
||||||
|
LSC_ADDR_SURFTYPE_BSS,
|
||||||
|
LSC_ADDR_SIZE_A32,
|
||||||
|
1 /* num_coordinates */,
|
||||||
|
LSC_DATA_SIZE_D32,
|
||||||
|
1 /* num_channels */,
|
||||||
|
false /* transpose */,
|
||||||
|
LSC_CACHE_LOAD_L1STATE_L3MOCS,
|
||||||
|
false /* has_dest */);
|
||||||
|
spill_inst->header_size = 0;
|
||||||
|
spill_inst->mlen = lsc_msg_desc_src0_len(devinfo, spill_inst->desc);
|
||||||
|
spill_inst->ex_mlen = reg_size;
|
||||||
|
spill_inst->size_written = 0;
|
||||||
|
spill_inst->send_has_side_effects = true;
|
||||||
|
spill_inst->send_is_volatile = false;
|
||||||
|
spill_inst->send_ex_desc_scratch = true;
|
||||||
|
} else if (devinfo->ver >= 9) {
|
||||||
fs_reg header = this->scratch_header;
|
fs_reg header = this->scratch_header;
|
||||||
fs_builder ubld = bld.exec_all().group(1, 0);
|
fs_builder ubld = bld.exec_all().group(1, 0);
|
||||||
assert(spill_offset % 16 == 0);
|
assert(spill_offset % 16 == 0);
|
||||||
@@ -834,15 +963,8 @@ fs_reg_alloc::emit_spill(const fs_builder &bld,
|
|||||||
brw_imm_ud(spill_offset / 16));
|
brw_imm_ud(spill_offset / 16));
|
||||||
_mesa_set_add(spill_insts, spill_inst);
|
_mesa_set_add(spill_insts, spill_inst);
|
||||||
|
|
||||||
unsigned bti;
|
const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
||||||
fs_reg ex_desc;
|
const fs_reg ex_desc = brw_imm_ud(0);
|
||||||
if (devinfo->verx10 >= 125) {
|
|
||||||
bti = GFX9_BTI_BINDLESS;
|
|
||||||
ex_desc = component(this->scratch_header, 0);
|
|
||||||
} else {
|
|
||||||
bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
|
||||||
ex_desc = brw_imm_ud(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
fs_reg srcs[] = { brw_imm_ud(0), ex_desc, header, src };
|
fs_reg srcs[] = { brw_imm_ud(0), ex_desc, header, src };
|
||||||
spill_inst = bld.emit(SHADER_OPCODE_SEND, bld.null_reg_f(),
|
spill_inst = bld.emit(SHADER_OPCODE_SEND, bld.null_reg_f(),
|
||||||
@@ -1033,25 +1155,16 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
|||||||
* SIMD16 mode, because we'd stomp the FB writes.
|
* SIMD16 mode, because we'd stomp the FB writes.
|
||||||
*/
|
*/
|
||||||
if (!fs->spilled_any_registers) {
|
if (!fs->spilled_any_registers) {
|
||||||
if (devinfo->ver >= 9) {
|
if (devinfo->verx10 >= 125) {
|
||||||
|
/* We will allocate a register on the fly */
|
||||||
|
} else if (devinfo->ver >= 9) {
|
||||||
this->scratch_header = alloc_scratch_header();
|
this->scratch_header = alloc_scratch_header();
|
||||||
fs_builder ubld = fs->bld.exec_all().group(8, 0).at(
|
fs_builder ubld = fs->bld.exec_all().group(8, 0).at(
|
||||||
fs->cfg->first_block(), fs->cfg->first_block()->start());
|
fs->cfg->first_block(), fs->cfg->first_block()->start());
|
||||||
|
|
||||||
fs_inst *inst;
|
fs_inst *inst = ubld.emit(SHADER_OPCODE_SCRATCH_HEADER,
|
||||||
if (devinfo->verx10 >= 125) {
|
this->scratch_header);
|
||||||
inst = ubld.MOV(this->scratch_header, brw_imm_ud(0));
|
_mesa_set_add(spill_insts, inst);
|
||||||
_mesa_set_add(spill_insts, inst);
|
|
||||||
inst = ubld.group(1, 0).AND(component(this->scratch_header, 0),
|
|
||||||
retype(brw_vec1_grf(0, 5),
|
|
||||||
BRW_REGISTER_TYPE_UD),
|
|
||||||
brw_imm_ud(INTEL_MASK(31, 10)));
|
|
||||||
_mesa_set_add(spill_insts, inst);
|
|
||||||
} else {
|
|
||||||
inst = ubld.emit(SHADER_OPCODE_SCRATCH_HEADER,
|
|
||||||
this->scratch_header);
|
|
||||||
_mesa_set_add(spill_insts, inst);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
bool mrf_used[BRW_MAX_MRF(devinfo->ver)];
|
bool mrf_used[BRW_MAX_MRF(devinfo->ver)];
|
||||||
get_used_mrfs(fs, mrf_used);
|
get_used_mrfs(fs, mrf_used);
|
||||||
@@ -1112,7 +1225,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
|||||||
* unspill destination is a block-local temporary.
|
* unspill destination is a block-local temporary.
|
||||||
*/
|
*/
|
||||||
emit_unspill(ibld.exec_all().group(width, 0), &fs->shader_stats,
|
emit_unspill(ibld.exec_all().group(width, 0), &fs->shader_stats,
|
||||||
unspill_dst, subset_spill_offset, count);
|
unspill_dst, subset_spill_offset, count, ip);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1167,10 +1280,10 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
|||||||
if (inst->is_partial_write() ||
|
if (inst->is_partial_write() ||
|
||||||
(!inst->force_writemask_all && !per_channel))
|
(!inst->force_writemask_all && !per_channel))
|
||||||
emit_unspill(ubld, &fs->shader_stats, spill_src,
|
emit_unspill(ubld, &fs->shader_stats, spill_src,
|
||||||
subset_spill_offset, regs_written(inst));
|
subset_spill_offset, regs_written(inst), ip);
|
||||||
|
|
||||||
emit_spill(ubld.at(block, inst->next), &fs->shader_stats, spill_src,
|
emit_spill(ubld.at(block, inst->next), &fs->shader_stats, spill_src,
|
||||||
subset_spill_offset, regs_written(inst));
|
subset_spill_offset, regs_written(inst), ip);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (fs_inst *inst = (fs_inst *)before->next;
|
for (fs_inst *inst = (fs_inst *)before->next;
|
||||||
|
@@ -174,6 +174,10 @@ struct backend_instruction {
|
|||||||
bool check_tdr:1; /**< Only valid for SEND; turns it into a SENDC */
|
bool check_tdr:1; /**< Only valid for SEND; turns it into a SENDC */
|
||||||
bool send_has_side_effects:1; /**< Only valid for SHADER_OPCODE_SEND */
|
bool send_has_side_effects:1; /**< Only valid for SHADER_OPCODE_SEND */
|
||||||
bool send_is_volatile:1; /**< Only valid for SHADER_OPCODE_SEND */
|
bool send_is_volatile:1; /**< Only valid for SHADER_OPCODE_SEND */
|
||||||
|
bool send_ex_desc_scratch:1; /**< Only valid for SHADER_OPCODE_SEND, use
|
||||||
|
* the scratch surface offset to build
|
||||||
|
* extended descriptor
|
||||||
|
*/
|
||||||
bool eot:1;
|
bool eot:1;
|
||||||
|
|
||||||
/* Chooses which flag subregister (f0.0 to f1.1) is used for conditional
|
/* Chooses which flag subregister (f0.0 to f1.1) is used for conditional
|
||||||
|
Reference in New Issue
Block a user