intel/compiler: Create and use struct for GS thread payload

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Acked-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18176>
This commit is contained in:
Caio Oliveira
2022-08-22 22:23:17 -07:00
committed by Marge Bot
parent 7664c85b1d
commit 5b6987daee
5 changed files with 64 additions and 52 deletions

View File

@@ -1543,13 +1543,13 @@ fs_visitor::emit_gs_thread_end()
}
}
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
srcs, ARRAY_SIZE(srcs));
inst->mlen = 1;
} else {
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count;
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
srcs, ARRAY_SIZE(srcs));
@@ -5852,50 +5852,6 @@ fs_visitor::dump_instruction(const backend_instruction *be_inst, FILE *file) con
fprintf(file, "\n");
}
void
fs_visitor::setup_gs_payload()
{
assert(stage == MESA_SHADER_GEOMETRY);
thread_payload &payload = this->payload();
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
/* R0: thread header, R1: output URB handles */
payload.num_regs = 2;
if (gs_prog_data->include_primitive_id) {
/* R2: Primitive ID 0..7 */
payload.num_regs++;
}
/* Always enable VUE handles so we can safely use pull model if needed.
*
* The push model for a GS uses a ton of register space even for trivial
* scenarios with just a few inputs, so just make things easier and a bit
* safer by always having pull model available.
*/
gs_prog_data->base.include_vue_handles = true;
/* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
payload.num_regs += nir->info.gs.vertices_in;
/* Use a maximum of 24 registers for push-model inputs. */
const unsigned max_push_components = 24;
/* If pushing our inputs would take too many registers, reduce the URB read
* length (which is in HWords, or 8 registers), and resort to pulling.
*
* Note that the GS reads <URB Read Length> HWords for every vertex - so we
* have to multiply by VerticesIn to obtain the total storage requirement.
*/
if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
max_push_components) {
vue_prog_data->urb_read_length =
ROUND_DOWN_TO(max_push_components / nir->info.gs.vertices_in, 8) / 8;
}
}
void
fs_visitor::setup_cs_payload()
{
@@ -6702,7 +6658,7 @@ fs_visitor::run_gs()
{
assert(stage == MESA_SHADER_GEOMETRY);
setup_gs_payload();
payload_ = new gs_thread_payload(*this);
this->final_gs_vertex_count = vgrf(glsl_type::uint_type);

View File

@@ -116,6 +116,13 @@ struct tes_thread_payload : public thread_payload {
fs_reg urb_output;
};
/* Thread payload description for the geometry shader stage.  The constructor
 * derives the register layout from the given fs_visitor.
 */
struct gs_thread_payload : public thread_payload {
/* Builds the GS payload layout from the visitor's prog_data and NIR info. */
gs_thread_payload(const fs_visitor &v);
/* Register holding the output URB handles (R1 of the payload). */
fs_reg urb_handles;
/* Register holding the primitive ID (R2); the constructor only assigns it
 * when the GS prog_data has include_primitive_id set.
 */
fs_reg primitive_id;
};
struct fs_thread_payload : public thread_payload {
fs_thread_payload(const fs_visitor &v,
bool &source_depth_to_render_target,
@@ -192,7 +199,6 @@ public:
bool run_mesh(bool allow_spilling);
void optimize();
void allocate_registers(bool allow_spilling);
void setup_gs_payload();
void setup_cs_payload();
bool fixup_sends_duplicate_payload();
void fixup_3src_null_dest();
@@ -471,6 +477,11 @@ public:
return *static_cast<tes_thread_payload *>(this->payload_);
}
/* Accessor for the current payload viewed as a geometry shader payload.
 * Asserts that the visitor is compiling a geometry shader.
 */
gs_thread_payload &gs_payload() {
   assert(stage == MESA_SHADER_GEOMETRY);
   gs_thread_payload *const gs = static_cast<gs_thread_payload *>(this->payload_);
   return *gs;
}
fs_thread_payload &fs_payload() {
assert(stage == MESA_SHADER_FRAGMENT);
return *static_cast<fs_thread_payload *>(this->payload_);

View File

@@ -2337,7 +2337,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
sources[i] = this->control_data_bits;
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offset;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = channel_mask;
srcs[URB_LOGICAL_SRC_DATA] = bld.vgrf(BRW_REGISTER_TYPE_F, length);
@@ -3155,8 +3155,7 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
case nir_intrinsic_load_primitive_id:
assert(stage == MESA_SHADER_GEOMETRY);
assert(brw_gs_prog_data(prog_data)->include_primitive_id);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), gs_payload().primitive_id);
break;
case nir_intrinsic_load_input:

View File

@@ -81,6 +81,52 @@ tes_thread_payload::tes_thread_payload()
num_regs = 5;
}
/**
 * Lay out the geometry shader thread payload for the given visitor.
 *
 * Besides filling in urb_handles, primitive_id and num_regs, this forces
 * include_vue_handles on in the GS prog_data and may lower urb_read_length
 * in the VUE prog_data.
 */
gs_thread_payload::gs_thread_payload(const fs_visitor &v)
{
   struct brw_gs_prog_data *gs = brw_gs_prog_data(v.prog_data);
   struct brw_vue_prog_data *vue = brw_vue_prog_data(v.prog_data);
   const unsigned vertices_in = v.nir->info.gs.vertices_in;

   /* The pull model must always be usable: pushing GS inputs burns a lot of
    * register space even for trivial shaders with only a few inputs, so the
    * VUE handles are requested unconditionally.
    */
   gs->base.include_vue_handles = true;

   /* R0 is the thread header, so payload fields start at R1. */
   unsigned next_reg = 1;

   /* R1: output URB handles. */
   urb_handles = retype(brw_vec8_grf(next_reg, 0), BRW_REGISTER_TYPE_UD);
   next_reg++;

   /* R2: primitive ID, present only when it was asked for. */
   if (gs->include_primitive_id) {
      primitive_id = retype(brw_vec8_grf(next_reg, 0), BRW_REGISTER_TYPE_UD);
      next_reg++;
   }

   /* The remaining registers are the ICP handles, one per incoming vertex
    * (used by the pull model).
    */
   next_reg += vertices_in;

   num_regs = next_reg;

   /* Use a maximum of 24 registers for push-model inputs. */
   const unsigned max_push_components = 24;

   /* The URB read length is in HWords (8 registers) and is applied once per
    * input vertex, so the total push footprint scales with VerticesIn.  When
    * that footprint would exceed the limit, shrink the read length and fall
    * back to pulling the rest.
    */
   const unsigned push_footprint = 8 * vue->urb_read_length * vertices_in;
   if (push_footprint > max_push_components) {
      vue->urb_read_length =
         ROUND_DOWN_TO(max_push_components / vertices_in, 8) / 8;
   }
}
static inline void
setup_fs_payload_gfx6(fs_thread_payload &payload,
const fs_visitor &v,

View File

@@ -778,7 +778,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
urb_handle = tes_payload().urb_output;
break;
case MESA_SHADER_GEOMETRY:
urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
urb_handle = gs_payload().urb_handles;
break;
default:
unreachable("invalid stage");