intel/compiler: Create and use struct for GS thread payload
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Acked-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18176>
This commit is contained in:
@@ -1543,13 +1543,13 @@ fs_visitor::emit_gs_thread_end()
|
||||
}
|
||||
}
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
||||
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 1;
|
||||
} else {
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
||||
srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count;
|
||||
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
@@ -5852,50 +5852,6 @@ fs_visitor::dump_instruction(const backend_instruction *be_inst, FILE *file) con
|
||||
fprintf(file, "\n");
|
||||
}
|
||||
|
||||
/* Set up the thread payload bookkeeping for a geometry shader.
 *
 * Counts the payload registers into payload.num_regs (two fixed registers,
 * one more when the primitive ID is included, plus vertices_in registers for
 * the ICP handles), forces include_vue_handles on in the GS prog data, and
 * reduces urb_read_length when the pushed inputs would exceed
 * max_push_components.
 *
 * Only valid for MESA_SHADER_GEOMETRY (asserted on entry).
 */
void
|
||||
fs_visitor::setup_gs_payload()
|
||||
{
|
||||
assert(stage == MESA_SHADER_GEOMETRY);
|
||||
thread_payload &payload = this->payload();
|
||||
|
||||
/* Both views are derived from the same prog_data pointer. */
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
|
||||
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
|
||||
|
||||
/* R0: thread header, R1: output URB handles */
|
||||
payload.num_regs = 2;
|
||||
|
||||
if (gs_prog_data->include_primitive_id) {
|
||||
/* R2: Primitive ID 0..7 */
|
||||
payload.num_regs++;
|
||||
}
|
||||
|
||||
/* Always enable VUE handles so we can safely use pull model if needed.
|
||||
*
|
||||
* The push model for a GS uses a ton of register space even for trivial
|
||||
* scenarios with just a few inputs, so just make things easier and a bit
|
||||
* safer by always having pull model available.
|
||||
*/
|
||||
gs_prog_data->base.include_vue_handles = true;
|
||||
|
||||
/* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
|
||||
/* NOTE(review): the "R3" above assumes the primitive ID register was
 * counted; when include_primitive_id is false num_regs is still 2 here, so
 * the handles presumably start at R2 - confirm.
 */
payload.num_regs += nir->info.gs.vertices_in;
|
||||
|
||||
/* Use a maximum of 24 registers for push-model inputs. */
|
||||
/* NOTE(review): the constant is named in "components" while the comment
 * above speaks of registers - confirm the intended unit.
 */
const unsigned max_push_components = 24;
|
||||
|
||||
/* If pushing our inputs would take too many registers, reduce the URB read
|
||||
* length (which is in HWords, or 8 registers), and resort to pulling.
|
||||
*
|
||||
* Note that the GS reads <URB Read Length> HWords for every vertex - so we
|
||||
* have to multiply by VerticesIn to obtain the total storage requirement.
|
||||
*/
|
||||
if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
|
||||
max_push_components) {
|
||||
/* Per-vertex share of the budget, rounded down to a multiple of 8 and
 * then divided by 8 to match the scaling used in the test above.
 */
vue_prog_data->urb_read_length =
|
||||
ROUND_DOWN_TO(max_push_components / nir->info.gs.vertices_in, 8) / 8;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::setup_cs_payload()
|
||||
{
|
||||
@@ -6702,7 +6658,7 @@ fs_visitor::run_gs()
|
||||
{
|
||||
assert(stage == MESA_SHADER_GEOMETRY);
|
||||
|
||||
setup_gs_payload();
|
||||
payload_ = new gs_thread_payload(*this);
|
||||
|
||||
this->final_gs_vertex_count = vgrf(glsl_type::uint_type);
|
||||
|
||||
|
@@ -116,6 +116,13 @@ struct tes_thread_payload : public thread_payload {
|
||||
fs_reg urb_output;
|
||||
};
|
||||
|
||||
/* Thread payload description for the geometry shader stage, extending the
 * common thread_payload base.
 */
struct gs_thread_payload : public thread_payload {
|
||||
/* Builds the payload from the visitor's prog_data and NIR shader info. */
gs_thread_payload(const fs_visitor &v);
|
||||
|
||||
/* Output URB handles register; used as URB_LOGICAL_SRC_HANDLE for the GS
 * URB writes.
 */
fs_reg urb_handles;
|
||||
/* Primitive ID register; the constructor assigns it only when the GS prog
 * data has include_primitive_id set.
 */
fs_reg primitive_id;
|
||||
};
|
||||
|
||||
struct fs_thread_payload : public thread_payload {
|
||||
fs_thread_payload(const fs_visitor &v,
|
||||
bool &source_depth_to_render_target,
|
||||
@@ -192,7 +199,6 @@ public:
|
||||
bool run_mesh(bool allow_spilling);
|
||||
void optimize();
|
||||
void allocate_registers(bool allow_spilling);
|
||||
void setup_gs_payload();
|
||||
void setup_cs_payload();
|
||||
bool fixup_sends_duplicate_payload();
|
||||
void fixup_3src_null_dest();
|
||||
@@ -471,6 +477,11 @@ public:
|
||||
return *static_cast<tes_thread_payload *>(this->payload_);
|
||||
}
|
||||
|
||||
/* Returns the current payload viewed as a gs_thread_payload.
 *
 * The stage is asserted to be MESA_SHADER_GEOMETRY, but the static_cast
 * itself is unchecked: payload_ must already point at a gs_thread_payload
 * (run_gs() assigns one before use).
 */
gs_thread_payload &gs_payload() {
|
||||
assert(stage == MESA_SHADER_GEOMETRY);
|
||||
return *static_cast<gs_thread_payload *>(this->payload_);
|
||||
}
|
||||
|
||||
fs_thread_payload &fs_payload() {
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
return *static_cast<fs_thread_payload *>(this->payload_);
|
||||
|
@@ -2337,7 +2337,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
|
||||
sources[i] = this->control_data_bits;
|
||||
|
||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
||||
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offset;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = channel_mask;
|
||||
srcs[URB_LOGICAL_SRC_DATA] = bld.vgrf(BRW_REGISTER_TYPE_F, length);
|
||||
@@ -3155,8 +3155,7 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
assert(stage == MESA_SHADER_GEOMETRY);
|
||||
assert(brw_gs_prog_data(prog_data)->include_primitive_id);
|
||||
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
|
||||
retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
|
||||
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), gs_payload().primitive_id);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_input:
|
||||
|
@@ -81,6 +81,52 @@ tes_thread_payload::tes_thread_payload()
|
||||
num_regs = 5;
|
||||
}
|
||||
|
||||
/* Construct the geometry shader thread payload for visitor `v`.
 *
 * Assigns the urb_handles register (and primitive_id when the GS prog data
 * requests it), counts the payload registers into num_regs, and carries out
 * the prog_data adjustments below.
 *
 * Note that although `v` is taken by const reference, this constructor
 * writes through v.prog_data: it sets include_vue_handles and may lower
 * urb_read_length.
 */
gs_thread_payload::gs_thread_payload(const fs_visitor &v)
|
||||
{
|
||||
/* Both views are derived from the same v.prog_data pointer. */
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(v.prog_data);
|
||||
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(v.prog_data);
|
||||
|
||||
/* R0: thread header. */
|
||||
/* `r` is a running register count; it is only consumed by num_regs below.
 * The register numbers passed to brw_vec8_grf() are hard-coded and equal
 * the value of `r` at each assignment.
 */
unsigned r = 1;
|
||||
|
||||
/* R1: output URB handles. */
|
||||
urb_handles = retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD);
|
||||
r++;
|
||||
|
||||
if (gs_prog_data->include_primitive_id) {
|
||||
/* R2: primitive ID, present only when requested by the prog data. */
primitive_id = retype(brw_vec8_grf(2, 0), BRW_REGISTER_TYPE_UD);
|
||||
r++;
|
||||
}
|
||||
|
||||
/* Always enable VUE handles so we can safely use pull model if needed.
|
||||
*
|
||||
* The push model for a GS uses a ton of register space even for trivial
|
||||
* scenarios with just a few inputs, so just make things easier and a bit
|
||||
* safer by always having pull model available.
|
||||
*/
|
||||
gs_prog_data->base.include_vue_handles = true;
|
||||
|
||||
/* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
|
||||
/* NOTE(review): the "R3" above assumes the primitive ID register was
 * counted; when include_primitive_id is false `r` is still 2 here, so the
 * handles presumably start at R2 - confirm.
 */
r += v.nir->info.gs.vertices_in;
|
||||
|
||||
num_regs = r;
|
||||
|
||||
/* Use a maximum of 24 registers for push-model inputs. */
|
||||
/* NOTE(review): the constant is named in "components" while the comment
 * above speaks of registers - confirm the intended unit.
 */
const unsigned max_push_components = 24;
|
||||
|
||||
/* If pushing our inputs would take too many registers, reduce the URB read
|
||||
* length (which is in HWords, or 8 registers), and resort to pulling.
|
||||
*
|
||||
* Note that the GS reads <URB Read Length> HWords for every vertex - so we
|
||||
* have to multiply by VerticesIn to obtain the total storage requirement.
|
||||
*/
|
||||
if (8 * vue_prog_data->urb_read_length * v.nir->info.gs.vertices_in >
|
||||
max_push_components) {
|
||||
/* Per-vertex share of the budget, rounded down to a multiple of 8 and
 * then divided by 8 to match the scaling used in the test above.
 */
vue_prog_data->urb_read_length =
|
||||
ROUND_DOWN_TO(max_push_components / v.nir->info.gs.vertices_in, 8) / 8;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
setup_fs_payload_gfx6(fs_thread_payload &payload,
|
||||
const fs_visitor &v,
|
||||
|
@@ -778,7 +778,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||
urb_handle = tes_payload().urb_output;
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
urb_handle = gs_payload().urb_handles;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid stage");
|
||||
|
Reference in New Issue
Block a user