intel/compiler: Create and use struct for GS thread payload
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Acked-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18176>
This commit is contained in:
@@ -1543,13 +1543,13 @@ fs_visitor::emit_gs_thread_end()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
||||||
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||||
srcs, ARRAY_SIZE(srcs));
|
srcs, ARRAY_SIZE(srcs));
|
||||||
inst->mlen = 1;
|
inst->mlen = 1;
|
||||||
} else {
|
} else {
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
||||||
srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count;
|
srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count;
|
||||||
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||||
srcs, ARRAY_SIZE(srcs));
|
srcs, ARRAY_SIZE(srcs));
|
||||||
@@ -5852,50 +5852,6 @@ fs_visitor::dump_instruction(const backend_instruction *be_inst, FILE *file) con
|
|||||||
fprintf(file, "\n");
|
fprintf(file, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
fs_visitor::setup_gs_payload()
|
|
||||||
{
|
|
||||||
assert(stage == MESA_SHADER_GEOMETRY);
|
|
||||||
thread_payload &payload = this->payload();
|
|
||||||
|
|
||||||
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
|
|
||||||
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
|
|
||||||
|
|
||||||
/* R0: thread header, R1: output URB handles */
|
|
||||||
payload.num_regs = 2;
|
|
||||||
|
|
||||||
if (gs_prog_data->include_primitive_id) {
|
|
||||||
/* R2: Primitive ID 0..7 */
|
|
||||||
payload.num_regs++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Always enable VUE handles so we can safely use pull model if needed.
|
|
||||||
*
|
|
||||||
* The push model for a GS uses a ton of register space even for trivial
|
|
||||||
* scenarios with just a few inputs, so just make things easier and a bit
|
|
||||||
* safer by always having pull model available.
|
|
||||||
*/
|
|
||||||
gs_prog_data->base.include_vue_handles = true;
|
|
||||||
|
|
||||||
/* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
|
|
||||||
payload.num_regs += nir->info.gs.vertices_in;
|
|
||||||
|
|
||||||
/* Use a maximum of 24 registers for push-model inputs. */
|
|
||||||
const unsigned max_push_components = 24;
|
|
||||||
|
|
||||||
/* If pushing our inputs would take too many registers, reduce the URB read
|
|
||||||
* length (which is in HWords, or 8 registers), and resort to pulling.
|
|
||||||
*
|
|
||||||
* Note that the GS reads <URB Read Length> HWords for every vertex - so we
|
|
||||||
* have to multiply by VerticesIn to obtain the total storage requirement.
|
|
||||||
*/
|
|
||||||
if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
|
|
||||||
max_push_components) {
|
|
||||||
vue_prog_data->urb_read_length =
|
|
||||||
ROUND_DOWN_TO(max_push_components / nir->info.gs.vertices_in, 8) / 8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::setup_cs_payload()
|
fs_visitor::setup_cs_payload()
|
||||||
{
|
{
|
||||||
@@ -6702,7 +6658,7 @@ fs_visitor::run_gs()
|
|||||||
{
|
{
|
||||||
assert(stage == MESA_SHADER_GEOMETRY);
|
assert(stage == MESA_SHADER_GEOMETRY);
|
||||||
|
|
||||||
setup_gs_payload();
|
payload_ = new gs_thread_payload(*this);
|
||||||
|
|
||||||
this->final_gs_vertex_count = vgrf(glsl_type::uint_type);
|
this->final_gs_vertex_count = vgrf(glsl_type::uint_type);
|
||||||
|
|
||||||
|
@@ -116,6 +116,13 @@ struct tes_thread_payload : public thread_payload {
|
|||||||
fs_reg urb_output;
|
fs_reg urb_output;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct gs_thread_payload : public thread_payload {
|
||||||
|
gs_thread_payload(const fs_visitor &v);
|
||||||
|
|
||||||
|
fs_reg urb_handles;
|
||||||
|
fs_reg primitive_id;
|
||||||
|
};
|
||||||
|
|
||||||
struct fs_thread_payload : public thread_payload {
|
struct fs_thread_payload : public thread_payload {
|
||||||
fs_thread_payload(const fs_visitor &v,
|
fs_thread_payload(const fs_visitor &v,
|
||||||
bool &source_depth_to_render_target,
|
bool &source_depth_to_render_target,
|
||||||
@@ -192,7 +199,6 @@ public:
|
|||||||
bool run_mesh(bool allow_spilling);
|
bool run_mesh(bool allow_spilling);
|
||||||
void optimize();
|
void optimize();
|
||||||
void allocate_registers(bool allow_spilling);
|
void allocate_registers(bool allow_spilling);
|
||||||
void setup_gs_payload();
|
|
||||||
void setup_cs_payload();
|
void setup_cs_payload();
|
||||||
bool fixup_sends_duplicate_payload();
|
bool fixup_sends_duplicate_payload();
|
||||||
void fixup_3src_null_dest();
|
void fixup_3src_null_dest();
|
||||||
@@ -471,6 +477,11 @@ public:
|
|||||||
return *static_cast<tes_thread_payload *>(this->payload_);
|
return *static_cast<tes_thread_payload *>(this->payload_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gs_thread_payload &gs_payload() {
|
||||||
|
assert(stage == MESA_SHADER_GEOMETRY);
|
||||||
|
return *static_cast<gs_thread_payload *>(this->payload_);
|
||||||
|
}
|
||||||
|
|
||||||
fs_thread_payload &fs_payload() {
|
fs_thread_payload &fs_payload() {
|
||||||
assert(stage == MESA_SHADER_FRAGMENT);
|
assert(stage == MESA_SHADER_FRAGMENT);
|
||||||
return *static_cast<fs_thread_payload *>(this->payload_);
|
return *static_cast<fs_thread_payload *>(this->payload_);
|
||||||
|
@@ -2337,7 +2337,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
|
|||||||
sources[i] = this->control_data_bits;
|
sources[i] = this->control_data_bits;
|
||||||
|
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
||||||
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offset;
|
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offset;
|
||||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = channel_mask;
|
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = channel_mask;
|
||||||
srcs[URB_LOGICAL_SRC_DATA] = bld.vgrf(BRW_REGISTER_TYPE_F, length);
|
srcs[URB_LOGICAL_SRC_DATA] = bld.vgrf(BRW_REGISTER_TYPE_F, length);
|
||||||
@@ -3155,8 +3155,7 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
|
|||||||
case nir_intrinsic_load_primitive_id:
|
case nir_intrinsic_load_primitive_id:
|
||||||
assert(stage == MESA_SHADER_GEOMETRY);
|
assert(stage == MESA_SHADER_GEOMETRY);
|
||||||
assert(brw_gs_prog_data(prog_data)->include_primitive_id);
|
assert(brw_gs_prog_data(prog_data)->include_primitive_id);
|
||||||
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
|
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), gs_payload().primitive_id);
|
||||||
retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_intrinsic_load_input:
|
case nir_intrinsic_load_input:
|
||||||
|
@@ -81,6 +81,52 @@ tes_thread_payload::tes_thread_payload()
|
|||||||
num_regs = 5;
|
num_regs = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gs_thread_payload::gs_thread_payload(const fs_visitor &v)
|
||||||
|
{
|
||||||
|
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(v.prog_data);
|
||||||
|
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(v.prog_data);
|
||||||
|
|
||||||
|
/* R0: thread header. */
|
||||||
|
unsigned r = 1;
|
||||||
|
|
||||||
|
/* R1: output URB handles. */
|
||||||
|
urb_handles = retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD);
|
||||||
|
r++;
|
||||||
|
|
||||||
|
if (gs_prog_data->include_primitive_id) {
|
||||||
|
primitive_id = retype(brw_vec8_grf(2, 0), BRW_REGISTER_TYPE_UD);
|
||||||
|
r++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Always enable VUE handles so we can safely use pull model if needed.
|
||||||
|
*
|
||||||
|
* The push model for a GS uses a ton of register space even for trivial
|
||||||
|
* scenarios with just a few inputs, so just make things easier and a bit
|
||||||
|
* safer by always having pull model available.
|
||||||
|
*/
|
||||||
|
gs_prog_data->base.include_vue_handles = true;
|
||||||
|
|
||||||
|
/* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
|
||||||
|
r += v.nir->info.gs.vertices_in;
|
||||||
|
|
||||||
|
num_regs = r;
|
||||||
|
|
||||||
|
/* Use a maximum of 24 registers for push-model inputs. */
|
||||||
|
const unsigned max_push_components = 24;
|
||||||
|
|
||||||
|
/* If pushing our inputs would take too many registers, reduce the URB read
|
||||||
|
* length (which is in HWords, or 8 registers), and resort to pulling.
|
||||||
|
*
|
||||||
|
* Note that the GS reads <URB Read Length> HWords for every vertex - so we
|
||||||
|
* have to multiply by VerticesIn to obtain the total storage requirement.
|
||||||
|
*/
|
||||||
|
if (8 * vue_prog_data->urb_read_length * v.nir->info.gs.vertices_in >
|
||||||
|
max_push_components) {
|
||||||
|
vue_prog_data->urb_read_length =
|
||||||
|
ROUND_DOWN_TO(max_push_components / v.nir->info.gs.vertices_in, 8) / 8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
setup_fs_payload_gfx6(fs_thread_payload &payload,
|
setup_fs_payload_gfx6(fs_thread_payload &payload,
|
||||||
const fs_visitor &v,
|
const fs_visitor &v,
|
||||||
|
@@ -778,7 +778,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
|||||||
urb_handle = tes_payload().urb_output;
|
urb_handle = tes_payload().urb_output;
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_GEOMETRY:
|
case MESA_SHADER_GEOMETRY:
|
||||||
urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
urb_handle = gs_payload().urb_handles;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
unreachable("invalid stage");
|
unreachable("invalid stage");
|
||||||
|
Reference in New Issue
Block a user