diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index d2e40ef62bf..a4e8cd0beca 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1543,13 +1543,13 @@ fs_visitor::emit_gs_thread_end()
          }
       }
       fs_reg srcs[URB_LOGICAL_NUM_SRCS];
-      srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+      srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
       inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, srcs, ARRAY_SIZE(srcs));
       inst->mlen = 1;
    } else {
       fs_reg srcs[URB_LOGICAL_NUM_SRCS];
-      srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+      srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
       srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count;
       inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, srcs, ARRAY_SIZE(srcs));
@@ -5852,50 +5852,6 @@ fs_visitor::dump_instruction(const backend_instruction *be_inst, FILE *file) con
    fprintf(file, "\n");
 }
 
-void
-fs_visitor::setup_gs_payload()
-{
-   assert(stage == MESA_SHADER_GEOMETRY);
-   thread_payload &payload = this->payload();
-
-   struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
-   struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
-
-   /* R0: thread header, R1: output URB handles */
-   payload.num_regs = 2;
-
-   if (gs_prog_data->include_primitive_id) {
-      /* R2: Primitive ID 0..7 */
-      payload.num_regs++;
-   }
-
-   /* Always enable VUE handles so we can safely use pull model if needed.
-    *
-    * The push model for a GS uses a ton of register space even for trivial
-    * scenarios with just a few inputs, so just make things easier and a bit
-    * safer by always having pull model available.
-    */
-   gs_prog_data->base.include_vue_handles = true;
-
-   /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
-   payload.num_regs += nir->info.gs.vertices_in;
-
-   /* Use a maximum of 24 registers for push-model inputs. */
-   const unsigned max_push_components = 24;
-
-   /* If pushing our inputs would take too many registers, reduce the URB read
-    * length (which is in HWords, or 8 registers), and resort to pulling.
-    *
-    * Note that the GS reads <URB Read Length> HWords for every vertex - so we
-    * have to multiply by VerticesIn to obtain the total storage requirement.
-    */
-   if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
-       max_push_components) {
-      vue_prog_data->urb_read_length =
-         ROUND_DOWN_TO(max_push_components / nir->info.gs.vertices_in, 8) / 8;
-   }
-}
-
 void
 fs_visitor::setup_cs_payload()
 {
@@ -6702,7 +6658,7 @@ fs_visitor::run_gs()
 {
    assert(stage == MESA_SHADER_GEOMETRY);
 
-   setup_gs_payload();
+   payload_ = new gs_thread_payload(*this);
 
    this->final_gs_vertex_count = vgrf(glsl_type::uint_type);
 
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 70cebae8a0e..fe741e3fd64 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -116,6 +116,13 @@ struct tes_thread_payload : public thread_payload {
    fs_reg urb_output;
 };
 
+struct gs_thread_payload : public thread_payload {
+   gs_thread_payload(const fs_visitor &v);
+
+   fs_reg urb_handles;
+   fs_reg primitive_id;
+};
+
 struct fs_thread_payload : public thread_payload {
    fs_thread_payload(const fs_visitor &v,
                      bool &source_depth_to_render_target,
@@ -192,7 +199,6 @@ public:
    bool run_mesh(bool allow_spilling);
    void optimize();
    void allocate_registers(bool allow_spilling);
-   void setup_gs_payload();
    void setup_cs_payload();
    bool fixup_sends_duplicate_payload();
    void fixup_3src_null_dest();
@@ -471,6 +477,11 @@ public:
       return *static_cast<tes_thread_payload *>(this->payload_);
    }
 
+   gs_thread_payload &gs_payload() {
+      assert(stage == MESA_SHADER_GEOMETRY);
+      return *static_cast<gs_thread_payload *>(this->payload_);
+   }
+
    fs_thread_payload &fs_payload() {
       assert(stage == MESA_SHADER_FRAGMENT);
       return *static_cast<fs_thread_payload *>(this->payload_);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 5a57dc302ec..ca6a2f44ae9 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2337,7 +2337,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
       sources[i] = this->control_data_bits;
 
    fs_reg srcs[URB_LOGICAL_NUM_SRCS];
-   srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+   srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
    srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offset;
    srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = channel_mask;
    srcs[URB_LOGICAL_SRC_DATA] = bld.vgrf(BRW_REGISTER_TYPE_F, length);
@@ -3155,8 +3155,7 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
    case nir_intrinsic_load_primitive_id:
       assert(stage == MESA_SHADER_GEOMETRY);
       assert(brw_gs_prog_data(prog_data)->include_primitive_id);
-      bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
-              retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), gs_payload().primitive_id);
       break;
 
    case nir_intrinsic_load_input:
diff --git a/src/intel/compiler/brw_fs_thread_payload.cpp b/src/intel/compiler/brw_fs_thread_payload.cpp
index e522f723897..f800eb74bca 100644
--- a/src/intel/compiler/brw_fs_thread_payload.cpp
+++ b/src/intel/compiler/brw_fs_thread_payload.cpp
@@ -81,6 +81,52 @@ tes_thread_payload::tes_thread_payload()
    num_regs = 5;
 }
 
+gs_thread_payload::gs_thread_payload(const fs_visitor &v)
+{
+   struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(v.prog_data);
+   struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(v.prog_data);
+
+   /* R0: thread header. */
+   unsigned r = 1;
+
+   /* R1: output URB handles. */
+   urb_handles = retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD);
+   r++;
+
+   if (gs_prog_data->include_primitive_id) {
+      primitive_id = retype(brw_vec8_grf(2, 0), BRW_REGISTER_TYPE_UD);
+      r++;
+   }
+
+   /* Always enable VUE handles so we can safely use pull model if needed.
+    *
+    * The push model for a GS uses a ton of register space even for trivial
+    * scenarios with just a few inputs, so just make things easier and a bit
+    * safer by always having pull model available.
+    */
+   gs_prog_data->base.include_vue_handles = true;
+
+   /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
+   r += v.nir->info.gs.vertices_in;
+
+   num_regs = r;
+
+   /* Use a maximum of 24 registers for push-model inputs. */
+   const unsigned max_push_components = 24;
+
+   /* If pushing our inputs would take too many registers, reduce the URB read
+    * length (which is in HWords, or 8 registers), and resort to pulling.
+    *
+    * Note that the GS reads <URB Read Length> HWords for every vertex - so we
+    * have to multiply by VerticesIn to obtain the total storage requirement.
+    */
+   if (8 * vue_prog_data->urb_read_length * v.nir->info.gs.vertices_in >
+       max_push_components) {
+      vue_prog_data->urb_read_length =
+         ROUND_DOWN_TO(max_push_components / v.nir->info.gs.vertices_in, 8) / 8;
+   }
+}
+
 static inline void
 setup_fs_payload_gfx6(fs_thread_payload &payload,
                       const fs_visitor &v,
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 3380758389c..caf3a5f2200 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -778,7 +778,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
      urb_handle = tes_payload().urb_output;
      break;
   case MESA_SHADER_GEOMETRY:
-      urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+      urb_handle = gs_payload().urb_handles;
      break;
   default:
      unreachable("invalid stage");
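
Illustrative sketch (not part of the patch, and not Mesa's API): a minimal, self-contained C++ analogue of the two ideas the patch introduces, namely a GS-specific payload object whose constructor does the R0/R1/R2/ICP-handle register accounting and the 24-register push-model clamp, and a stage-checked accessor that downcasts the shared payload_ pointer, as fs_visitor::gs_payload() does above. The shader_stage enum, the visitor struct, and the urb_handles_grf/primitive_id_grf fields are invented stand-ins; ROUND_DOWN_TO(x, 8) / 8 is simplified to x / 8, which is equivalent for unsigned x.

// Stand-in types only; the register accounting and clamp mirror the patch.
#include <cassert>
#include <cstdio>

enum shader_stage { STAGE_GEOMETRY, STAGE_FRAGMENT };

struct thread_payload {
   unsigned num_regs = 0;
   virtual ~thread_payload() = default;
};

struct gs_thread_payload : thread_payload {
   gs_thread_payload(bool include_primitive_id, unsigned vertices_in,
                     unsigned &urb_read_length /* per-vertex, in HWords */)
   {
      unsigned r = 1;                 /* R0: thread header */
      urb_handles_grf = r++;          /* R1: output URB handles */
      if (include_primitive_id)
         primitive_id_grf = r++;      /* R2: primitive ID */
      r += vertices_in;               /* R3..RN: ICP handles (pull model) */
      num_regs = r;

      /* Same clamp as the patch: if pushing the inputs would exceed 24
       * registers, shrink the per-vertex URB read length instead. */
      const unsigned max_push_components = 24;
      if (8 * urb_read_length * vertices_in > max_push_components)
         urb_read_length = (max_push_components / vertices_in) / 8;
   }

   int urb_handles_grf = -1;
   int primitive_id_grf = -1;
};

struct visitor {
   shader_stage stage = STAGE_GEOMETRY;
   thread_payload *payload_ = nullptr;

   /* Same pattern as the new fs_visitor::gs_payload(): assert the stage,
    * then downcast the shared payload_ pointer to the GS-specific type. */
   gs_thread_payload &gs_payload() {
      assert(stage == STAGE_GEOMETRY);
      return *static_cast<gs_thread_payload *>(payload_);
   }
};

int main() {
   unsigned urb_read_length = 2;   /* 2 HWords per vertex before clamping */
   visitor v;
   gs_thread_payload payload(true, 3, urb_read_length);
   v.payload_ = &payload;

   printf("payload regs: %u, URB handles in r%d, clamped read length: %u\n",
          v.gs_payload().num_regs, v.gs_payload().urb_handles_grf,
          urb_read_length);
   return 0;
}

With include_primitive_id = true and 3 incoming vertices this prints a 6-register payload with the URB handles in r1, and clamps a 2-HWord per-vertex read length down to 1, matching the arithmetic in gs_thread_payload above.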