i965/vec4: Use NIR to do GS input remapping
We're already doing this in the FS back-end. This just does the same thing in the vec4 back-end.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -334,7 +334,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
|
brw_nir_lower_vue_inputs(nir_shader *nir,
|
||||||
const struct brw_vue_map *vue_map)
|
const struct brw_vue_map *vue_map)
|
||||||
{
|
{
|
||||||
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
|
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
|
||||||
@@ -344,9 +344,6 @@ brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
|
|||||||
/* Inputs are stored in vec4 slots, so use type_size_vec4(). */
|
/* Inputs are stored in vec4 slots, so use type_size_vec4(). */
|
||||||
nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
|
nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
|
||||||
|
|
||||||
if (nir->stage == MESA_SHADER_GEOMETRY && !is_scalar)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* This pass needs actual constants */
|
/* This pass needs actual constants */
|
||||||
nir_opt_constant_folding(nir);
|
nir_opt_constant_folding(nir);
|
||||||
|
|
||||||
|
@@ -100,7 +100,7 @@ bool brw_nir_lower_intrinsics(nir_shader *nir,
|
|||||||
void brw_nir_lower_vs_inputs(nir_shader *nir,
|
void brw_nir_lower_vs_inputs(nir_shader *nir,
|
||||||
bool use_legacy_snorm_formula,
|
bool use_legacy_snorm_formula,
|
||||||
const uint8_t *vs_attrib_wa_flags);
|
const uint8_t *vs_attrib_wa_flags);
|
||||||
void brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
|
void brw_nir_lower_vue_inputs(nir_shader *nir,
|
||||||
const struct brw_vue_map *vue_map);
|
const struct brw_vue_map *vue_map);
|
||||||
void brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue);
|
void brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue);
|
||||||
void brw_nir_lower_fs_inputs(nir_shader *nir,
|
void brw_nir_lower_fs_inputs(nir_shader *nir,
|
||||||
|
@@ -1677,66 +1677,6 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline struct brw_reg
|
|
||||||
attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved)
|
|
||||||
{
|
|
||||||
struct brw_reg reg;
|
|
||||||
|
|
||||||
unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type));
|
|
||||||
if (interleaved) {
|
|
||||||
reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * 4), 0, width, 1);
|
|
||||||
} else {
|
|
||||||
reg = brw_vecn_grf(width, attr, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
reg.type = type;
|
|
||||||
return reg;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Replace each register of type ATTR in this->instructions with a reference
|
|
||||||
* to a fixed HW register.
|
|
||||||
*
|
|
||||||
* If interleaved is true, then each attribute takes up half a register, with
|
|
||||||
* register N containing attribute 2*N in its first half and attribute 2*N+1
|
|
||||||
* in its second half (this corresponds to the payload setup used by geometry
|
|
||||||
* shaders in "single" or "dual instanced" dispatch mode). If interleaved is
|
|
||||||
* false, then each attribute takes up a whole register, with register N
|
|
||||||
* containing attribute N (this corresponds to the payload setup used by
|
|
||||||
* vertex shaders, and by geometry shaders in "dual object" dispatch mode).
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
|
|
||||||
bool interleaved)
|
|
||||||
{
|
|
||||||
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
|
|
||||||
for (int i = 0; i < 3; i++) {
|
|
||||||
if (inst->src[i].file != ATTR)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
int grf = attribute_map[inst->src[i].nr +
|
|
||||||
inst->src[i].offset / REG_SIZE];
|
|
||||||
assert(inst->src[i].offset % REG_SIZE == 0);
|
|
||||||
|
|
||||||
/* All attributes used in the shader need to have been assigned a
|
|
||||||
* hardware register by the caller
|
|
||||||
*/
|
|
||||||
assert(grf != 0);
|
|
||||||
|
|
||||||
struct brw_reg reg =
|
|
||||||
attribute_to_hw_reg(grf, inst->src[i].type, interleaved);
|
|
||||||
reg.swizzle = inst->src[i].swizzle;
|
|
||||||
if (inst->src[i].abs)
|
|
||||||
reg = brw_abs(reg);
|
|
||||||
if (inst->src[i].negate)
|
|
||||||
reg = negate(reg);
|
|
||||||
|
|
||||||
inst->src[i] = reg;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
int
|
||||||
vec4_vs_visitor::setup_attributes(int payload_reg)
|
vec4_vs_visitor::setup_attributes(int payload_reg)
|
||||||
{
|
{
|
||||||
|
@@ -367,8 +367,6 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
void emit_vertex();
|
void emit_vertex();
|
||||||
void lower_attributes_to_hw_regs(const int *attribute_map,
|
|
||||||
bool interleaved);
|
|
||||||
void setup_payload_interference(struct ra_graph *g, int first_payload_node,
|
void setup_payload_interference(struct ra_graph *g, int first_payload_node,
|
||||||
int reg_node_count);
|
int reg_node_count);
|
||||||
virtual void setup_payload() = 0;
|
virtual void setup_payload() = 0;
|
||||||
|
@@ -66,8 +66,10 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||||||
nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
|
nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
|
||||||
nir_const_value *offset_reg = nir_src_as_const_value(instr->src[1]);
|
nir_const_value *offset_reg = nir_src_as_const_value(instr->src[1]);
|
||||||
|
|
||||||
|
const unsigned input_array_stride = prog_data->urb_read_length * 2;
|
||||||
|
|
||||||
if (nir_dest_bit_size(instr->dest) == 64) {
|
if (nir_dest_bit_size(instr->dest) == 64) {
|
||||||
src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
|
src = src_reg(ATTR, input_array_stride * vertex->u32[0] +
|
||||||
instr->const_index[0] + offset_reg->u32[0],
|
instr->const_index[0] + offset_reg->u32[0],
|
||||||
glsl_type::dvec4_type);
|
glsl_type::dvec4_type);
|
||||||
|
|
||||||
@@ -85,15 +87,11 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||||||
/* Make up a type...we have no way of knowing... */
|
/* Make up a type...we have no way of knowing... */
|
||||||
const glsl_type *const type = glsl_type::ivec(instr->num_components);
|
const glsl_type *const type = glsl_type::ivec(instr->num_components);
|
||||||
|
|
||||||
src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
|
src = src_reg(ATTR, input_array_stride * vertex->u32[0] +
|
||||||
instr->const_index[0] + offset_reg->u32[0],
|
instr->const_index[0] + offset_reg->u32[0],
|
||||||
type);
|
type);
|
||||||
src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
|
src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
|
||||||
|
|
||||||
/* gl_PointSize is passed in the .w component of the VUE header */
|
|
||||||
if (instr->const_index[0] == VARYING_SLOT_PSIZ)
|
|
||||||
src.swizzle = BRW_SWIZZLE_WWWW;
|
|
||||||
|
|
||||||
dest = get_nir_dest(instr->dest, src.type);
|
dest = get_nir_dest(instr->dest, src.type);
|
||||||
dest.writemask = brw_writemask_for_size(instr->num_components);
|
dest.writemask = brw_writemask_for_size(instr->num_components);
|
||||||
emit(MOV(dest, src));
|
emit(MOV(dest, src));
|
||||||
|
@@ -29,6 +29,7 @@
|
|||||||
|
|
||||||
#include "brw_vec4_gs_visitor.h"
|
#include "brw_vec4_gs_visitor.h"
|
||||||
#include "gen6_gs_visitor.h"
|
#include "gen6_gs_visitor.h"
|
||||||
|
#include "brw_cfg.h"
|
||||||
#include "brw_fs.h"
|
#include "brw_fs.h"
|
||||||
#include "brw_nir.h"
|
#include "brw_nir.h"
|
||||||
#include "common/gen_debug.h"
|
#include "common/gen_debug.h"
|
||||||
@@ -72,9 +73,36 @@ vec4_gs_visitor::make_reg_for_system_value(int location)
|
|||||||
return reg;
|
return reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline struct brw_reg
|
||||||
|
attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved)
|
||||||
|
{
|
||||||
|
struct brw_reg reg;
|
||||||
|
|
||||||
|
unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type));
|
||||||
|
if (interleaved) {
|
||||||
|
reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * 4), 0, width, 1);
|
||||||
|
} else {
|
||||||
|
reg = brw_vecn_grf(width, attr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
reg.type = type;
|
||||||
|
return reg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replace each register of type ATTR in this->instructions with a reference
|
||||||
|
* to a fixed HW register.
|
||||||
|
*
|
||||||
|
* If interleaved is true, then each attribute takes up half a register, with
|
||||||
|
* register N containing attribute 2*N in its first half and attribute 2*N+1
|
||||||
|
* in its second half (this corresponds to the payload setup used by geometry
|
||||||
|
* shaders in "single" or "dual instanced" dispatch mode). If interleaved is
|
||||||
|
* false, then each attribute takes up a whole register, with register N
|
||||||
|
* containing attribute N (this corresponds to the payload setup used by
|
||||||
|
* vertex shaders, and by geometry shaders in "dual object" dispatch mode).
|
||||||
|
*/
|
||||||
int
|
int
|
||||||
vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map,
|
vec4_gs_visitor::setup_varying_inputs(int payload_reg,
|
||||||
int attributes_per_reg)
|
int attributes_per_reg)
|
||||||
{
|
{
|
||||||
/* For geometry shaders there are N copies of the input attributes, where N
|
/* For geometry shaders there are N copies of the input attributes, where N
|
||||||
@@ -89,12 +117,24 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map,
|
|||||||
assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
|
assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
|
||||||
unsigned input_array_stride = prog_data->urb_read_length * 2;
|
unsigned input_array_stride = prog_data->urb_read_length * 2;
|
||||||
|
|
||||||
for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) {
|
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
|
||||||
int varying = c->input_vue_map.slot_to_varying[slot];
|
for (int i = 0; i < 3; i++) {
|
||||||
for (unsigned vertex = 0; vertex < num_input_vertices; vertex++) {
|
if (inst->src[i].file != ATTR)
|
||||||
attribute_map[BRW_VARYING_SLOT_COUNT * vertex + varying] =
|
continue;
|
||||||
attributes_per_reg * payload_reg + input_array_stride * vertex +
|
|
||||||
slot;
|
assert(inst->src[i].offset % REG_SIZE == 0);
|
||||||
|
int grf = payload_reg * attributes_per_reg +
|
||||||
|
inst->src[i].nr + inst->src[i].offset / REG_SIZE;
|
||||||
|
|
||||||
|
struct brw_reg reg =
|
||||||
|
attribute_to_hw_reg(grf, inst->src[i].type, attributes_per_reg > 1);
|
||||||
|
reg.swizzle = inst->src[i].swizzle;
|
||||||
|
if (inst->src[i].abs)
|
||||||
|
reg = brw_abs(reg);
|
||||||
|
if (inst->src[i].negate)
|
||||||
|
reg = negate(reg);
|
||||||
|
|
||||||
|
inst->src[i] = reg;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,25 +143,15 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map,
|
|||||||
return payload_reg + regs_used;
|
return payload_reg + regs_used;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
vec4_gs_visitor::setup_payload()
|
vec4_gs_visitor::setup_payload()
|
||||||
{
|
{
|
||||||
int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES];
|
|
||||||
|
|
||||||
/* If we are in dual instanced or single mode, then attributes are going
|
/* If we are in dual instanced or single mode, then attributes are going
|
||||||
* to be interleaved, so one register contains two attribute slots.
|
* to be interleaved, so one register contains two attribute slots.
|
||||||
*/
|
*/
|
||||||
int attributes_per_reg =
|
int attributes_per_reg =
|
||||||
prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
|
prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
|
||||||
|
|
||||||
/* If a geometry shader tries to read from an input that wasn't written by
|
|
||||||
* the vertex shader, that produces undefined results, but it shouldn't
|
|
||||||
* crash anything. So initialize attribute_map to zeros--that ensures that
|
|
||||||
* these undefined results are read from r0.
|
|
||||||
*/
|
|
||||||
memset(attribute_map, 0, sizeof(attribute_map));
|
|
||||||
|
|
||||||
int reg = 0;
|
int reg = 0;
|
||||||
|
|
||||||
/* The payload always contains important data in r0, which contains
|
/* The payload always contains important data in r0, which contains
|
||||||
@@ -132,13 +162,11 @@ vec4_gs_visitor::setup_payload()
|
|||||||
|
|
||||||
/* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
|
/* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
|
||||||
if (gs_prog_data->include_primitive_id)
|
if (gs_prog_data->include_primitive_id)
|
||||||
attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg++;
|
reg++;
|
||||||
|
|
||||||
reg = setup_uniforms(reg);
|
reg = setup_uniforms(reg);
|
||||||
|
|
||||||
reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg);
|
reg = setup_varying_inputs(reg, attributes_per_reg);
|
||||||
|
|
||||||
lower_attributes_to_hw_regs(attribute_map, attributes_per_reg > 1);
|
|
||||||
|
|
||||||
this->first_non_payload_grf = reg;
|
this->first_non_payload_grf = reg;
|
||||||
}
|
}
|
||||||
@@ -634,7 +662,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
shader->info.separate_shader);
|
shader->info.separate_shader);
|
||||||
|
|
||||||
shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar);
|
shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar);
|
||||||
brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map);
|
brw_nir_lower_vue_inputs(shader, &c.input_vue_map);
|
||||||
brw_nir_lower_vue_outputs(shader, is_scalar);
|
brw_nir_lower_vue_outputs(shader, is_scalar);
|
||||||
shader = brw_postprocess_nir(shader, compiler, is_scalar);
|
shader = brw_postprocess_nir(shader, compiler, is_scalar);
|
||||||
|
|
||||||
|
@@ -64,8 +64,7 @@ protected:
|
|||||||
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
|
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
int setup_varying_inputs(int payload_reg, int *attribute_map,
|
int setup_varying_inputs(int payload_reg, int attributes_per_reg);
|
||||||
int attributes_per_reg);
|
|
||||||
void emit_control_data_bits();
|
void emit_control_data_bits();
|
||||||
void set_stream_control_data_bits(unsigned stream_id);
|
void set_stream_control_data_bits(unsigned stream_id);
|
||||||
|
|
||||||
|
@@ -413,7 +413,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||||||
nir->info.patch_outputs_written);
|
nir->info.patch_outputs_written);
|
||||||
|
|
||||||
nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar);
|
nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar);
|
||||||
brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
|
brw_nir_lower_vue_inputs(nir, &input_vue_map);
|
||||||
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
|
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
|
||||||
key->tes_primitive_mode);
|
key->tes_primitive_mode);
|
||||||
if (key->quads_workaround)
|
if (key->quads_workaround)
|
||||||
|
@@ -516,9 +516,7 @@ gen6_gs_visitor::setup_payload()
|
|||||||
|
|
||||||
reg = setup_uniforms(reg);
|
reg = setup_uniforms(reg);
|
||||||
|
|
||||||
reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg);
|
reg = setup_varying_inputs(reg, attributes_per_reg);
|
||||||
|
|
||||||
lower_attributes_to_hw_regs(attribute_map, true);
|
|
||||||
|
|
||||||
this->first_non_payload_grf = reg;
|
this->first_non_payload_grf = reg;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user