asahi,agx: Rewrite varying linking
Instead of using driver_location magic and hoping things work, make the linkage between vertex and fragment shaders explicit. Thanks to the coefficient register mechanism reverse-engineered and documented earlier in this series, this does not require any shader keys to support separable shaders. It just requires that we regenerate the coefficient register binding tables at draw time, based on the varying layouts decided by the compiler independently for the VS and FS. This is more robust in the face of separate shaders. This also gets us glProvokingVertex() support without shader keys. After that, we don't need any of the remapping prepasses. For fragment shaders, any old mapping will do, so we can assign coefficient registers as we go (based on what the program actually uses, not nir_variable information that might be stale by this point). We do want to cache coefficient registers, particularly for fragcoord.w which is used for perspective interpolation everywhere. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17198>
This commit is contained in:
@@ -51,6 +51,45 @@ int agx_debug = 0;
|
|||||||
fprintf(stderr, "%s:%d: "fmt, \
|
fprintf(stderr, "%s:%d: "fmt, \
|
||||||
__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
|
__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
|
||||||
|
|
||||||
|
static agx_index
|
||||||
|
agx_get_cf(agx_context *ctx, bool smooth, bool perspective,
|
||||||
|
gl_varying_slot slot, unsigned offset, unsigned count)
|
||||||
|
{
|
||||||
|
struct agx_varyings_fs *varyings = &ctx->out->varyings.fs;
|
||||||
|
unsigned cf_base = varyings->nr_cf;
|
||||||
|
|
||||||
|
if (slot == VARYING_SLOT_POS) {
|
||||||
|
assert(offset == 2 || (cf_base == 0 && offset == 3));
|
||||||
|
varyings->reads_z |= (offset == 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* First, search for an appropriate binding. This is O(n) to the number of
|
||||||
|
* bindings, which isn't great, but n should be small in practice.
|
||||||
|
*/
|
||||||
|
for (unsigned b = 0; b < varyings->nr_bindings; ++b) {
|
||||||
|
if ((varyings->bindings[b].slot == slot) &&
|
||||||
|
(varyings->bindings[b].offset == offset) &&
|
||||||
|
(varyings->bindings[b].count == count) &&
|
||||||
|
(varyings->bindings[b].smooth == smooth) &&
|
||||||
|
(varyings->bindings[b].perspective == perspective)) {
|
||||||
|
|
||||||
|
return agx_immediate(varyings->bindings[b].cf_base);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we didn't find one, make one */
|
||||||
|
unsigned b = varyings->nr_bindings++;
|
||||||
|
varyings->bindings[b].cf_base = varyings->nr_cf;
|
||||||
|
varyings->bindings[b].slot = slot;
|
||||||
|
varyings->bindings[b].offset = offset;
|
||||||
|
varyings->bindings[b].count = count;
|
||||||
|
varyings->bindings[b].smooth = smooth;
|
||||||
|
varyings->bindings[b].perspective = perspective;
|
||||||
|
varyings->nr_cf += count;
|
||||||
|
|
||||||
|
return agx_immediate(cf_base);
|
||||||
|
}
|
||||||
|
|
||||||
/* Builds a 64-bit hash table key for an index */
|
/* Builds a 64-bit hash table key for an index */
|
||||||
static uint64_t
|
static uint64_t
|
||||||
agx_index_to_key(agx_index idx)
|
agx_index_to_key(agx_index idx)
|
||||||
@@ -278,17 +317,25 @@ agx_emit_load_vary_flat(agx_builder *b, agx_index *dests, nir_intrinsic_instr *i
|
|||||||
unsigned components = instr->num_components;
|
unsigned components = instr->num_components;
|
||||||
assert(components >= 1 && components <= 4);
|
assert(components >= 1 && components <= 4);
|
||||||
|
|
||||||
|
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||||
nir_src *offset = nir_get_io_offset_src(instr);
|
nir_src *offset = nir_get_io_offset_src(instr);
|
||||||
assert(nir_src_is_const(*offset) && "no indirects");
|
assert(nir_src_is_const(*offset) && "no indirects");
|
||||||
unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)];
|
|
||||||
imm_index += nir_src_as_uint(*offset);
|
|
||||||
|
|
||||||
assert(nir_dest_bit_size(instr->dest) == 32 && "no 16-bit flat shading");
|
assert(nir_dest_bit_size(instr->dest) == 32 && "no 16-bit flat shading");
|
||||||
|
|
||||||
|
/* Get all coefficient registers up front. This ensures the driver emits a
|
||||||
|
* single vectorized binding.
|
||||||
|
*/
|
||||||
|
agx_index cf = agx_get_cf(b->shader, false, false,
|
||||||
|
sem.location + nir_src_as_uint(*offset), 0,
|
||||||
|
components);
|
||||||
|
|
||||||
for (unsigned i = 0; i < components; ++i) {
|
for (unsigned i = 0; i < components; ++i) {
|
||||||
/* vec3 for each vertex, unknown what first 2 channels are for */
|
/* vec3 for each vertex, unknown what first 2 channels are for */
|
||||||
agx_index values = agx_ld_vary_flat(b, agx_immediate(imm_index + i), 1);
|
agx_index values = agx_ld_vary_flat(b, cf, 1);
|
||||||
dests[i] = agx_p_extract(b, values, 2);
|
dests[i] = agx_p_extract(b, values, 2);
|
||||||
|
|
||||||
|
/* Each component accesses a sequential coefficient register */
|
||||||
|
cf.value++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -304,22 +351,29 @@ agx_emit_load_vary(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
|||||||
/* TODO: Interpolation modes */
|
/* TODO: Interpolation modes */
|
||||||
assert(parent->intrinsic == nir_intrinsic_load_barycentric_pixel);
|
assert(parent->intrinsic == nir_intrinsic_load_barycentric_pixel);
|
||||||
|
|
||||||
|
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||||
nir_src *offset = nir_get_io_offset_src(instr);
|
nir_src *offset = nir_get_io_offset_src(instr);
|
||||||
assert(nir_src_is_const(*offset) && "no indirects");
|
assert(nir_src_is_const(*offset) && "no indirects");
|
||||||
unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)];
|
|
||||||
imm_index += nir_src_as_uint(*offset) * 4;
|
/* TODO: Make use of w explicit in the IR */
|
||||||
|
agx_index I = agx_get_cf(b->shader, true, true,
|
||||||
|
sem.location + nir_src_as_uint(*offset), 0,
|
||||||
|
components);
|
||||||
|
|
||||||
agx_index vec = agx_vec_for_intr(b->shader, instr);
|
agx_index vec = agx_vec_for_intr(b->shader, instr);
|
||||||
agx_ld_vary_to(b, vec, agx_immediate(imm_index), components, true);
|
agx_ld_vary_to(b, vec, I, components, true);
|
||||||
agx_emit_split(b, dests, vec, components);
|
agx_emit_split(b, dests, vec, components);
|
||||||
}
|
}
|
||||||
|
|
||||||
static agx_instr *
|
static agx_instr *
|
||||||
agx_emit_store_vary(agx_builder *b, nir_intrinsic_instr *instr)
|
agx_emit_store_vary(agx_builder *b, nir_intrinsic_instr *instr)
|
||||||
{
|
{
|
||||||
|
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||||
nir_src *offset = nir_get_io_offset_src(instr);
|
nir_src *offset = nir_get_io_offset_src(instr);
|
||||||
assert(nir_src_is_const(*offset) && "todo: indirects");
|
assert(nir_src_is_const(*offset) && "todo: indirects");
|
||||||
unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)];
|
|
||||||
|
unsigned imm_index = b->shader->out->varyings.vs.slots[sem.location];
|
||||||
|
assert(imm_index < ~0);
|
||||||
imm_index += nir_intrinsic_component(instr);
|
imm_index += nir_intrinsic_component(instr);
|
||||||
imm_index += nir_src_as_uint(*offset);
|
imm_index += nir_src_as_uint(*offset);
|
||||||
|
|
||||||
@@ -447,8 +501,10 @@ agx_emit_load_frag_coord(agx_builder *b, agx_index *dests, nir_intrinsic_instr *
|
|||||||
AGX_ROUND_RTE), agx_immediate_f(0.5f));
|
AGX_ROUND_RTE), agx_immediate_f(0.5f));
|
||||||
}
|
}
|
||||||
|
|
||||||
dests[2] = agx_ld_vary(b, agx_immediate(1), 1, false); /* z */
|
agx_index z = agx_get_cf(b->shader, true, false, VARYING_SLOT_POS, 2, 1);
|
||||||
dests[3] = agx_ld_vary(b, agx_immediate(0), 1, false); /* w */
|
|
||||||
|
dests[2] = agx_ld_vary(b, z, 1, false);
|
||||||
|
dests[3] = agx_ld_vary(b, agx_immediate(0), 1, false); /* cf0 is w */
|
||||||
}
|
}
|
||||||
|
|
||||||
static agx_instr *
|
static agx_instr *
|
||||||
@@ -1500,118 +1556,38 @@ agx_optimize_nir(nir_shader *nir)
|
|||||||
|
|
||||||
/* ABI: position first, then user, then psiz */
|
/* ABI: position first, then user, then psiz */
|
||||||
static void
|
static void
|
||||||
agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings *varyings,
|
agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings)
|
||||||
unsigned *remap)
|
|
||||||
{
|
{
|
||||||
unsigned base = 0;
|
unsigned base = 0;
|
||||||
|
|
||||||
nir_variable *pos = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_POS);
|
/* Initialize to "nothing is written" */
|
||||||
if (pos) {
|
for (unsigned i = 0; i < ARRAY_SIZE(varyings->slots); ++i)
|
||||||
assert(pos->data.driver_location < AGX_MAX_VARYINGS);
|
varyings->slots[i] = ~0;
|
||||||
remap[pos->data.driver_location] = base;
|
|
||||||
|
assert(nir->info.outputs_written & VARYING_BIT_POS);
|
||||||
|
varyings->slots[VARYING_SLOT_POS] = base;
|
||||||
base += 4;
|
base += 4;
|
||||||
}
|
|
||||||
|
|
||||||
nir_foreach_shader_out_variable(var, nir) {
|
nir_foreach_shader_out_variable(var, nir) {
|
||||||
unsigned loc = var->data.location;
|
unsigned loc = var->data.location;
|
||||||
|
|
||||||
if(loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ) {
|
if(loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ)
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
assert(var->data.driver_location < AGX_MAX_VARYINGS);
|
varyings->slots[loc] = base;
|
||||||
remap[var->data.driver_location] = base;
|
|
||||||
base += 4;
|
base += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
nir_variable *psiz = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_PSIZ);
|
/* TODO: Link FP16 varyings */
|
||||||
if (psiz) {
|
varyings->base_index_fp16 = base;
|
||||||
assert(psiz->data.driver_location < AGX_MAX_VARYINGS);
|
|
||||||
remap[psiz->data.driver_location] = base;
|
if (nir->info.outputs_written & VARYING_BIT_PSIZ) {
|
||||||
|
varyings->slots[VARYING_SLOT_PSIZ] = base;
|
||||||
base += 1;
|
base += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
varyings->nr_slots = base;
|
/* All varyings linked now */
|
||||||
}
|
varyings->nr_index = base;
|
||||||
|
|
||||||
static void
|
|
||||||
agx_remap_varyings_fs(nir_shader *nir, struct agx_varyings *varyings,
|
|
||||||
unsigned *remap)
|
|
||||||
{
|
|
||||||
struct agx_cf_binding_packed *packed = varyings->packed;
|
|
||||||
unsigned base = 0;
|
|
||||||
|
|
||||||
agx_pack(packed, CF_BINDING, cfg) {
|
|
||||||
/* W component */
|
|
||||||
cfg.shade_model = AGX_SHADE_MODEL_GOURAUD;
|
|
||||||
cfg.components = 1;
|
|
||||||
cfg.base_slot = base;
|
|
||||||
cfg.base_coefficient_register = base;
|
|
||||||
}
|
|
||||||
|
|
||||||
base++;
|
|
||||||
packed++;
|
|
||||||
|
|
||||||
agx_pack(packed, CF_BINDING, cfg) {
|
|
||||||
/* Z component */
|
|
||||||
cfg.shade_model = AGX_SHADE_MODEL_GOURAUD;
|
|
||||||
cfg.perspective = true;
|
|
||||||
cfg.fragcoord_z = true;
|
|
||||||
cfg.components = 1;
|
|
||||||
cfg.base_slot = base;
|
|
||||||
cfg.base_coefficient_register = base;
|
|
||||||
}
|
|
||||||
|
|
||||||
base++;
|
|
||||||
packed++;
|
|
||||||
|
|
||||||
unsigned comps[MAX_VARYING] = { 0 };
|
|
||||||
|
|
||||||
nir_foreach_shader_in_variable(var, nir) {
|
|
||||||
unsigned loc = var->data.driver_location;
|
|
||||||
const struct glsl_type *column =
|
|
||||||
glsl_without_array_or_matrix(var->type);
|
|
||||||
unsigned chan = glsl_get_components(column);
|
|
||||||
|
|
||||||
/* If we have a fractional location added, we need to increase the size
|
|
||||||
* so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
|
|
||||||
* We could do better but this is an edge case as it is, normally
|
|
||||||
* packed varyings will be aligned.
|
|
||||||
*/
|
|
||||||
chan += var->data.location_frac;
|
|
||||||
comps[loc] = MAX2(comps[loc], chan);
|
|
||||||
}
|
|
||||||
|
|
||||||
nir_foreach_shader_in_variable(var, nir) {
|
|
||||||
unsigned loc = var->data.driver_location;
|
|
||||||
unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
|
|
||||||
unsigned channels = comps[loc];
|
|
||||||
|
|
||||||
assert(var->data.driver_location <= AGX_MAX_VARYINGS);
|
|
||||||
remap[var->data.driver_location] = base;
|
|
||||||
|
|
||||||
for (int c = 0; c < sz; ++c) {
|
|
||||||
agx_pack(packed, CF_BINDING, cfg) {
|
|
||||||
cfg.shade_model =
|
|
||||||
(var->data.interpolation == INTERP_MODE_FLAT) ?
|
|
||||||
AGX_SHADE_MODEL_FLAT_VERTEX_2 :
|
|
||||||
AGX_SHADE_MODEL_GOURAUD;
|
|
||||||
|
|
||||||
cfg.perspective = (var->data.interpolation != INTERP_MODE_FLAT);
|
|
||||||
cfg.point_sprite = (var->data.location == VARYING_SLOT_PNTC);
|
|
||||||
|
|
||||||
cfg.components = channels;
|
|
||||||
cfg.base_slot = base;
|
|
||||||
cfg.base_coefficient_register = base;
|
|
||||||
}
|
|
||||||
|
|
||||||
base += channels;
|
|
||||||
packed++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
varyings->nr_descs = (packed - varyings->packed);
|
|
||||||
varyings->nr_slots = base;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1648,6 +1624,8 @@ agx_compile_shader_nir(nir_shader *nir,
|
|||||||
ctx->stage = nir->info.stage;
|
ctx->stage = nir->info.stage;
|
||||||
list_inithead(&ctx->blocks);
|
list_inithead(&ctx->blocks);
|
||||||
|
|
||||||
|
memset(out, 0, sizeof *out);
|
||||||
|
|
||||||
if (ctx->stage == MESA_SHADER_VERTEX) {
|
if (ctx->stage == MESA_SHADER_VERTEX) {
|
||||||
out->writes_psiz = nir->info.outputs_written &
|
out->writes_psiz = nir->info.outputs_written &
|
||||||
BITFIELD_BIT(VARYING_SLOT_PSIZ);
|
BITFIELD_BIT(VARYING_SLOT_PSIZ);
|
||||||
@@ -1714,9 +1692,13 @@ agx_compile_shader_nir(nir_shader *nir,
|
|||||||
|
|
||||||
/* Must be last since NIR passes can remap driver_location freely */
|
/* Must be last since NIR passes can remap driver_location freely */
|
||||||
if (ctx->stage == MESA_SHADER_VERTEX) {
|
if (ctx->stage == MESA_SHADER_VERTEX) {
|
||||||
agx_remap_varyings_vs(nir, &out->varyings, ctx->varyings);
|
agx_remap_varyings_vs(nir, &out->varyings.vs);
|
||||||
} else if (ctx->stage == MESA_SHADER_FRAGMENT) {
|
} else if (ctx->stage == MESA_SHADER_FRAGMENT) {
|
||||||
agx_remap_varyings_fs(nir, &out->varyings, ctx->varyings);
|
/* Ensure cf0 is W */
|
||||||
|
ASSERTED agx_index w =
|
||||||
|
agx_get_cf(ctx, true, false, VARYING_SLOT_POS, 3, 1);
|
||||||
|
|
||||||
|
assert(w.value == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool skip_internal = nir->info.internal;
|
bool skip_internal = nir->info.internal;
|
||||||
|
@@ -84,9 +84,71 @@ struct agx_push {
|
|||||||
#define AGX_MAX_PUSH_RANGES (16)
|
#define AGX_MAX_PUSH_RANGES (16)
|
||||||
#define AGX_MAX_VARYINGS (32)
|
#define AGX_MAX_VARYINGS (32)
|
||||||
|
|
||||||
|
struct agx_varyings_vs {
|
||||||
|
/* The first index used for FP16 varyings. Indices less than this are treated
|
||||||
|
* as FP32. This may require remapping slots to guarantee.
|
||||||
|
*/
|
||||||
|
unsigned base_index_fp16;
|
||||||
|
|
||||||
|
/* The total number of vertex shader indices output. Must be at least
|
||||||
|
* base_index_fp16.
|
||||||
|
*/
|
||||||
|
unsigned nr_index;
|
||||||
|
|
||||||
|
/* If the slot is written, this is the base index that the first component
|
||||||
|
* of the slot is written to. The next components are found in the next
|
||||||
|
* indices. If less than base_index_fp16, this is a 32-bit slot (with 4
|
||||||
|
* indices for the 4 components), else this is a 16-bit slot (with 2
|
||||||
|
* indices for the 4 components). This must be less than nr_index.
|
||||||
|
*
|
||||||
|
* If the slot is not written, this must be ~0.
|
||||||
|
*/
|
||||||
|
unsigned slots[VARYING_SLOT_MAX];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Conservative bound */
|
||||||
|
#define AGX_MAX_CF_BINDINGS (VARYING_SLOT_MAX)
|
||||||
|
|
||||||
|
struct agx_varyings_fs {
|
||||||
|
/* Number of coefficient registers used */
|
||||||
|
unsigned nr_cf;
|
||||||
|
|
||||||
|
/* Number of coefficient register bindings */
|
||||||
|
unsigned nr_bindings;
|
||||||
|
|
||||||
|
/* Whether gl_FragCoord.z is read */
|
||||||
|
bool reads_z;
|
||||||
|
|
||||||
|
/* Coefficient register bindings */
|
||||||
|
struct {
|
||||||
|
/* Base coefficient register */
|
||||||
|
unsigned cf_base;
|
||||||
|
|
||||||
|
/* Slot being bound */
|
||||||
|
gl_varying_slot slot;
|
||||||
|
|
||||||
|
/* First component bound.
|
||||||
|
*
|
||||||
|
* Must be 2 (Z) or 3 (W) if slot == VARYING_SLOT_POS.
|
||||||
|
*/
|
||||||
|
unsigned offset : 2;
|
||||||
|
|
||||||
|
/* Number of components bound */
|
||||||
|
unsigned count : 3;
|
||||||
|
|
||||||
|
/* Is smooth shading enabled? If false, flat shading is used */
|
||||||
|
bool smooth : 1;
|
||||||
|
|
||||||
|
/* Perspective correct interpolation */
|
||||||
|
bool perspective : 1;
|
||||||
|
} bindings[AGX_MAX_CF_BINDINGS];
|
||||||
|
};
|
||||||
|
|
||||||
struct agx_varyings {
|
struct agx_varyings {
|
||||||
unsigned nr_descs, nr_slots;
|
union {
|
||||||
struct agx_cf_binding_packed packed[AGX_MAX_VARYINGS];
|
struct agx_varyings_vs vs;
|
||||||
|
struct agx_varyings_fs fs;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct agx_shader_info {
|
struct agx_shader_info {
|
||||||
|
@@ -375,9 +375,6 @@ typedef struct {
|
|||||||
struct agx_shader_info *out;
|
struct agx_shader_info *out;
|
||||||
struct agx_shader_key *key;
|
struct agx_shader_key *key;
|
||||||
|
|
||||||
/* Remapping table for varyings indexed by driver_location */
|
|
||||||
unsigned varyings[AGX_MAX_VARYINGS];
|
|
||||||
|
|
||||||
/* Place to start pushing new values */
|
/* Place to start pushing new values */
|
||||||
unsigned push_base;
|
unsigned push_base;
|
||||||
|
|
||||||
|
@@ -494,7 +494,6 @@
|
|||||||
<field name="Padding 1" size="8" start="1:24" type="hex" default="0x0"/>
|
<field name="Padding 1" size="8" start="1:24" type="hex" default="0x0"/>
|
||||||
<field name="Pipeline" size="32" start="2:0" type="address"/>
|
<field name="Pipeline" size="32" start="2:0" type="address"/>
|
||||||
<field name="CF bindings" size="32" start="3:0" type="address"/>
|
<field name="CF bindings" size="32" start="3:0" type="address"/>
|
||||||
<field name="Padding 2" size="16" start="3:16" type="hex" default="0x0"/>
|
|
||||||
<field name="More than 4 textures" start="4:0" size="1" type="bool"/>
|
<field name="More than 4 textures" start="4:0" size="1" type="bool"/>
|
||||||
</struct>
|
</struct>
|
||||||
|
|
||||||
|
@@ -897,6 +897,122 @@ agx_create_shader_state(struct pipe_context *pctx,
|
|||||||
return so;
|
return so;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned
|
||||||
|
agx_find_linked_slot(struct agx_varyings_vs *vs, struct agx_varyings_fs *fs,
|
||||||
|
gl_varying_slot slot, unsigned offset)
|
||||||
|
{
|
||||||
|
assert(offset < 4);
|
||||||
|
assert(slot != VARYING_SLOT_PNTC && "point coords aren't linked");
|
||||||
|
|
||||||
|
if (slot == VARYING_SLOT_POS) {
|
||||||
|
if (offset == 3) {
|
||||||
|
return 0; /* W */
|
||||||
|
} else if (offset == 2) {
|
||||||
|
assert(fs->reads_z);
|
||||||
|
return 1; /* Z */
|
||||||
|
} else {
|
||||||
|
unreachable("gl_Position.xy are not varyings");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned vs_index = vs->slots[slot];
|
||||||
|
|
||||||
|
assert(vs_index >= 4 && "gl_Position should have been the first 4 slots");
|
||||||
|
assert(vs_index < vs->nr_index &&
|
||||||
|
"varyings not written by vertex shader are undefined");
|
||||||
|
assert((vs_index < vs->base_index_fp16) ==
|
||||||
|
((vs_index + offset) < vs->base_index_fp16) &&
|
||||||
|
"a given varying must have a consistent type");
|
||||||
|
|
||||||
|
unsigned vs_user_index = (vs_index + offset) - 4;
|
||||||
|
|
||||||
|
if (fs->reads_z)
|
||||||
|
return vs_user_index + 2;
|
||||||
|
else
|
||||||
|
return vs_user_index + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned
|
||||||
|
agx_num_general_outputs(struct agx_varyings_vs *vs)
|
||||||
|
{
|
||||||
|
unsigned nr_vs = vs->nr_index;
|
||||||
|
bool writes_psiz = vs->slots[VARYING_SLOT_PSIZ] < nr_vs;
|
||||||
|
|
||||||
|
assert(nr_vs >= 4 && "gl_Position must be written");
|
||||||
|
if (writes_psiz)
|
||||||
|
assert(nr_vs >= 5 && "gl_PointSize is written");
|
||||||
|
|
||||||
|
return nr_vs - (writes_psiz ? 5 : 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
|
||||||
|
struct agx_varyings_fs *fs, bool first_provoking_vertex)
|
||||||
|
{
|
||||||
|
/* If there are no bindings, there's nothing to emit */
|
||||||
|
if (fs->nr_bindings == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
size_t linkage_size = AGX_CF_BINDING_HEADER_LENGTH +
|
||||||
|
(fs->nr_bindings * AGX_CF_BINDING_LENGTH);
|
||||||
|
|
||||||
|
void *tmp = alloca(linkage_size);
|
||||||
|
struct agx_cf_binding_header_packed *header = tmp;
|
||||||
|
struct agx_cf_binding_packed *bindings = (void *) (header + 1);
|
||||||
|
|
||||||
|
unsigned nr_slots = agx_num_general_outputs(vs) + 1 + (fs->reads_z ? 1 : 0);
|
||||||
|
|
||||||
|
agx_pack(header, CF_BINDING_HEADER, cfg) {
|
||||||
|
cfg.number_of_32_bit_slots = nr_slots;
|
||||||
|
cfg.number_of_coefficient_registers = fs->nr_cf;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < fs->nr_bindings; ++i) {
|
||||||
|
agx_pack(bindings + i, CF_BINDING, cfg) {
|
||||||
|
cfg.base_coefficient_register = fs->bindings[i].cf_base;
|
||||||
|
cfg.components = fs->bindings[i].count;
|
||||||
|
cfg.perspective = fs->bindings[i].perspective;
|
||||||
|
|
||||||
|
cfg.shade_model = fs->bindings[i].smooth ? AGX_SHADE_MODEL_GOURAUD :
|
||||||
|
first_provoking_vertex ? AGX_SHADE_MODEL_FLAT_VERTEX_0 :
|
||||||
|
AGX_SHADE_MODEL_FLAT_VERTEX_2;
|
||||||
|
|
||||||
|
if (fs->bindings[i].slot == VARYING_SLOT_PNTC) {
|
||||||
|
assert(fs->bindings[i].offset == 0);
|
||||||
|
cfg.point_sprite = true;
|
||||||
|
} else {
|
||||||
|
cfg.base_slot = agx_find_linked_slot(vs, fs, fs->bindings[i].slot,
|
||||||
|
fs->bindings[i].offset);
|
||||||
|
|
||||||
|
assert(cfg.base_slot + cfg.components <= nr_slots &&
|
||||||
|
"overflow slots");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fs->bindings[i].slot == VARYING_SLOT_POS) {
|
||||||
|
if (fs->bindings[i].offset == 2)
|
||||||
|
cfg.fragcoord_z = true;
|
||||||
|
else
|
||||||
|
assert(!cfg.perspective && "W must not be perspective divided");
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(cfg.base_coefficient_register + cfg.components <= fs->nr_cf &&
|
||||||
|
"overflowed coefficient registers");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct agx_ptr ptr = agx_pool_alloc_aligned(pool, (3 * linkage_size), 256);
|
||||||
|
assert(ptr.gpu < (1ull << 32) && "varyings must be in low memory");
|
||||||
|
|
||||||
|
/* I don't understand why the data structures are repeated thrice */
|
||||||
|
for (unsigned i = 0; i < 3; ++i) {
|
||||||
|
memcpy(((uint8_t *) ptr.cpu) + (i * linkage_size),
|
||||||
|
((uint8_t *) tmp) + (i * linkage_size),
|
||||||
|
linkage_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ptr.gpu;
|
||||||
|
}
|
||||||
|
|
||||||
/* Does not take ownership of key. Clones if necessary. */
|
/* Does not take ownership of key. Clones if necessary. */
|
||||||
static bool
|
static bool
|
||||||
agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
|
agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
|
||||||
@@ -942,35 +1058,10 @@ agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
|
|||||||
|
|
||||||
agx_compile_shader_nir(nir, &key->base, &binary, &compiled->info);
|
agx_compile_shader_nir(nir, &key->base, &binary, &compiled->info);
|
||||||
|
|
||||||
struct agx_varyings *varyings = &compiled->info.varyings;
|
|
||||||
unsigned packed_varying_sz = (AGX_CF_BINDING_HEADER_LENGTH +
|
|
||||||
varyings->nr_descs * AGX_CF_BINDING_LENGTH);
|
|
||||||
uint8_t *packed_varyings = alloca(packed_varying_sz);
|
|
||||||
|
|
||||||
agx_pack(packed_varyings, CF_BINDING_HEADER, cfg) {
|
|
||||||
cfg.number_of_32_bit_slots = varyings->nr_slots;
|
|
||||||
cfg.number_of_coefficient_registers = varyings->nr_slots;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(packed_varyings + AGX_CF_BINDING_HEADER_LENGTH,
|
|
||||||
varyings->packed, varyings->nr_descs * AGX_CF_BINDING_LENGTH);
|
|
||||||
|
|
||||||
if (binary.size) {
|
if (binary.size) {
|
||||||
struct agx_device *dev = agx_device(ctx->base.screen);
|
struct agx_device *dev = agx_device(ctx->base.screen);
|
||||||
compiled->bo = agx_bo_create(dev,
|
compiled->bo = agx_bo_create(dev, binary.size, AGX_MEMORY_TYPE_SHADER);
|
||||||
ALIGN_POT(binary.size, 256) + (3 * packed_varying_sz),
|
|
||||||
AGX_MEMORY_TYPE_SHADER);
|
|
||||||
memcpy(compiled->bo->ptr.cpu, binary.data, binary.size);
|
memcpy(compiled->bo->ptr.cpu, binary.data, binary.size);
|
||||||
|
|
||||||
|
|
||||||
/* TODO: Why is the varying descriptor duplicated 3x? */
|
|
||||||
unsigned offs = ALIGN_POT(binary.size, 256);
|
|
||||||
for (unsigned copy = 0; copy < 3; ++copy) {
|
|
||||||
memcpy(((uint8_t *) compiled->bo->ptr.cpu) + offs, packed_varyings, packed_varying_sz);
|
|
||||||
offs += packed_varying_sz;
|
|
||||||
}
|
|
||||||
|
|
||||||
compiled->varyings = compiled->bo->ptr.gpu + ALIGN_POT(binary.size, 256);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ralloc_free(nir);
|
ralloc_free(nir);
|
||||||
@@ -1161,7 +1252,7 @@ agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum
|
|||||||
agx_pack(record, SET_SHADER, cfg) {
|
agx_pack(record, SET_SHADER, cfg) {
|
||||||
cfg.code = cs->bo->ptr.gpu;
|
cfg.code = cs->bo->ptr.gpu;
|
||||||
cfg.register_quadwords = 0;
|
cfg.register_quadwords = 0;
|
||||||
cfg.unk_2b = cs->info.varyings.nr_slots;
|
cfg.unk_2b = cs->info.varyings.vs.nr_index;
|
||||||
cfg.unk_2 = 0x0d;
|
cfg.unk_2 = 0x0d;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1377,27 +1468,27 @@ demo_launch_fragment(struct agx_context *ctx, struct agx_pool *pool, uint32_t pi
|
|||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t
|
static uint64_t
|
||||||
demo_interpolation(struct agx_compiled_shader *fs, struct agx_pool *pool)
|
demo_interpolation(struct agx_varyings_vs *vs, struct agx_pool *pool)
|
||||||
{
|
{
|
||||||
struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_INTERPOLATION_LENGTH, 64);
|
struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_INTERPOLATION_LENGTH, 64);
|
||||||
|
|
||||||
agx_pack(t.cpu, INTERPOLATION, cfg) {
|
agx_pack(t.cpu, INTERPOLATION, cfg) {
|
||||||
cfg.varying_count = fs->info.varyings.nr_slots;
|
cfg.varying_count = agx_num_general_outputs(vs);
|
||||||
};
|
};
|
||||||
|
|
||||||
return t.gpu;
|
return t.gpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t
|
static uint64_t
|
||||||
demo_linkage(struct agx_compiled_shader *vs, struct agx_pool *pool)
|
demo_linkage(struct agx_compiled_shader *vs, struct agx_compiled_shader *fs, struct agx_pool *pool)
|
||||||
{
|
{
|
||||||
struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_LINKAGE_LENGTH, 64);
|
struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_LINKAGE_LENGTH, 64);
|
||||||
|
|
||||||
agx_pack(t.cpu, LINKAGE, cfg) {
|
agx_pack(t.cpu, LINKAGE, cfg) {
|
||||||
cfg.varying_count = vs->info.varyings.nr_slots;
|
cfg.varying_count = vs->info.varyings.vs.nr_index;
|
||||||
cfg.any_varyings = !!cfg.varying_count;
|
cfg.any_varyings = !!fs->info.varyings.fs.nr_bindings;
|
||||||
cfg.has_point_size = vs->info.writes_psiz;
|
cfg.has_point_size = vs->info.writes_psiz;
|
||||||
cfg.has_frag_coord_z = 1;
|
cfg.has_frag_coord_z = fs->info.varyings.fs.reads_z;
|
||||||
};
|
};
|
||||||
|
|
||||||
return t.gpu;
|
return t.gpu;
|
||||||
@@ -1505,8 +1596,8 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
|
|||||||
unsigned tex_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count;
|
unsigned tex_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count;
|
||||||
agx_pack(out, BIND_VERTEX_PIPELINE, cfg) {
|
agx_pack(out, BIND_VERTEX_PIPELINE, cfg) {
|
||||||
cfg.pipeline = pipeline_vertex;
|
cfg.pipeline = pipeline_vertex;
|
||||||
cfg.output_count_1 = ctx->vs->info.varyings.nr_slots;
|
cfg.output_count_1 = ctx->vs->info.varyings.vs.nr_index;
|
||||||
cfg.output_count_2 = ctx->vs->info.varyings.nr_slots;
|
cfg.output_count_2 = cfg.output_count_1;
|
||||||
|
|
||||||
cfg.groups_of_8_immediate_textures = DIV_ROUND_UP(tex_count, 8);
|
cfg.groups_of_8_immediate_textures = DIV_ROUND_UP(tex_count, 8);
|
||||||
cfg.groups_of_4_samplers = DIV_ROUND_UP(tex_count, 4);
|
cfg.groups_of_4_samplers = DIV_ROUND_UP(tex_count, 4);
|
||||||
@@ -1519,9 +1610,10 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
|
|||||||
bool reads_tib = ctx->fs->info.reads_tib;
|
bool reads_tib = ctx->fs->info.reads_tib;
|
||||||
bool sample_mask_from_shader = ctx->fs->info.writes_sample_mask;
|
bool sample_mask_from_shader = ctx->fs->info.writes_sample_mask;
|
||||||
|
|
||||||
agx_push_record(&out, 5, demo_interpolation(ctx->fs, pool));
|
agx_push_record(&out, 5, demo_interpolation(&ctx->vs->info.varyings.vs, pool));
|
||||||
agx_push_record(&out, 5, demo_launch_fragment(ctx, pool, pipeline_fragment, varyings, ctx->fs->info.varyings.nr_descs));
|
agx_push_record(&out, 5, demo_launch_fragment(ctx, pool, pipeline_fragment,
|
||||||
agx_push_record(&out, 4, demo_linkage(ctx->vs, pool));
|
varyings, ctx->fs->info.varyings.fs.nr_bindings));
|
||||||
|
agx_push_record(&out, 4, demo_linkage(ctx->vs, ctx->fs, pool));
|
||||||
agx_push_record(&out, 7, demo_rasterizer(ctx, pool, is_points));
|
agx_push_record(&out, 7, demo_rasterizer(ctx, pool, is_points));
|
||||||
agx_push_record(&out, 5, demo_unk11(pool, is_lines, is_points, reads_tib, sample_mask_from_shader));
|
agx_push_record(&out, 5, demo_unk11(pool, is_lines, is_points, reads_tib, sample_mask_from_shader));
|
||||||
|
|
||||||
@@ -1620,6 +1712,12 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||||||
agx_update_vs(ctx);
|
agx_update_vs(ctx);
|
||||||
agx_update_fs(ctx);
|
agx_update_fs(ctx);
|
||||||
|
|
||||||
|
/* TODO: Cache or dirty track */
|
||||||
|
uint32_t varyings = agx_link_varyings_vs_fs(&ctx->batch->pipeline_pool,
|
||||||
|
&ctx->vs->info.varyings.vs,
|
||||||
|
&ctx->fs->info.varyings.fs,
|
||||||
|
ctx->rast->base.flatshade_first);
|
||||||
|
|
||||||
agx_batch_add_bo(batch, ctx->vs->bo);
|
agx_batch_add_bo(batch, ctx->vs->bo);
|
||||||
agx_batch_add_bo(batch, ctx->fs->bo);
|
agx_batch_add_bo(batch, ctx->fs->bo);
|
||||||
|
|
||||||
@@ -1634,7 +1732,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||||||
uint8_t *out = agx_encode_state(ctx, batch->encoder_current,
|
uint8_t *out = agx_encode_state(ctx, batch->encoder_current,
|
||||||
agx_build_pipeline(ctx, ctx->vs, PIPE_SHADER_VERTEX),
|
agx_build_pipeline(ctx, ctx->vs, PIPE_SHADER_VERTEX),
|
||||||
agx_build_pipeline(ctx, ctx->fs, PIPE_SHADER_FRAGMENT),
|
agx_build_pipeline(ctx, ctx->fs, PIPE_SHADER_FRAGMENT),
|
||||||
ctx->fs->varyings, is_lines, info->mode == PIPE_PRIM_POINTS);
|
varyings, is_lines, info->mode == PIPE_PRIM_POINTS);
|
||||||
|
|
||||||
enum agx_primitive prim = agx_primitive_for_pipe(info->mode);
|
enum agx_primitive prim = agx_primitive_for_pipe(info->mode);
|
||||||
unsigned idx_size = info->index_size;
|
unsigned idx_size = info->index_size;
|
||||||
|
@@ -58,9 +58,6 @@ struct agx_compiled_shader {
|
|||||||
/* Mapped executable memory */
|
/* Mapped executable memory */
|
||||||
struct agx_bo *bo;
|
struct agx_bo *bo;
|
||||||
|
|
||||||
/* Varying descriptor (TODO: is this the right place?) */
|
|
||||||
uint64_t varyings;
|
|
||||||
|
|
||||||
/* Metadata returned from the compiler */
|
/* Metadata returned from the compiler */
|
||||||
struct agx_shader_info info;
|
struct agx_shader_info info;
|
||||||
};
|
};
|
||||||
|
Reference in New Issue
Block a user