v3d: Upload all of UBO[0] if any indirect load occurs.

The idea was that we could skip uploading the constant-indexed uniform
data and just upload the uniforms that are variably-indexed.  However,
since the VS bin and render shaders may each use a different set of
uniforms, this meant that we had to upload the UBO for each of them.  The
savings from the first are generally fairly small (the uniform array
usually takes up most of the space, other than in a couple of FSes in
shader-db), while the cost of the second is larger: 3DMMES2 was uploading
38k/frame of uniforms instead of 18k.

Given that the optimization is of dubious value, has a big downside, and
is quite a bit of code, just drop it.  No change in shader-db.  No change
on 3DMMES2 (n=15).
This commit is contained in:
Eric Anholt
2019-03-19 09:58:14 -07:00
parent 320e96bace
commit 16f2770eb4
4 changed files with 20 additions and 167 deletions

View File

@@ -231,31 +231,7 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
struct qreg offset;
if (instr->intrinsic == nir_intrinsic_load_uniform) {
/* Find what variable in the default uniform block this
* uniform load is coming from.
*/
uint32_t base = nir_intrinsic_base(instr);
int i;
struct v3d_ubo_range *range = NULL;
for (i = 0; i < c->num_ubo_ranges; i++) {
range = &c->ubo_ranges[i];
if (base >= range->src_offset &&
base < range->src_offset + range->size) {
break;
}
}
/* The driver-location-based offset always has to be within a
* declared uniform range.
*/
assert(i != c->num_ubo_ranges);
if (!c->ubo_range_used[i]) {
c->ubo_range_used[i] = true;
range->dst_offset = c->next_ubo_dst_offset;
c->next_ubo_dst_offset += range->size;
}
const_offset += base - range->src_offset + range->dst_offset;
const_offset += nir_intrinsic_base(instr);
offset = vir_uniform(c, QUNIFORM_UBO_ADDR,
v3d_unit_data_create(0, const_offset));
const_offset = 0;
@@ -668,27 +644,6 @@ add_output(struct v3d_compile *c,
v3d_slot_from_slot_and_component(slot, swizzle);
}
static void
declare_uniform_range(struct v3d_compile *c, uint32_t start, uint32_t size)
{
unsigned array_id = c->num_ubo_ranges++;
if (array_id >= c->ubo_ranges_array_size) {
c->ubo_ranges_array_size = MAX2(c->ubo_ranges_array_size * 2,
array_id + 1);
c->ubo_ranges = reralloc(c, c->ubo_ranges,
struct v3d_ubo_range,
c->ubo_ranges_array_size);
c->ubo_range_used = reralloc(c, c->ubo_range_used,
bool,
c->ubo_ranges_array_size);
}
c->ubo_ranges[array_id].dst_offset = 0;
c->ubo_ranges[array_id].src_offset = start;
c->ubo_ranges[array_id].size = size;
c->ubo_range_used[array_id] = false;
}
/**
* If compare_instr is a valid comparison instruction, emits the
* compare_instr's comparison and returns the sel_instr's return value based
@@ -1536,23 +1491,6 @@ ntq_setup_outputs(struct v3d_compile *c)
}
}
/**
 * Declares one UBO range for every variable in the shader's uniform list
 * whose mode is nir_var_uniform.
 *
 * Ranges are expressed in bytes: the variable's driver_location and its
 * attribute-slot count are both scaled by the size of four floats.
 */
static void
ntq_setup_uniforms(struct v3d_compile *c)
{
        const unsigned bytes_per_vec4 = 4 * sizeof(float);

        nir_foreach_variable(var, &c->s->uniforms) {
                uint32_t slot_count = glsl_count_attribute_slots(var->type,
                                                                 false);

                if (var->data.mode == nir_var_uniform) {
                        declare_uniform_range(c,
                                              var->data.driver_location *
                                              bytes_per_vec4,
                                              slot_count * bytes_per_vec4);
                }
        }
}
/**
* Sets up the mapping from nir_register to struct qreg *.
*
@@ -2361,7 +2299,6 @@ nir_to_vir(struct v3d_compile *c)
ntq_setup_vpm_inputs(c);
ntq_setup_outputs(c);
ntq_setup_uniforms(c);
ntq_setup_registers(c, &c->s->registers);
/* Find the main function and emit the body. */

View File

@@ -318,25 +318,6 @@ static inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot)
return slot.slot_and_component & 3;
}
/**
* One contiguous span of the gallium uniform data that gets copied into the
* UBO uploaded for a shader (src_offset -> dst_offset, size bytes).
*/
struct v3d_ubo_range {
/**
* offset in bytes from the start of the ubo where this range is
* uploaded.
*
* Initialized to 0 when the range is declared; the real value is only
* assigned once the range's ubo_range_used flag has been set by a
* uniform load that falls inside it.
*/
uint32_t dst_offset;
/**
* offset in bytes from the start of the gallium uniforms where the
* data comes from.
*/
uint32_t src_offset;
/** size in bytes of this ubo range */
uint32_t size;
};
struct v3d_key {
void *shader_state;
struct {
@@ -533,13 +514,6 @@ struct v3d_compile {
bool uses_center_w;
bool writes_z;
struct v3d_ubo_range *ubo_ranges;
bool *ubo_range_used;
uint32_t ubo_ranges_array_size;
/** Number of uniform areas tracked in ubo_ranges. */
uint32_t num_ubo_ranges;
uint32_t next_ubo_dst_offset;
/* State for whether we're executing on each channel currently. 0 if
* yes, otherwise a block number + 1 that the channel jumped to.
*/
@@ -674,9 +648,6 @@ struct v3d_uniform_list {
struct v3d_prog_data {
struct v3d_uniform_list uniforms;
struct v3d_ubo_range *ubo_ranges;
uint32_t num_ubo_ranges;
uint32_t ubo_size;
uint32_t spill_size;
uint8_t threads;

View File

@@ -582,41 +582,6 @@ v3d_set_prog_data_uniforms(struct v3d_compile *c,
count * sizeof(*ulist->contents));
}
/* Copy the compiler UBO range state to the compiled shader, dropping out
* arrays that were never referenced by an indirect load.
*
* (Note that QIR dead code elimination of an array access still leaves that
* array alive, though)
*/
static void
v3d_set_prog_data_ubo(struct v3d_compile *c,
struct v3d_prog_data *prog_data)
{
if (!c->num_ubo_ranges)
return;
prog_data->num_ubo_ranges = 0;
prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
c->num_ubo_ranges);
for (int i = 0; i < c->num_ubo_ranges; i++) {
if (!c->ubo_range_used[i])
continue;
struct v3d_ubo_range *range = &c->ubo_ranges[i];
prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
prog_data->ubo_size += range->size;
}
if (prog_data->ubo_size) {
if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
vir_get_stage_name(c),
c->program_id, c->variant_id,
prog_data->ubo_size / 4);
}
}
}
static void
v3d_vs_set_prog_data(struct v3d_compile *c,
struct v3d_vs_prog_data *prog_data)
@@ -713,7 +678,6 @@ v3d_set_prog_data(struct v3d_compile *c,
prog_data->spill_size = c->spill_size;
v3d_set_prog_data_uniforms(c, prog_data);
v3d_set_prog_data_ubo(c, prog_data);
if (c->s->info.stage == MESA_SHADER_VERTEX) {
v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);

View File

@@ -22,6 +22,7 @@
*/
#include "util/u_pack_color.h"
#include "util/u_upload_mgr.h"
#include "util/format_srgb.h"
#include "v3d_context.h"
@@ -95,28 +96,6 @@ get_image_size(struct v3d_shaderimg_stateobj *shaderimg,
}
}
static struct v3d_bo *
v3d_upload_ubo(struct v3d_context *v3d,
struct v3d_compiled_shader *shader,
const uint32_t *gallium_uniforms)
{
if (!shader->prog_data.base->ubo_size)
return NULL;
struct v3d_bo *ubo = v3d_bo_alloc(v3d->screen,
shader->prog_data.base->ubo_size,
"ubo");
void *data = v3d_bo_map(ubo);
for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) {
memcpy(data + shader->prog_data.base->ubo_ranges[i].dst_offset,
((const void *)gallium_uniforms +
shader->prog_data.base->ubo_ranges[i].src_offset),
shader->prog_data.base->ubo_ranges[i].size);
}
return ubo;
}
/**
* Writes the V3D 3.x P0 (CFG_MODE=1) texture parameter.
*
@@ -235,7 +214,6 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms;
struct v3d_job *job = v3d->job;
const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
struct v3d_bo *ubo = v3d_upload_ubo(v3d, shader, gallium_uniforms);
/* We always need to return some space for uniforms, because the HW
* will be prefetching, even if we don't read any in the program.
@@ -329,21 +307,26 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
v3d->zsa->base.alpha.ref_value);
break;
case QUNIFORM_UBO_ADDR:
if (data == 0) {
cl_aligned_reloc(&job->indirect, &uniforms,
ubo, 0);
} else {
int ubo_index = v3d_unit_data_get_unit(data);
struct v3d_resource *rsc =
v3d_resource(cb->cb[ubo_index].buffer);
cl_aligned_reloc(&job->indirect, &uniforms,
rsc->bo,
cb->cb[ubo_index].buffer_offset +
v3d_unit_data_get_offset(data));
case QUNIFORM_UBO_ADDR: {
uint32_t unit = v3d_unit_data_get_unit(data);
/* Constant buffer 0 may be a system memory pointer,
* in which case we want to upload a shadow copy to
* the GPU.
*/
if (!cb->cb[unit].buffer) {
u_upload_data(v3d->uploader, 0,
cb->cb[unit].buffer_size, 16,
cb->cb[unit].user_buffer,
&cb->cb[unit].buffer_offset,
&cb->cb[unit].buffer);
}
cl_aligned_reloc(&job->indirect, &uniforms,
v3d_resource(cb->cb[unit].buffer)->bo,
cb->cb[unit].buffer_offset +
v3d_unit_data_get_offset(data));
break;
}
case QUNIFORM_SSBO_OFFSET: {
struct pipe_shader_buffer *sb =
@@ -397,8 +380,6 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
cl_end(&job->indirect, uniforms);
v3d_bo_unreference(&ubo);
return uniform_stream;
}