nir: Add a range_base+range to nir_intrinsic_load_ubo().

For UBO accesses to be the same performance as classic GL default uniform
block uniforms, we need to be able to push them through the same path.  On
freedreno, we haven't been uploading UBOs as push constants when they're
used for indirect array access, because we don't know what range of the
UBO is needed for an access.

I believe we won't be able to calculate the range in general in spirv
given casts that can happen, so we define a [0, ~0] range to be "We don't
know anything".  We use that at the moment for all UBO loads except for
nir_lower_uniforms_to_ubo, where we now avoid losing the range information
that default uniform block loads come with.

In a departure from other NIR intrinsics with a "base", I didn't make the
base be something you have to add to the src[1] offset.  This keeps us
from needing to modify all drivers (particularly since the base+offset
thing can mean needing to do addition in the backend), makes backend
tracking of ranges easy, and makes the range calculations in
load_store_vectorizer reasonable.  However, this could definitely cause
some confusion for people used to the normal NIR base.

Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Reviewed-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6359>
This commit is contained in:
Eric Anholt
2020-08-14 13:10:02 -07:00
committed by Marge Bot
parent 3a9356831a
commit f3b33a5a35
11 changed files with 123 additions and 4 deletions

View File

@@ -1642,8 +1642,18 @@ typedef enum {
NIR_INTRINSIC_UCP_ID,
/**
* The amount of data, starting from BASE, that this instruction may
* access. This is used to provide bounds if the offset is not constant.
* The start of NIR_INTRINSIC_RANGE. Only present on instructions that
* don't have NIR_INTRINSIC_BASE.
*
* If the [range_base, range] is [0, ~0], then we don't know the possible
* range of the access.
*/
NIR_INTRINSIC_RANGE_BASE,
/**
* The amount of data, starting from BASE or RANGE_BASE, that this
* instruction may access. This is used to provide bounds if the offset is
* not constant.
*/
NIR_INTRINSIC_RANGE,
@@ -1896,6 +1906,7 @@ INTRINSIC_IDX_ACCESSORS(base, BASE, int)
INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned)
INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned)
INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned)
INTRINSIC_IDX_ACCESSORS(range_base, RANGE_BASE, unsigned)
INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned)
INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned)
INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned)

View File

@@ -89,6 +89,10 @@ UCP_ID = "NIR_INTRINSIC_UCP_ID"
# may access. This is used to provide bounds if the offset is
# not constant.
RANGE = "NIR_INTRINSIC_RANGE"
# The offset to the start of the NIR_INTRINSIC_RANGE. This is an alternative
# to NIR_INTRINSIC_BASE for describing the valid range in intrinsics that don't
# have the implicit addition of a base to the offset.
RANGE_BASE = "NIR_INTRINSIC_RANGE_BASE"
# The vulkan descriptor set binding for vulkan_resource_index
# intrinsic
DESC_SET = "NIR_INTRINSIC_DESC_SET"
@@ -724,6 +728,10 @@ intrinsic("load_fs_input_interp_deltas", src_comp=[1], dest_comp=3,
# range (starting at base) of the data from which we are loading. If
# range == 0, then the range is unknown.
#
# UBO load operations have a nir_intrinsic_range_base() and
# nir_intrinsic_range() that specify the byte range [range_base,
# range_base+range] of the UBO that the src offset access must lie within.
#
# Some load operations such as UBO/SSBO load and per_vertex loads take an
# additional source to specify which UBO/SSBO/vertex to load from.
#
@@ -739,7 +747,7 @@ def load(name, src_comp, indices=[], flags=[]):
# src[] = { offset }.
load("uniform", [1], [BASE, RANGE, TYPE], [CAN_ELIMINATE, CAN_REORDER])
# src[] = { buffer_index, offset }.
load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE, CAN_REORDER])
load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE], flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { buffer_index, offset in vec4 units }
load("ubo_vec4", [-1, 1], [ACCESS, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { offset }.

View File

@@ -1030,6 +1030,11 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
nir_intrinsic_set_align(load, align_mul, align_offset);
if (op == nir_intrinsic_load_ubo) {
nir_intrinsic_set_range_base(load, 0);
nir_intrinsic_set_range(load, ~0);
}
assert(intrin->dest.is_ssa);
load->num_components = num_components;
nir_ssa_dest_init(&load->instr, &load->dest, num_components,

View File

@@ -76,6 +76,9 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
nir_builder_instr_insert(b, &load->instr);
nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
nir_instr_remove(&instr->instr);
return true;
}

View File

@@ -812,6 +812,16 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
if (first != low && nir_intrinsic_has_base(first->intrin))
nir_intrinsic_set_base(first->intrin, nir_intrinsic_base(low->intrin));
if (nir_intrinsic_has_range_base(first->intrin)) {
uint32_t low_base = nir_intrinsic_range_base(low->intrin);
uint32_t high_base = nir_intrinsic_range_base(high->intrin);
uint32_t low_end = low_base + nir_intrinsic_range(low->intrin);
uint32_t high_end = high_base + nir_intrinsic_range(high->intrin);
nir_intrinsic_set_range_base(first->intrin, low_base);
nir_intrinsic_set_range(first->intrin, MAX2(low_end, high_end) - low_base);
}
first->key = low->key;
first->offset = low->offset;

View File

@@ -816,6 +816,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
[NIR_INTRINSIC_STREAM_ID] = "stream-id",
[NIR_INTRINSIC_UCP_ID] = "ucp-id",
[NIR_INTRINSIC_RANGE] = "range",
[NIR_INTRINSIC_RANGE_BASE] = "range_base",
[NIR_INTRINSIC_DESC_SET] = "desc-set",
[NIR_INTRINSIC_BINDING] = "binding",
[NIR_INTRINSIC_COMPONENT] = "component",

View File

@@ -601,6 +601,9 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
}
case nir_intrinsic_load_ubo:
/* Make sure that the creator didn't forget to set the range_base+range. */
validate_assert(state, nir_intrinsic_range(instr) != 0);
/* Fall through */
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_shared:
case nir_intrinsic_load_global:

View File

@@ -223,10 +223,27 @@ nir_load_store_vectorize_test::create_indirect_load(
} else {
load->src[0] = nir_src_for_ssa(offset);
}
int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;
if (mode != nir_var_mem_push_const) {
nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0);
nir_intrinsic_set_align(load, byte_size, 0);
nir_intrinsic_set_access(load, (gl_access_qualifier)access);
}
if (nir_intrinsic_has_range_base(load)) {
uint32_t range = byte_size * components;
int offset_src = res ? 1 : 0;
if (nir_src_is_const(load->src[offset_src])) {
nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
nir_intrinsic_set_range(load, range);
} else {
/* Unknown range */
nir_intrinsic_set_range_base(load, 0);
nir_intrinsic_set_range(load, ~0);
}
}
nir_builder_instr_insert(b, &load->instr);
nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
movs[id] = mov;
@@ -380,6 +397,8 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
ASSERT_EQ(load->dest.ssa.bit_size, 32);
ASSERT_EQ(load->dest.ssa.num_components, 2);
ASSERT_EQ(nir_intrinsic_range_base(load), 0);
ASSERT_EQ(nir_intrinsic_range(load), 8);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
@@ -400,11 +419,41 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
ASSERT_EQ(load->dest.ssa.bit_size, 32);
ASSERT_EQ(load->dest.ssa.num_components, 3);
ASSERT_EQ(nir_intrinsic_range_base(load), 0);
ASSERT_EQ(nir_intrinsic_range(load), 12);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
}
/* Test for a bug in range handling */
TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
{
create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);
nir_validate_shader(b->shader, NULL);
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
ASSERT_EQ(load->dest.ssa.bit_size, 32);
ASSERT_EQ(load->dest.ssa.num_components, 4);
ASSERT_EQ(nir_intrinsic_range_base(load), 0);
ASSERT_EQ(nir_intrinsic_range(load), 16);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
ASSERT_EQ(loads[0x1]->swizzle[0], 0);
ASSERT_EQ(loads[0x1]->swizzle[1], 1);
ASSERT_EQ(loads[0x1]->swizzle[2], 2);
ASSERT_EQ(loads[0x1]->swizzle[3], 3);
ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}
TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
create_load(nir_var_mem_ubo, 0, 0, 0x1);
@@ -420,6 +469,8 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
ASSERT_EQ(load->dest.ssa.bit_size, 32);
ASSERT_EQ(load->dest.ssa.num_components, 1);
ASSERT_EQ(nir_intrinsic_range_base(load), 0);
ASSERT_EQ(nir_intrinsic_range(load), 4);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);

View File

@@ -853,6 +853,9 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
if (op == nir_intrinsic_load_push_constant) {
nir_intrinsic_set_base(instr, access_offset);
nir_intrinsic_set_range(instr, access_size);
} else if (op == nir_intrinsic_load_ubo) {
nir_intrinsic_set_range_base(instr, 0);
nir_intrinsic_set_range(instr, ~0);
}
if (op == nir_intrinsic_load_ubo ||

View File

@@ -75,6 +75,7 @@ struct ttn_compile {
nir_variable *samplers[PIPE_MAX_SAMPLERS];
nir_variable *images[PIPE_MAX_SHADER_IMAGES];
nir_variable *ssbo[PIPE_MAX_SHADER_BUFFERS];
uint32_t ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS];
unsigned num_samplers;
unsigned num_images;
@@ -315,6 +316,8 @@ ttn_emit_declaration(struct ttn_compile *c)
decl->Dim.Index2D != 0) {
b->shader->info.num_ubos =
MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
c->ubo_sizes[decl->Dim.Index2D] =
MAX2(c->ubo_sizes[decl->Dim.Index2D], decl->Range.Last * 16);
return;
}
@@ -741,12 +744,27 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
/* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
offset = nir_ishl(b, offset, nir_imm_int(b, 4));
nir_intrinsic_set_align(load, 16, 0);
/* Set a very conservative base/range of the access: 16 bytes if not
* indirect at all, offset to the end of the UBO if the offset is
* indirect, and totally unknown if the block number is indirect.
*/
uint32_t base = index * 16;
nir_intrinsic_set_range_base(load, base);
if (dimind)
nir_intrinsic_set_range(load, ~0);
else if (indirect)
nir_intrinsic_set_range(load, c->ubo_sizes[dim->Index] - base);
else
nir_intrinsic_set_range(load, base + 16);
} else {
nir_intrinsic_set_base(load, index);
if (indirect) {
offset = ttn_src_for_indirect(c, indirect);
nir_intrinsic_set_range(load, c->ubo_sizes[0] - index);
} else {
offset = nir_imm_int(b, 0);
nir_intrinsic_set_range(load, 1);
}
}
load->src[srcn++] = nir_src_for_ssa(offset);

View File

@@ -538,6 +538,8 @@ build_ssbo_descriptor_load(const VkDescriptorType desc_type,
desc_load->num_components = 4;
nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &desc_load->instr);
nir_intrinsic_set_range_base(desc_load, 0);
nir_intrinsic_set_range(desc_load, ~0);
return &desc_load->dest.ssa;
}
@@ -745,6 +747,8 @@ build_descriptor_load(nir_deref_instr *deref, unsigned offset,
nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
num_components, bit_size, NULL);
nir_builder_instr_insert(b, &desc_load->instr);
nir_intrinsic_set_range_base(desc_load, 0);
nir_intrinsic_set_range(desc_load, ~0);
return &desc_load->dest.ssa;
}
@@ -846,6 +850,8 @@ lower_load_constant(nir_intrinsic_instr *intrin,
load_ubo->src[0] = nir_src_for_ssa(index);
load_ubo->src[1] = nir_src_for_ssa(offset);
nir_intrinsic_set_align(load_ubo, intrin->dest.ssa.bit_size / 8, 0);
nir_intrinsic_set_range_base(load_ubo, nir_intrinsic_base(intrin));
nir_intrinsic_set_range(load_ubo, nir_intrinsic_range(intrin));
nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size, NULL);