nir/range_analysis: Handle vectors better in ssa_def_bits_used

If a query is made of a vector ssa_def (possibly from an intermediate
result), return all_bits.  If a constant source is a vector, swizzle
the correct component.

Unit tests were added for the constant vector cases.  I don't see a
great way to write unit tests for the other cases.

v2: Add a FINISHME comment about u16vec2 hardware.

Fixes: 96303a59ea ("nir: Add some range analysis for used bits")
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9123>
Author: Ian Romanick
Date:   2021-02-17 19:40:07 -08:00
Committed-by: Marge Bot
Parent: ce649e54f1
Commit: f2656569c6

3 changed files with 306 additions and 4 deletions

src/compiler/nir/meson.build

@@ -485,4 +485,17 @@ if with_tests
     ),
     suite : ['compiler', 'nir'],
   )
+
+  test(
+    'ssa_def_bits_used',
+    executable(
+      'ssa_def_bits_used',
+      files('tests/ssa_def_bits_used_tests.cpp'),
+      c_args : [c_msvc_compat_args, no_override_init_args],
+      gnu_symbol_visibility : 'hidden',
+      include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux],
+      dependencies : [dep_thread, idep_gtest, idep_nir, idep_mesautil],
+    ),
+    suite : ['compiler', 'nir'],
+  )
 endif
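
With the test registered, it can be run on its own through Meson's test
runner.  Assuming an already-configured build directory named build/ (the
directory name is illustrative), something like the following should work:

   meson test -C build ssa_def_bits_used

or, to run it together with the rest of the NIR tests,
meson test -C build --suite nir.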

src/compiler/nir/nir_range_analysis.c

@@ -1502,6 +1502,18 @@ ssa_def_bits_used(nir_ssa_def *def, int recur)
    uint64_t bits_used = 0;
    uint64_t all_bits = BITFIELD64_MASK(def->bit_size);
 
+   /* Querying the bits used from a vector is too hard of a question to
+    * answer.  Return the conservative answer that all bits are used.  To
+    * handle this, the function would need to be extended to be a query of a
+    * single component of the vector.  That would also be necessary to fully
+    * handle the 'num_components > 1' case inside the loop below.
+    *
+    * FINISHME: This restriction will eventually need to be lifted to be
+    * useful for hardware that uses u16vec2 as the native 16-bit integer type.
+    */
+   if (def->num_components > 1)
+      return all_bits;
+
    /* Limit recursion */
    if (recur-- <= 0)
       return all_bits;
@@ -1512,6 +1524,22 @@ ssa_def_bits_used(nir_ssa_def *def, int recur)
       nir_alu_instr *use_alu = nir_instr_as_alu(src->parent_instr);
       unsigned src_idx = container_of(src, nir_alu_src, src) - use_alu->src;
 
+      /* If a user of the value produces a vector result, return the
+       * conservative answer that all bits are used.  It is possible to
+       * answer this query by looping over the components used.  For example,
+       *
+       *    vec4 32 ssa_5 = load_const(0x0000f000, 0x00000f00, 0x000000f0, 0x0000000f)
+       *    ...
+       *    vec4 32 ssa_8 = iand ssa_7.xxxx, ssa_5
+       *
+       * could conceivably return 0x0000ffff when querying the bits used of
+       * ssa_7.  This is unlikely to be worth the effort because the
+       * question can eventually be answered after the shader has been
+       * scalarized.
+       */
+      if (use_alu->dest.dest.ssa.num_components > 1)
+         return all_bits;
+
       switch (use_alu->op) {
       case nir_op_u2u8:
       case nir_op_i2i8:
@@ -1531,7 +1559,8 @@ ssa_def_bits_used(nir_ssa_def *def, int recur)
       case nir_op_extract_u8:
       case nir_op_extract_i8:
          if (src_idx == 0 && nir_src_is_const(use_alu->src[1].src)) {
-            unsigned chunk = nir_src_as_uint(use_alu->src[1].src);
+            unsigned chunk = nir_src_comp_as_uint(use_alu->src[1].src,
+                                                  use_alu->src[1].swizzle[0]);
             bits_used |= 0xffull << (chunk * 8);
             break;
          } else {
@@ -1541,7 +1570,8 @@ ssa_def_bits_used(nir_ssa_def *def, int recur)
       case nir_op_extract_u16:
       case nir_op_extract_i16:
          if (src_idx == 0 && nir_src_is_const(use_alu->src[1].src)) {
-            unsigned chunk = nir_src_as_uint(use_alu->src[1].src);
+            unsigned chunk = nir_src_comp_as_uint(use_alu->src[1].src,
+                                                  use_alu->src[1].swizzle[0]);
             bits_used |= 0xffffull << (chunk * 16);
             break;
          } else {
@@ -1561,7 +1591,8 @@ ssa_def_bits_used(nir_ssa_def *def, int recur)
       case nir_op_iand:
          assert(src_idx < 2);
          if (nir_src_is_const(use_alu->src[1 - src_idx].src)) {
-            uint64_t u64 = nir_src_as_uint(use_alu->src[1 - src_idx].src);
+            uint64_t u64 = nir_src_comp_as_uint(use_alu->src[1 - src_idx].src,
+                                                use_alu->src[1 - src_idx].swizzle[0]);
             bits_used |= u64;
             break;
          } else {
@@ -1571,7 +1602,8 @@ ssa_def_bits_used(nir_ssa_def *def, int recur)
       case nir_op_ior:
          assert(src_idx < 2);
          if (nir_src_is_const(use_alu->src[1 - src_idx].src)) {
-            uint64_t u64 = nir_src_as_uint(use_alu->src[1 - src_idx].src);
+            uint64_t u64 = nir_src_comp_as_uint(use_alu->src[1 - src_idx].src,
+                                                use_alu->src[1 - src_idx].swizzle[0]);
             bits_used |= all_bits & ~u64;
             break;
          } else {
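
To make the constant-vector fix concrete, here is a minimal standalone C
model of the before/after lookup.  The struct and function names
(const_vec, as_uint, comp_as_uint) are invented for illustration and are
not the real NIR types; in NIR the corresponding helpers are
nir_src_as_uint() and nir_src_comp_as_uint().

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for a constant vector ALU source: four 32-bit components
 * plus the swizzle that selects which component the instruction reads.
 */
struct const_vec {
   uint32_t comp[4];
   unsigned swizzle[4];
   unsigned num_components;
};

/* Models nir_src_as_uint(): only meaningful for scalar sources. */
static uint64_t
as_uint(const struct const_vec *v)
{
   assert(v->num_components == 1);
   return v->comp[0];
}

/* Models nir_src_comp_as_uint(): reads the one component selected by the
 * caller, which is what the iand/ior/extract_* cases switch over to.
 */
static uint64_t
comp_as_uint(const struct const_vec *v, unsigned comp)
{
   assert(comp < v->num_components);
   return v->comp[comp];
}

int
main(void)
{
   /* A scalar source: the old helper is fine here. */
   struct const_vec scalar = { .comp = { 0xffu }, .num_components = 1 };
   printf("scalar mask:   0x%08" PRIx64 "\n", as_uint(&scalar));

   /* A vector mask like the one in the iand_with_const_vector test below.
    * The instruction reads component 2 (.z), so the bits-used answer must
    * come from comp[2] (0x0000ff00), not comp[0].
    */
   struct const_vec mask = {
      .comp = { 255u << 24, 255u << 16, 255u << 8, 255u },
      .swizzle = { 2 },
      .num_components = 4,
   };
   printf("swizzled mask: 0x%08" PRIx64 "\n",
          comp_as_uint(&mask, mask.swizzle[0]));

   return 0;
}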

src/compiler/nir/tests/ssa_def_bits_used_tests.cpp

@@ -0,0 +1,257 @@
/*
* Copyright © 2021 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#include <gtest/gtest.h>
#include "nir.h"
#include "nir_builder.h"
#include "nir_range_analysis.h"

class ssa_def_bits_used_test : public ::testing::Test {
protected:
   ssa_def_bits_used_test()
   {
      glsl_type_singleton_init_or_ref();

      static const nir_shader_compiler_options options = { };
      bld = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, &options,
                                           "ssa_def_bits_used test");
   }

   ~ssa_def_bits_used_test()
   {
      ralloc_free(bld.shader);
      glsl_type_singleton_decref();
   }

   nir_alu_instr *build_alu_instr(nir_op op, nir_ssa_def *, nir_ssa_def *);

   struct nir_builder bld;
};
static bool
is_used_once(const nir_ssa_def *def)
{
   return list_is_singular(&def->uses) &&
          list_is_empty(&def->if_uses);
}

nir_alu_instr *
ssa_def_bits_used_test::build_alu_instr(nir_op op,
                                        nir_ssa_def *src0, nir_ssa_def *src1)
{
   nir_ssa_def *def = nir_build_alu(&bld, op, src0, src1, NULL, NULL);
   if (def == NULL)
      return NULL;

   nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr);
   if (alu == NULL)
      return NULL;

   /* Forcing the result to be a scalar matters: ssa_def_bits_used returns
    * the conservative all-bits answer for any vector destination, and these
    * tests want to exercise the per-component constant paths instead.
    */
   alu->dest.write_mask = 1;
   alu->dest.dest.ssa.num_components = 1;

   return alu;
}
TEST_F(ssa_def_bits_used_test, iand_with_const_vector)
{
   static const unsigned src0_imm[4] = { 255u << 24, 255u << 16, 255u << 8, 255u };

   nir_ssa_def *src0 = nir_imm_ivec4(&bld,
                                     src0_imm[0], src0_imm[1],
                                     src0_imm[2], src0_imm[3]);
   nir_ssa_def *src1 = nir_imm_int(&bld, 0xffffffff);

   nir_alu_instr *alu = build_alu_instr(nir_op_iand, src0, src1);

   ASSERT_NE((void *) 0, alu);

   for (unsigned i = 0; i < 4; i++) {
      /* If the test is changed, and somehow src1 is used multiple times,
       * nir_ssa_def_bits_used will accumulate *all* the uses (as it should).
       * This isn't what we're trying to test here.
       */
      ASSERT_TRUE(is_used_once(src1));

      alu->src[0].swizzle[0] = i;

      const uint64_t bits_used = nir_ssa_def_bits_used(alu->src[1].src.ssa);

      /* The answer should be the value swizzled from src0. */
      EXPECT_EQ(src0_imm[i], bits_used);
   }
}
TEST_F(ssa_def_bits_used_test, ior_with_const_vector)
{
   static const unsigned src0_imm[4] = { 255u << 24, 255u << 16, 255u << 8, 255u };

   nir_ssa_def *src0 = nir_imm_ivec4(&bld,
                                     src0_imm[0], src0_imm[1],
                                     src0_imm[2], src0_imm[3]);
   nir_ssa_def *src1 = nir_imm_int(&bld, 0xffffffff);

   nir_alu_instr *alu = build_alu_instr(nir_op_ior, src0, src1);

   ASSERT_NE((void *) 0, alu);

   for (unsigned i = 0; i < 4; i++) {
      /* If the test is changed, and somehow src1 is used multiple times,
       * nir_ssa_def_bits_used will accumulate *all* the uses (as it should).
       * This isn't what we're trying to test here.
       */
      ASSERT_TRUE(is_used_once(src1));

      alu->src[0].swizzle[0] = i;

      const uint64_t bits_used = nir_ssa_def_bits_used(alu->src[1].src.ssa);

      /* The answer should be the value swizzled from ~src0. */
      EXPECT_EQ(~src0_imm[i], bits_used);
   }
}
TEST_F(ssa_def_bits_used_test, extract_i16_with_const_index)
{
   nir_ssa_def *src0 = nir_imm_int(&bld, 0xffffffff);

   /* Components 0 and 3 hold 9, which is not a valid 16-bit chunk index for
    * a 32-bit value, so the loop below only swizzles components 1 and 2.
    */
   static const unsigned src1_imm[4] = { 9, 1, 0, 9 };
   nir_ssa_def *src1 = nir_imm_ivec4(&bld,
                                     src1_imm[0],
                                     src1_imm[1],
                                     src1_imm[2],
                                     src1_imm[3]);

   nir_alu_instr *alu = build_alu_instr(nir_op_extract_i16, src0, src1);

   ASSERT_NE((void *) 0, alu);

   for (unsigned i = 1; i < 3; i++) {
      /* If the test is changed, and somehow src1 is used multiple times,
       * nir_ssa_def_bits_used will accumulate *all* the uses (as it should).
       * This isn't what we're trying to test here.
       */
      ASSERT_TRUE(is_used_once(src1));

      alu->src[1].swizzle[0] = i;

      const uint64_t bits_used = nir_ssa_def_bits_used(alu->src[0].src.ssa);

      EXPECT_EQ(0xffffu << (16 * src1_imm[i]), bits_used);
   }
}
TEST_F(ssa_def_bits_used_test, extract_u16_with_const_index)
{
   nir_ssa_def *src0 = nir_imm_int(&bld, 0xffffffff);

   /* Components 0 and 3 hold 9, which is not a valid 16-bit chunk index for
    * a 32-bit value, so the loop below only swizzles components 1 and 2.
    */
   static const unsigned src1_imm[4] = { 9, 1, 0, 9 };
   nir_ssa_def *src1 = nir_imm_ivec4(&bld,
                                     src1_imm[0],
                                     src1_imm[1],
                                     src1_imm[2],
                                     src1_imm[3]);

   nir_alu_instr *alu = build_alu_instr(nir_op_extract_u16, src0, src1);

   ASSERT_NE((void *) 0, alu);

   for (unsigned i = 1; i < 3; i++) {
      /* If the test is changed, and somehow src1 is used multiple times,
       * nir_ssa_def_bits_used will accumulate *all* the uses (as it should).
       * This isn't what we're trying to test here.
       */
      ASSERT_TRUE(is_used_once(src1));

      alu->src[1].swizzle[0] = i;

      const uint64_t bits_used = nir_ssa_def_bits_used(alu->src[0].src.ssa);

      EXPECT_EQ(0xffffu << (16 * src1_imm[i]), bits_used);
   }
}
TEST_F(ssa_def_bits_used_test, extract_i8_with_const_index)
{
   nir_ssa_def *src0 = nir_imm_int(&bld, 0xffffffff);

   static const unsigned src1_imm[4] = { 3, 2, 1, 0 };
   nir_ssa_def *src1 = nir_imm_ivec4(&bld,
                                     src1_imm[0],
                                     src1_imm[1],
                                     src1_imm[2],
                                     src1_imm[3]);

   nir_alu_instr *alu = build_alu_instr(nir_op_extract_i8, src0, src1);

   ASSERT_NE((void *) 0, alu);

   for (unsigned i = 0; i < 4; i++) {
      /* If the test is changed, and somehow src1 is used multiple times,
       * nir_ssa_def_bits_used will accumulate *all* the uses (as it should).
       * This isn't what we're trying to test here.
       */
      ASSERT_TRUE(is_used_once(src1));

      alu->src[1].swizzle[0] = i;

      const uint64_t bits_used = nir_ssa_def_bits_used(alu->src[0].src.ssa);

      EXPECT_EQ(0xffu << (8 * src1_imm[i]), bits_used);
   }
}
TEST_F(ssa_def_bits_used_test, extract_u8_with_const_index)
{
   nir_ssa_def *src0 = nir_imm_int(&bld, 0xffffffff);

   static const unsigned src1_imm[4] = { 3, 2, 1, 0 };
   nir_ssa_def *src1 = nir_imm_ivec4(&bld,
                                     src1_imm[0],
                                     src1_imm[1],
                                     src1_imm[2],
                                     src1_imm[3]);

   nir_alu_instr *alu = build_alu_instr(nir_op_extract_u8, src0, src1);

   ASSERT_NE((void *) 0, alu);

   for (unsigned i = 0; i < 4; i++) {
      /* If the test is changed, and somehow src1 is used multiple times,
       * nir_ssa_def_bits_used will accumulate *all* the uses (as it should).
       * This isn't what we're trying to test here.
       */
      ASSERT_TRUE(is_used_once(src1));

      alu->src[1].swizzle[0] = i;

      const uint64_t bits_used = nir_ssa_def_bits_used(alu->src[0].src.ssa);

      EXPECT_EQ(0xffu << (8 * src1_imm[i]), bits_used);
   }
}
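
For context on how this query is consumed, here is a hedged sketch of a
backend-style helper.  The function below is hypothetical and not part of
this commit; nir_ssa_def_bits_used() is the real entry point declared in
nir_range_analysis.h.

/* Hypothetical helper: if range analysis proves that only the low 16 bits
 * of a 32-bit value are ever used, a backend could choose a narrower
 * instruction encoding.  Illustrative only.
 */
static bool
only_low_16_bits_used(nir_ssa_def *def)
{
   const uint64_t bits_used = nir_ssa_def_bits_used(def);

   return (bits_used & ~(uint64_t) 0xffff) == 0;
}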