intel/compiler: Keep track of compiled/spilled in brw_simd_selection_state
We still update the cs_prog_data, but don't rely on it for this state
anymore. This will allow using the SIMD selector with shaders that don't
use cs_prog_data.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19601>
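For readers skimming the diff below: the selection helpers are typically driven in a loop of roughly the following shape. This is an illustrative sketch only, not code from this commit; compile_one_simd() is a hypothetical stand-in for the per-width compile step (run_cs/run_task/run_mesh in the real callers), and mem_ctx/prog_data are assumed to be provided by the caller.

   brw_simd_selection_state simd_state{};
   simd_state.mem_ctx = mem_ctx;       /* ralloc context used for error strings */
   simd_state.prog_data = prog_data;   /* still updated, but no longer the source of truth */

   for (unsigned simd = 0; simd < SIMD_COUNT; simd++) {
      if (!brw_simd_should_compile(simd_state, simd))
         continue;

      bool spilled = false;
      /* compile_one_simd() is a placeholder for the per-width compile step. */
      if (compile_one_simd(8u << simd, &spilled))
         brw_simd_mark_compiled(simd_state, simd, spilled);
      else
         simd_state.error[simd] = ralloc_strdup(mem_ctx, "compile failed");
   }

   /* Prefers the widest non-spilling variant, falls back to the widest
    * compiled one, and returns -1 if nothing compiled.
    */
   const int selected = brw_simd_select(simd_state);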
@@ -7838,13 +7838,11 @@ brw_compile_cs(const struct brw_compiler *compiler,
                                              &prog_data->base, shader, dispatch_width,
                                              debug_enabled);
 
-      if (prog_data->prog_mask) {
-         unsigned first = ffs(prog_data->prog_mask) - 1;
+      const int first = brw_simd_first_compiled(simd_state);
+      if (first >= 0)
          v[simd]->import_uniforms(v[first].get());
-      }
 
-      const bool allow_spilling = !prog_data->prog_mask ||
-                                  nir->info.workgroup_size_variable;
+      const bool allow_spilling = first < 0 || nir->info.workgroup_size_variable;
 
       if (v[simd]->run_cs(allow_spilling)) {
          cs_fill_push_const_info(compiler->devinfo, prog_data);
@@ -300,8 +300,7 @@ brw_compile_task(const struct brw_compiler *compiler,
          v[simd]->import_uniforms(v[first].get());
       }
 
-      const bool allow_spilling = !prog_data->base.prog_mask;
-
+      const bool allow_spilling = !brw_simd_any_compiled(simd_state);
       if (v[simd]->run_task(allow_spilling))
         brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
      else
@@ -811,8 +810,7 @@ brw_compile_mesh(const struct brw_compiler *compiler,
          v[simd]->import_uniforms(v[first].get());
       }
 
-      const bool allow_spilling = !prog_data->base.prog_mask;
-
+      const bool allow_spilling = !brw_simd_any_compiled(simd_state);
       if (v[simd]->run_mesh(allow_spilling))
         brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
      else
@@ -32,7 +32,6 @@ unsigned brw_required_dispatch_width(const struct shader_info *info);
 static constexpr int SIMD_COUNT = 3;
 
 struct brw_simd_selection_state {
-
    void *mem_ctx;
    const struct intel_device_info *devinfo;
 
@@ -41,8 +40,25 @@ struct brw_simd_selection_state {
    unsigned required_width;
 
    const char *error[SIMD_COUNT];
+
+   bool compiled[SIMD_COUNT];
+   bool spilled[SIMD_COUNT];
 };
 
+inline int brw_simd_first_compiled(const brw_simd_selection_state &state)
+{
+   for (int i = 0; i < SIMD_COUNT; i++) {
+      if (state.compiled[i])
+         return i;
+   }
+   return -1;
+}
+
+inline bool brw_simd_any_compiled(const brw_simd_selection_state &state)
+{
+   return brw_simd_first_compiled(state) >= 0;
+}
+
 bool brw_simd_should_compile(brw_simd_selection_state &state, unsigned simd);
 
 void brw_simd_mark_compiled(brw_simd_selection_state &state, unsigned simd, bool spilled);
@@ -47,14 +47,12 @@ test_bit(unsigned mask, unsigned bit) {
 }
 
 bool
-brw_simd_should_compile(brw_simd_selection_state &state,
-                        unsigned simd)
+brw_simd_should_compile(brw_simd_selection_state &state, unsigned simd)
 {
    assert(simd < SIMD_COUNT);
+   assert(!state.compiled[simd]);
 
    struct brw_cs_prog_data *prog_data = state.prog_data;
-   assert(!test_bit(prog_data->prog_mask, simd));
-
    const unsigned width = 8u << simd;
 
    /* For shaders with variable size workgroup, in most cases we can compile
@@ -64,7 +62,7 @@ brw_simd_should_compile(brw_simd_selection_state &state,
    const bool workgroup_size_variable = prog_data->local_size[0] == 0;
 
    if (!workgroup_size_variable) {
-      if (test_bit(prog_data->prog_spilled, simd)) {
+      if (state.spilled[simd]) {
         state.error[simd] = ralloc_asprintf(
            state.mem_ctx, "SIMD%u skipped because would spill", width);
         return false;
@@ -83,7 +81,7 @@ brw_simd_should_compile(brw_simd_selection_state &state,
       return false;
    }
 
-   if (simd > 0 && test_bit(prog_data->prog_mask, simd - 1) &&
+   if (simd > 0 && state.compiled[simd - 1] &&
        workgroup_size <= (width / 2)) {
       state.error[simd] = ralloc_asprintf(
          state.mem_ctx, "SIMD%u skipped because workgroup size %u already fits in SIMD%u",
@@ -103,7 +101,7 @@ brw_simd_should_compile(brw_simd_selection_state &state,
     * TODO: Use performance_analysis and drop this rule.
     */
    if (width == 32) {
-      if (!INTEL_DEBUG(DEBUG_DO32) && prog_data->prog_mask) {
+      if (!INTEL_DEBUG(DEBUG_DO32) && (state.compiled[0] || state.compiled[1])) {
         state.error[simd] = ralloc_strdup(
            state.mem_ctx, "SIMD32 skipped because not required");
         return false;
@@ -147,35 +145,32 @@ void
 brw_simd_mark_compiled(brw_simd_selection_state &state, unsigned simd, bool spilled)
 {
    assert(simd < SIMD_COUNT);
+   assert(!state.compiled[simd]);
 
-   struct brw_cs_prog_data *prog_data = state.prog_data;
-   assert(!test_bit(prog_data->prog_mask, simd));
-
-   prog_data->prog_mask |= 1u << simd;
+   state.compiled[simd] = true;
+   state.prog_data->prog_mask |= 1u << simd;
 
    /* If a SIMD spilled, all the larger ones would spill too. */
    if (spilled) {
-      for (unsigned i = simd; i < SIMD_COUNT; i++)
-         prog_data->prog_spilled |= 1u << i;
+      for (unsigned i = simd; i < SIMD_COUNT; i++) {
+         state.spilled[i] = true;
+         state.prog_data->prog_spilled |= 1u << i;
+      }
    }
 }
 
 int
 brw_simd_select(const struct brw_simd_selection_state &state)
 {
-   const struct brw_cs_prog_data *prog_data = state.prog_data;
-   assert((prog_data->prog_mask & ~0x7u) == 0);
-   const unsigned not_spilled_mask =
-      prog_data->prog_mask & ~prog_data->prog_spilled;
-
-   /* Util functions index bits from 1 instead of 0, adjust before return. */
-
-   if (not_spilled_mask)
-      return util_last_bit(not_spilled_mask) - 1;
-   else if (prog_data->prog_mask)
-      return ffs(prog_data->prog_mask) - 1;
-   else
-      return -1;
+   for (int i = SIMD_COUNT - 1; i >= 0; i--) {
+      if (state.compiled[i] && !state.spilled[i])
+         return i;
+   }
+   for (int i = SIMD_COUNT - 1; i >= 0; i--) {
+      if (state.compiled[i])
+         return i;
+   }
+   return -1;
 }
 
 int
@@ -186,9 +181,18 @@ brw_simd_select_for_workgroup_size(const struct intel_device_info *devinfo,
    if (!sizes || (prog_data->local_size[0] == sizes[0] &&
                   prog_data->local_size[1] == sizes[1] &&
                   prog_data->local_size[2] == sizes[2])) {
-      const brw_simd_selection_state simd_state{
+      brw_simd_selection_state simd_state{
          .prog_data = const_cast<struct brw_cs_prog_data *>(prog_data),
       };
+
+      /* Propagate the prog_data information back to the simd_state,
+       * so we can use select() directly.
+       */
+      for (int i = 0; i < SIMD_COUNT; i++) {
+         simd_state.compiled[i] = test_bit(prog_data->prog_mask, i);
+         simd_state.spilled[i] = test_bit(prog_data->prog_spilled, i);
+      }
+
       return brw_simd_select(simd_state);
    }
 
@@ -366,3 +366,33 @@ TEST_F(SIMDSelectionCS, Require32ErrorWhenNotCompile)
 
    ASSERT_EQ(brw_simd_select(simd_state), -1);
 }
+
+TEST_F(SIMDSelectionCS, FirstCompiledIsSIMD8)
+{
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD8));
+   brw_simd_mark_compiled(simd_state, SIMD8, not_spilled);
+
+   ASSERT_TRUE(brw_simd_any_compiled(simd_state));
+   ASSERT_EQ(brw_simd_first_compiled(simd_state), SIMD8);
+}
+
+TEST_F(SIMDSelectionCS, FirstCompiledIsSIMD16)
+{
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD8));
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD16));
+   brw_simd_mark_compiled(simd_state, SIMD16, not_spilled);
+
+   ASSERT_TRUE(brw_simd_any_compiled(simd_state));
+   ASSERT_EQ(brw_simd_first_compiled(simd_state), SIMD16);
+}
+
+TEST_F(SIMDSelectionCS, FirstCompiledIsSIMD32)
+{
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD8));
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD16));
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD32));
+   brw_simd_mark_compiled(simd_state, SIMD32, not_spilled);
+
+   ASSERT_TRUE(brw_simd_any_compiled(simd_state));
+   ASSERT_EQ(brw_simd_first_compiled(simd_state), SIMD32);
+}