diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 28645017b77..f1fde789e7b 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -998,7 +998,8 @@ setup_nir(isel_context *ctx, nir_shader *nir)
                                        (nir_variable_mode)(nir_var_mem_ssbo | nir_var_mem_ubo |
                                                            nir_var_mem_push_const | nir_var_mem_shared |
                                                            nir_var_mem_global),
-                                       mem_vectorize_callback)) {
+                                       mem_vectorize_callback,
+                                       (nir_variable_mode)0)) {
       lower_to_scalar = true;
       lower_pack = true;
    }
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 04080b5e1df..25caa370f13 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4429,7 +4429,8 @@ typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size,
                                               nir_intrinsic_instr *low, nir_intrinsic_instr *high);
 bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
-                                  nir_should_vectorize_mem_func callback);
+                                  nir_should_vectorize_mem_func callback,
+                                  nir_variable_mode robust_modes);
 
 void nir_schedule(nir_shader *shader, int threshold);
diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c
index c31c8d293bf..814c5a55264 100644
--- a/src/compiler/nir/nir_opt_load_store_vectorize.c
+++ b/src/compiler/nir/nir_opt_load_store_vectorize.c
@@ -187,6 +187,7 @@ struct entry {
 struct vectorize_ctx {
    nir_variable_mode modes;
    nir_should_vectorize_mem_func callback;
+   nir_variable_mode robust_modes;
    struct list_head entries[nir_num_variable_modes];
    struct hash_table *loads[nir_num_variable_modes];
    struct hash_table *stores[nir_num_variable_modes];
@@ -1053,6 +1054,22 @@ check_for_aliasing(struct vectorize_ctx *ctx, struct entry *first, struct entry
    return false;
 }
 
+static bool
+check_for_robustness(struct vectorize_ctx *ctx, struct entry *low)
+{
+   nir_variable_mode mode = get_variable_mode(low);
+   if (mode & ctx->robust_modes) {
+      unsigned low_bit_size = get_bit_size(low);
+      unsigned low_size = low->intrin->num_components * low_bit_size;
+
+      /* don't attempt to vectorize accesses if the offset can overflow. */
+      /* TODO: handle indirect accesses. */
+      return low->offset_signed < 0 && low->offset_signed + low_size >= 0;
+   }
+
+   return false;
+}
+
 static bool
 is_strided_vector(const struct glsl_type *type)
 {
@@ -1077,6 +1094,9 @@ try_vectorize(nir_function_impl *impl, struct vectorize_ctx *ctx,
    if (check_for_aliasing(ctx, first, second))
       return false;
 
+   if (check_for_robustness(ctx, low))
+      return false;
+
    /* we can only vectorize non-volatile loads/stores of the same type and with
     * the same access */
    if (first->info != second->info || first->access != second->access ||
@@ -1327,13 +1347,15 @@ process_block(nir_function_impl *impl, struct vectorize_ctx *ctx, nir_block *blo
 
 bool
 nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
-                             nir_should_vectorize_mem_func callback)
+                             nir_should_vectorize_mem_func callback,
+                             nir_variable_mode robust_modes)
 {
    bool progress = false;
 
    struct vectorize_ctx *ctx = rzalloc(NULL, struct vectorize_ctx);
    ctx->modes = modes;
    ctx->callback = callback;
+   ctx->robust_modes = robust_modes;
 
    nir_index_vars(shader, NULL, modes);
diff --git a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
index 710a2da6f72..4001b8d192c 100644
--- a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
+++ b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
@@ -38,7 +38,8 @@ protected:
 
    nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                       unsigned index);
-   bool run_vectorizer(nir_variable_mode modes, bool cse=false);
+   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
+                       nir_variable_mode robust_modes = (nir_variable_mode)0);
 
    nir_ssa_def *get_resource(uint32_t binding, bool ssbo);
@@ -134,11 +135,13 @@ nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
 }
 
 bool
-nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes, bool cse)
+nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
+                                              bool cse,
+                                              nir_variable_mode robust_modes)
 {
    if (modes & nir_var_mem_shared)
       nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
-   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback);
+   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback, robust_modes);
    if (progress) {
       nir_validate_shader(b->shader, NULL);
       if (cse)
@@ -1773,3 +1776,16 @@ TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
 
    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
 }
+
+TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
+{
+   create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
+   create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);
+
+   nir_validate_shader(b->shader, NULL);
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+
+   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
+}
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index e59b3ac4269..7a1fea10aa2 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -881,7 +881,8 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
       OPT(nir_opt_load_store_vectorize,
          nir_var_mem_ubo | nir_var_mem_ssbo |
          nir_var_mem_global | nir_var_mem_shared,
-         brw_nir_should_vectorize_mem);
+         brw_nir_should_vectorize_mem,
+         (nir_variable_mode)0);
    }
 
    OPT(brw_nir_lower_mem_access_bit_sizes, devinfo);
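Note (not part of the patch): a minimal sketch of how a caller might use the new robust_modes parameter. Under robustBufferAccess-style guarantees an out-of-bounds offset must not become in-bounds by widening an access, which is the case check_for_robustness() rejects (e.g. the new test merges a 32-bit load at offset 0xfffffffc with one at 0x0, where the combined offset wraps past zero). The helper and flag names below (should_vectorize_cb, vectorize_buffer_access, robust_buffer_access) are made up for illustration; only nir_opt_load_store_vectorize(), the callback typedef and the nir_var_mem_* modes come from the patch.

#include "nir.h"

/* Toy vectorization filter matching the nir_should_vectorize_mem_func
 * signature from this patch; low/high are unused here, and a real driver
 * would apply hardware-specific alignment and size rules instead. */
static bool
should_vectorize_cb(unsigned align, unsigned bit_size,
                    nir_intrinsic_instr *low, nir_intrinsic_instr *high)
{
   return align % 4 == 0 && bit_size <= 32;
}

/* Hypothetical driver hook: descriptor-backed modes are only flagged as
 * robust when the API-level robustness feature is enabled, so callers that
 * keep passing (nir_variable_mode)0 see no change in behaviour. */
static bool
vectorize_buffer_access(nir_shader *shader, bool robust_buffer_access)
{
   nir_variable_mode modes =
      (nir_variable_mode)(nir_var_mem_ubo | nir_var_mem_ssbo);
   nir_variable_mode robust_modes =
      robust_buffer_access ? modes : (nir_variable_mode)0;

   return nir_opt_load_store_vectorize(shader, modes,
                                       should_vectorize_cb, robust_modes);
}

The aco and brw_nir callers above keep passing (nir_variable_mode)0, so the patch itself is behaviour-preserving until a driver opts in.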