gallivm/ssbo: replace run-time loop with compile-time loop

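The per-lane loop is now unrolled while the IR is being built, so each lane index is a constant. The unrolled loop is notably faster.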

Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27302>
This commit is contained in:
Błażej Szczygieł
2024-01-25 12:20:08 +01:00
committed by Marge Bot
parent bab4399970
commit 9ff3bec484

View File

@@ -1433,17 +1433,18 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base,
LLVMValueRef exec_mask = mask_vec(bld_base); LLVMValueRef exec_mask = mask_vec(bld_base);
LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
struct lp_build_loop_state loop_state; for (unsigned i = 0; i < uint_bld->type.length; i++) {
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); LLVMValueRef counter = lp_build_const_int32(gallivm, i);
LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
struct lp_build_if_state exec_ifthen; struct lp_build_if_state exec_ifthen;
lp_build_if(&exec_ifthen, gallivm, loop_cond); lp_build_if(&exec_ifthen, gallivm, loop_cond);
LLVMValueRef ssbo_limit; LLVMValueRef ssbo_limit;
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index, LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
loop_state.counter, &ssbo_limit); counter, &ssbo_limit);
LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
for (unsigned c = 0; c < nc; c++) { for (unsigned c = 0; c < nc; c++) {
LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), ""); LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
@@ -1462,19 +1463,18 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base,
LLVMValueRef scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, loop_index); LLVMValueRef scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, loop_index);
temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], ""); temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
LLVMBuildStore(builder, temp_res, result[c]); LLVMBuildStore(builder, temp_res, result[c]);
lp_build_else(&ifthen); lp_build_else(&ifthen);
temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], ""); temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false); LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, ""); temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
LLVMBuildStore(builder, temp_res, result[c]); LLVMBuildStore(builder, temp_res, result[c]);
lp_build_endif(&ifthen); lp_build_endif(&ifthen);
} }
lp_build_endif(&exec_ifthen); lp_build_endif(&exec_ifthen);
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), }
NULL, LLVMIntUGE);
for (unsigned c = 0; c < nc; c++) for (unsigned c = 0; c < nc; c++)
outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], ""); outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], "");
@@ -1539,17 +1539,18 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base,
LLVMValueRef exec_mask = mask_vec(bld_base); LLVMValueRef exec_mask = mask_vec(bld_base);
LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
struct lp_build_loop_state loop_state; for (unsigned i = 0; i < uint_bld->type.length; i++) {
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); LLVMValueRef counter = lp_build_const_int32(gallivm, i);
LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
struct lp_build_if_state exec_ifthen; struct lp_build_if_state exec_ifthen;
lp_build_if(&exec_ifthen, gallivm, loop_cond); lp_build_if(&exec_ifthen, gallivm, loop_cond);
LLVMValueRef ssbo_limit; LLVMValueRef ssbo_limit;
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index, LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
loop_state.counter, &ssbo_limit); counter, &ssbo_limit);
LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
for (unsigned c = 0; c < nc; c++) { for (unsigned c = 0; c < nc; c++) {
if (!(writemask & (1u << c))) if (!(writemask & (1u << c)))
@@ -1564,7 +1565,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base,
} }
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
loop_state.counter, ""); counter, "");
value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, ""); value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
struct lp_build_if_state ifthen; struct lp_build_if_state ifthen;
LLVMValueRef store_cond; LLVMValueRef store_cond;
@@ -1576,9 +1577,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base,
} }
lp_build_endif(&exec_ifthen); lp_build_endif(&exec_ifthen);
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), }
NULL, LLVMIntUGE);
} }
@@ -1604,17 +1603,18 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
LLVMValueRef exec_mask = mask_vec(bld_base); LLVMValueRef exec_mask = mask_vec(bld_base);
LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
struct lp_build_loop_state loop_state; for (unsigned i = 0; i < uint_bld->type.length; i++) {
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); LLVMValueRef counter = lp_build_const_int32(gallivm, i);
LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
struct lp_build_if_state exec_ifthen; struct lp_build_if_state exec_ifthen;
lp_build_if(&exec_ifthen, gallivm, loop_cond); lp_build_if(&exec_ifthen, gallivm, loop_cond);
LLVMValueRef ssbo_limit; LLVMValueRef ssbo_limit;
LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index, LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index,
loop_state.counter, &ssbo_limit); counter, &ssbo_limit);
LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1); LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
if (ssbo_limit) { if (ssbo_limit) {
@@ -1623,7 +1623,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
} }
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
loop_state.counter, ""); counter, "");
value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, ""); value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");
LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, ""); LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, "");
@@ -1637,7 +1637,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
if (val2 != NULL) { if (val2 != NULL) {
LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2, LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
loop_state.counter, ""); counter, "");
cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, ""); cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr, scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
cas_src_ptr, cas_src_ptr,
@@ -1652,18 +1652,17 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
false); false);
} }
temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, ""); temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
LLVMBuildStore(builder, temp_res, atom_res); LLVMBuildStore(builder, temp_res, atom_res);
lp_build_else(&ifthen); lp_build_else(&ifthen);
temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, ""); temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float); LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float);
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, ""); temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
LLVMBuildStore(builder, temp_res, atom_res); LLVMBuildStore(builder, temp_res, atom_res);
lp_build_endif(&ifthen); lp_build_endif(&ifthen);
lp_build_endif(&exec_ifthen); lp_build_endif(&exec_ifthen);
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), }
NULL, LLVMIntUGE);
*result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, ""); *result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
} }