dzn: Fix triangle-fan emulation
We were completely ignoring the primitive-restart case in the index-rewrite logic used to emulate triangle fans. Unfortunately, this case is way more complicated than a regular index rewrite:
- we need to skip all primitive-restart entries when turning the triangle fan into a triangle list, which implies serializing the index buffer rewrite procedure (at least I didn't find any clever way to parallelize things)
- the number of triangles can no longer be extrapolated from the number of indices in the original index buffer, thus forcing us to lower direct indexed draws into indirect draws and patch the indexCount value when the new index buffer is forged

Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16971>
This commit is contained in:

committed by
Marge Bot

parent
741b5ded49
commit
91f3c7a9fb
@@ -93,6 +93,8 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
|
||||
"draw_count_triangle_fan",
|
||||
"indexed_draw_triangle_fan",
|
||||
"indexed_draw_count_triangle_fan",
|
||||
"indexed_draw_triangle_fan_prim_restart",
|
||||
"indexed_draw_count_triangle_fan_prim_restart",
|
||||
};
|
||||
|
||||
assert(type < ARRAY_SIZE(type_str));
|
||||
@@ -100,15 +102,22 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
|
||||
bool indexed = type == DZN_INDIRECT_INDEXED_DRAW ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
|
||||
bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN ||
|
||||
type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
|
||||
bool indirect_count = type == DZN_INDIRECT_DRAW_COUNT ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
|
||||
type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
|
||||
bool prim_restart = type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
|
||||
type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
|
||||
nir_builder b =
|
||||
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
|
||||
dxil_get_nir_compiler_options(),
|
||||
@@ -130,10 +139,11 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
|
||||
nir_ssa_def *exec_buf_desc =
|
||||
dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE);
|
||||
|
||||
unsigned params_size =
|
||||
triangle_fan ?
|
||||
sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
|
||||
sizeof(struct dzn_indirect_draw_rewrite_params);
|
||||
unsigned params_size;
|
||||
if (triangle_fan)
|
||||
params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
|
||||
else
|
||||
params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
|
||||
|
||||
nir_ssa_def *params =
|
||||
nir_load_ubo(&b, params_size / 4, 32,
|
||||
@@ -209,35 +219,64 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
|
||||
nir_iadd(&b, nir_channel(&b, params, 2),
|
||||
nir_imul(&b, triangle_fan_index_buf_stride, index));
|
||||
|
||||
nir_ssa_def *triangle_fan_exec_vals[9] = { 0 };
|
||||
uint32_t triangle_fan_exec_param_count = 0;
|
||||
nir_ssa_def *addr_lo_overflow =
|
||||
nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2));
|
||||
nir_ssa_def *triangle_fan_index_buf_addr_hi =
|
||||
nir_iadd(&b, nir_channel(&b, params, 3),
|
||||
nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
|
||||
|
||||
nir_ssa_def *triangle_fan_exec_vals[] = {
|
||||
triangle_fan_index_buf_addr_lo,
|
||||
triangle_fan_index_buf_addr_hi,
|
||||
indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0),
|
||||
triangle_count,
|
||||
nir_imm_int(&b, 1),
|
||||
nir_imm_int(&b, 1),
|
||||
};
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_lo;
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_hi;
|
||||
|
||||
assert(sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params) == (ARRAY_SIZE(triangle_fan_exec_vals) * 4));
|
||||
if (prim_restart) {
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 2);
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 0);
|
||||
uint32_t index_count_offset =
|
||||
offsetof(struct dzn_indirect_triangle_fan_draw_exec_params, indexed_draw.index_count);
|
||||
nir_ssa_def *exec_buf_start =
|
||||
nir_load_ubo(&b, 2, 32,
|
||||
params_desc, nir_imm_int(&b, 16),
|
||||
.align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
|
||||
nir_ssa_def *exec_buf_start_lo =
|
||||
nir_iadd(&b, nir_imm_int(&b, index_count_offset),
|
||||
nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
|
||||
nir_imul(&b, exec_stride, index)));
|
||||
addr_lo_overflow = nir_ult(&b, exec_buf_start_lo, nir_channel(&b, exec_buf_start, 0));
|
||||
nir_ssa_def *exec_buf_start_hi =
|
||||
nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
|
||||
nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_lo;
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_hi;
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
|
||||
} else {
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
|
||||
indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0);
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
|
||||
triangle_count;
|
||||
}
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
|
||||
triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
|
||||
|
||||
unsigned rewrite_index_exec_params =
|
||||
prim_restart ?
|
||||
sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
|
||||
sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
|
||||
nir_ssa_def *triangle_fan_exec_stride =
|
||||
nir_imm_int(&b, sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params));
|
||||
nir_imm_int(&b, rewrite_index_exec_params);
|
||||
nir_ssa_def *triangle_fan_exec_offset =
|
||||
nir_imul(&b, triangle_fan_exec_stride, index);
|
||||
|
||||
nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[0], 4),
|
||||
triangle_fan_exec_buf_desc, triangle_fan_exec_offset,
|
||||
.write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 4);
|
||||
nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[4], 2),
|
||||
triangle_fan_exec_buf_desc,
|
||||
nir_iadd_imm(&b, triangle_fan_exec_offset, 16),
|
||||
.write_mask = 0x3, .access = ACCESS_NON_READABLE, .align_mul = 4);
|
||||
for (uint32_t i = 0; i < triangle_fan_exec_param_count; i += 4) {
|
||||
unsigned comps = MIN2(triangle_fan_exec_param_count - i, 4);
|
||||
uint32_t mask = (1 << comps) - 1;
|
||||
|
||||
nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[i], comps),
|
||||
triangle_fan_exec_buf_desc,
|
||||
nir_iadd_imm(&b, triangle_fan_exec_offset, i * 4),
|
||||
.write_mask = mask, .access = ACCESS_NON_READABLE, .align_mul = 4);
|
||||
}
|
||||
|
||||
nir_ssa_def *ibview_vals[] = {
|
||||
triangle_fan_index_buf_addr_lo,
|
||||
@@ -271,6 +310,172 @@ dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
nir_shader *
|
||||
dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)
|
||||
{
|
||||
assert(old_index_size == 2 || old_index_size == 4);
|
||||
|
||||
nir_builder b =
|
||||
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
|
||||
dxil_get_nir_compiler_options(),
|
||||
"dzn_meta_triangle_prim_rewrite_index(old_index_size=%d)",
|
||||
old_index_size);
|
||||
b.shader->info.internal = true;
|
||||
|
||||
nir_ssa_def *params_desc =
|
||||
dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
|
||||
nir_ssa_def *new_index_buf_desc =
|
||||
dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
|
||||
"new_index_buf", ACCESS_NON_READABLE);
|
||||
nir_ssa_def *old_index_buf_desc =
|
||||
dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
|
||||
"old_index_buf", ACCESS_NON_WRITEABLE);
|
||||
nir_ssa_def *new_index_count_ptr_desc =
|
||||
dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3,
|
||||
"new_index_count_ptr", ACCESS_NON_READABLE);
|
||||
|
||||
nir_ssa_def *params =
|
||||
nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) / 4, 32,
|
||||
params_desc, nir_imm_int(&b, 0),
|
||||
.align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
|
||||
|
||||
nir_ssa_def *prim_restart_val =
|
||||
nir_imm_int(&b, old_index_size == 2 ? 0xffff : 0xffffffff);
|
||||
nir_variable *old_index_ptr_var =
|
||||
nir_local_variable_create(b.impl, glsl_uint_type(), "old_index_ptr_var");
|
||||
nir_ssa_def *old_index_ptr = nir_channel(&b, params, 0);
|
||||
nir_store_var(&b, old_index_ptr_var, old_index_ptr, 1);
|
||||
nir_variable *new_index_ptr_var =
|
||||
nir_local_variable_create(b.impl, glsl_uint_type(), "new_index_ptr_var");
|
||||
nir_store_var(&b, new_index_ptr_var, nir_imm_int(&b, 0), 1);
|
||||
nir_ssa_def *old_index_count = nir_channel(&b, params, 1);
|
||||
nir_variable *index0_var =
|
||||
nir_local_variable_create(b.impl, glsl_uint_type(), "index0_var");
|
||||
nir_store_var(&b, index0_var, prim_restart_val, 1);
|
||||
|
||||
/*
|
||||
* Filter out all primitive-restart magic values, and generate a triangle list
|
||||
* from the triangle fan definition.
|
||||
*
|
||||
* Basically:
|
||||
*
|
||||
* new_index_ptr = 0;
|
||||
* index0 = restart_prim_value; // 0xffff or 0xffffffff
|
||||
* for (old_index_ptr = firstIndex; old_index_ptr < indexCount;) {
|
||||
* // If we have no starting-point we need at least 3 vertices,
|
||||
* // otherwise we can do with two. If there's not enough vertices
|
||||
* // to form a primitive, we just bail out.
|
||||
* min_indices = index0 == restart_prim_value ? 3 : 2;
|
||||
* if (old_index_ptr + min_indices > firstIndex + indexCount)
|
||||
* break;
|
||||
*
|
||||
* if (index0 == restart_prim_value) {
|
||||
* // No starting point, skip all entries until we have a
|
||||
* // non-primitive-restart value
|
||||
* index0 = old_index_buf[old_index_ptr++];
|
||||
* continue;
|
||||
* }
|
||||
*
|
||||
* // If at least one index contains the primitive-restart pattern,
|
||||
// ignore this triangle, and skip the unused entries
|
||||
* if (old_index_buf[old_index_ptr + 1] == restart_prim_value) {
|
||||
* old_index_ptr += 2;
|
||||
* continue;
|
||||
* }
|
||||
* if (old_index_buf[old_index_ptr] == restart_prim_value) {
|
||||
* old_index_ptr++;
|
||||
* continue;
|
||||
* }
|
||||
*
|
||||
* // We have a valid primitive, queue it to the new index buffer
|
||||
* new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr];
|
||||
* new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr + 1];
|
||||
* new_index_buf[new_index_ptr++] = index0;
|
||||
* }
|
||||
*
|
||||
* expressed in NIR, which admitedly is not super easy to grasp with.
|
||||
* TODO: Might be a good thing to use use the CL compiler we have and turn
|
||||
* those shaders into CL kernels.
|
||||
*/
|
||||
nir_push_loop(&b);
|
||||
|
||||
old_index_ptr = nir_load_var(&b, old_index_ptr_var);
|
||||
nir_ssa_def *index0 = nir_load_var(&b, index0_var);
|
||||
|
||||
nir_ssa_def *read_index_count =
|
||||
nir_bcsel(&b, nir_ieq(&b, index0, prim_restart_val),
|
||||
nir_imm_int(&b, 3), nir_imm_int(&b, 2));
|
||||
nir_push_if(&b, nir_ult(&b, old_index_count, nir_iadd(&b, old_index_ptr, read_index_count)));
|
||||
nir_jump(&b, nir_jump_break);
|
||||
nir_pop_if(&b, NULL);
|
||||
|
||||
nir_ssa_def *old_index_offset =
|
||||
nir_imul_imm(&b, old_index_ptr, old_index_size);
|
||||
|
||||
nir_push_if(&b, nir_ieq(&b, index0, prim_restart_val));
|
||||
nir_ssa_def *index_val =
|
||||
nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
|
||||
old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
|
||||
.align_mul = 4);
|
||||
if (old_index_size == 2) {
|
||||
index_val = nir_bcsel(&b,
|
||||
nir_ieq_imm(&b, nir_iand_imm(&b, old_index_offset, 0x2), 0),
|
||||
nir_iand_imm(&b, index_val, 0xffff),
|
||||
nir_ushr_imm(&b, index_val, 16));
|
||||
}
|
||||
|
||||
nir_store_var(&b, index0_var, index_val, 1);
|
||||
nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
|
||||
nir_jump(&b, nir_jump_continue);
|
||||
nir_pop_if(&b, NULL);
|
||||
|
||||
nir_ssa_def *index12 =
|
||||
nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
|
||||
old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
|
||||
.align_mul = 4);
|
||||
if (old_index_size == 2) {
|
||||
nir_ssa_def *indices[] = {
|
||||
nir_iand_imm(&b, nir_channel(&b, index12, 0), 0xffff),
|
||||
nir_ushr_imm(&b, nir_channel(&b, index12, 0), 16),
|
||||
nir_iand_imm(&b, nir_channel(&b, index12, 1), 0xffff),
|
||||
};
|
||||
|
||||
index12 =
|
||||
nir_bcsel(&b,
|
||||
nir_ieq_imm(&b, nir_iand_imm(&b, old_index_offset, 0x2), 0),
|
||||
nir_vec2(&b, indices[0], indices[1]),
|
||||
nir_vec2(&b, indices[1], indices[2]));
|
||||
}
|
||||
|
||||
nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 1), prim_restart_val));
|
||||
nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 2), 1);
|
||||
nir_store_var(&b, index0_var, prim_restart_val, 1);
|
||||
nir_jump(&b, nir_jump_continue);
|
||||
nir_push_else(&b, NULL);
|
||||
nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
|
||||
nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 0), prim_restart_val));
|
||||
nir_store_var(&b, index0_var, prim_restart_val, 1);
|
||||
nir_jump(&b, nir_jump_continue);
|
||||
nir_push_else(&b, NULL);
|
||||
nir_ssa_def *new_indices =
|
||||
nir_vec3(&b, nir_channel(&b, index12, 0), nir_channel(&b, index12, 1), index0);
|
||||
nir_ssa_def *new_index_ptr = nir_load_var(&b, new_index_ptr_var);
|
||||
nir_ssa_def *new_index_offset = nir_imul_imm(&b, new_index_ptr, sizeof(uint32_t));
|
||||
nir_store_ssbo(&b, new_indices, new_index_buf_desc,
|
||||
new_index_offset,
|
||||
.write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
|
||||
nir_store_var(&b, new_index_ptr_var, nir_iadd_imm(&b, new_index_ptr, 3), 1);
|
||||
nir_pop_if(&b, NULL);
|
||||
nir_pop_if(&b, NULL);
|
||||
nir_pop_loop(&b, NULL);
|
||||
|
||||
nir_store_ssbo(&b, nir_load_var(&b, new_index_ptr_var),
|
||||
new_index_count_ptr_desc, nir_imm_int(&b, 0),
|
||||
.write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4);
|
||||
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
nir_shader *
|
||||
dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)
|
||||
{
|
||||
|
Reference in New Issue
Block a user