d3d12: GL4.6
This enables pipeline statistics queries and SO (stream output) overflow queries.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26210>
This commit is contained in:
@@ -225,17 +225,17 @@ GL 4.5, GLSL 4.50 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, v
|
||||
GL_KHR_robustness DONE (freedreno)
|
||||
GL_EXT_shader_integer_mix DONE (all drivers that support GLSL)
|
||||
|
||||
GL 4.6, GLSL 4.60 -- all DONE: radeonsi, virgl, zink, iris, crocus/gen7+
|
||||
GL 4.6, GLSL 4.60 -- all DONE: radeonsi, virgl, zink, iris, crocus/gen7+, d3d12
|
||||
|
||||
GL_ARB_gl_spirv DONE (freedreno, llvmpipe)
|
||||
GL_ARB_indirect_parameters DONE (freedreno/a6xx+, nvc0, llvmpipe, virgl, d3d12)
|
||||
GL_ARB_indirect_parameters DONE (freedreno/a6xx+, nvc0, llvmpipe, virgl)
|
||||
GL_ARB_pipeline_statistics_query DONE (freedreno/a6xx+, nvc0, r600, llvmpipe, softpipe, crocus/gen6+)
|
||||
GL_ARB_polygon_offset_clamp DONE (freedreno, nv50, nvc0, r600, llvmpipe, v3d, panfrost, crocus)
|
||||
GL_ARB_shader_atomic_counter_ops DONE (freedreno/a5xx+, nvc0, r600, llvmpipe, softpipe, v3d)
|
||||
GL_ARB_shader_draw_parameters DONE (freedreno/a6xx+, llvmpipe, nvc0, d3d12, crocus/gen6+)
|
||||
GL_ARB_shader_draw_parameters DONE (freedreno/a6xx+, llvmpipe, nvc0, crocus/gen6+)
|
||||
GL_ARB_shader_group_vote DONE (freedreno/a6xx, nvc0, llvmpipe, crocus)
|
||||
GL_ARB_spirv_extensions DONE (freedreno, llvmpipe)
|
||||
GL_ARB_texture_filter_anisotropic DONE (etnaviv/HALTI0, freedreno, nv50, nvc0, r600, softpipe, llvmpipe, d3d12, v3d, panfrost/g72+, asahi, crocus)
|
||||
GL_ARB_texture_filter_anisotropic DONE (etnaviv/HALTI0, freedreno, nv50, nvc0, r600, softpipe, llvmpipe, v3d, panfrost/g72+, asahi, crocus)
|
||||
GL_ARB_transform_feedback_overflow_query DONE (freedreno/a6xx+, nvc0, llvmpipe, softpipe, crocus/gen6+)
|
||||
GL_KHR_no_error DONE (all drivers)
|
||||
|
||||
|
@@ -9,3 +9,4 @@ GL_ARB_clip_control on Asahi
|
||||
GL_ARB_timer_query on Asahi
|
||||
GL_EXT_disjoint_timer_query on Asahi
|
||||
GL_ARB_base_instance on Asahi
|
||||
OpenGL 4.6 (up from 4.2) on d3d12
|
||||
|
@@ -411,3 +411,15 @@ spec@arb_vertex_program@arb_vertex_program-property-bindings,Fail
|
||||
# https://gitlab.freedesktop.org/mesa/piglit/-/merge_requests/850,
|
||||
# and CI changes to glue them together
|
||||
spec@arb_gpu_shader5@arb_gpu_shader5-xfb-streams-without-invocations spirv,Fail
|
||||
|
||||
# WARP bug: submitting an indirect draw with a count buffer zeroes the arg buffer
|
||||
# entries at indices >= the dynamic count and < the static max count
|
||||
spec@arb_query_buffer_object@coherency,Fail
|
||||
spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_FRAGMENT_SHADER_INVOCATIONS,Fail
|
||||
spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_PRIMITIVES_GENERATED,Fail
|
||||
spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_PRIMITIVES_SUBMITTED,Fail
|
||||
spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_SAMPLES_PASSED,Fail
|
||||
spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_TIMESTAMP,Fail
|
||||
spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_TIME_ELAPSED,Fail
|
||||
spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_VERTEX_SHADER_INVOCATIONS,Fail
|
||||
spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_VERTICES_SUBMITTED,Fail
|
||||
|
@@ -54,6 +54,7 @@ enum d3d12_state_var {
|
||||
|
||||
D3D12_STATE_VAR_NUM_WORKGROUPS = 0,
|
||||
D3D12_STATE_VAR_TRANSFORM_GENERIC0,
|
||||
D3D12_STATE_VAR_TRANSFORM_GENERIC1,
|
||||
D3D12_MAX_COMPUTE_STATE_VARS,
|
||||
|
||||
D3D12_MAX_STATE_VARS = MAX2(D3D12_MAX_GRAPHICS_STATE_VARS, D3D12_MAX_COMPUTE_STATE_VARS)
|
||||
|
@@ -223,10 +223,11 @@ get_query_resolve(const nir_shader_compiler_options *options, const d3d12_comput
|
||||
assert(!key->query_resolve.is_resolve_in_place ||
|
||||
(key->query_resolve.is_64bit && key->query_resolve.num_subqueries == 1));
|
||||
assert(key->query_resolve.num_subqueries == 1 ||
|
||||
key->query_resolve.pipe_query_type == PIPE_QUERY_PRIMITIVES_GENERATED);
|
||||
assert(key->query_resolve.num_subqueries <= 3); /* Fourth state var is an output offset */
|
||||
key->query_resolve.pipe_query_type == PIPE_QUERY_PRIMITIVES_GENERATED ||
|
||||
key->query_resolve.pipe_query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE);
|
||||
assert(key->query_resolve.num_subqueries <= 4);
|
||||
|
||||
nir_variable *inputs[3];
|
||||
nir_variable *inputs[4];
|
||||
for (uint32_t i = 0; i < key->query_resolve.num_subqueries; ++i) {
|
||||
/* Inputs are always 64-bit */
|
||||
inputs[i] = nir_variable_create(b.shader, nir_var_mem_ssbo, glsl_array_type(glsl_uint64_t_type(), 0, 8), "input");
|
||||
@@ -239,8 +240,9 @@ get_query_resolve(const nir_shader_compiler_options *options, const d3d12_comput
|
||||
}
|
||||
|
||||
/* How many entries in each sub-query is passed via root constants */
|
||||
nir_variable *state_var = nullptr;
|
||||
nir_variable *state_var = nullptr, *state_var1 = nullptr;
|
||||
nir_def *state_var_data = d3d12_get_state_var(&b, D3D12_STATE_VAR_TRANSFORM_GENERIC0, "state_var", glsl_uvec4_type(), &state_var);
|
||||
nir_def *state_var_data1 = d3d12_get_state_var(&b, D3D12_STATE_VAR_TRANSFORM_GENERIC1, "state_var1", glsl_uvec4_type(), &state_var1);
|
||||
|
||||
/* For in-place resolves, we resolve each field of the query. Otherwise, resolve one field into the dest */
|
||||
nir_variable *results[sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(UINT64)];
|
||||
@@ -280,6 +282,8 @@ get_query_resolve(const nir_shader_compiler_options *options, const d3d12_comput
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
|
||||
stride = sizeof(D3D12_QUERY_DATA_SO_STATISTICS) / sizeof(UINT64);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
@@ -324,11 +328,19 @@ get_query_resolve(const nir_shader_compiler_options *options, const d3d12_comput
|
||||
assert(j == 0 && i == 0);
|
||||
nir_def *start = nir_load_ssbo(&b, 1, 64, nir_imm_int(&b, i), nir_imul_imm(&b, array_index, 8));
|
||||
nir_def *end = nir_load_ssbo(&b, 1, 64, nir_imm_int(&b, i), nir_imul_imm(&b, nir_iadd_imm(&b, array_index, 1), 8));
|
||||
new_value = nir_isub(&b, end, start);
|
||||
new_value = nir_iadd(&b, nir_load_var(&b, results[j]), nir_isub(&b, end, start));
|
||||
} else if (key->query_resolve.pipe_query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
|
||||
key->query_resolve.pipe_query_type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
|
||||
/* These predicates are true if the primitives emitted != primitives stored */
|
||||
assert(j == 0);
|
||||
nir_def *val_a = nir_load_ssbo(&b, 1, 64, nir_imm_int(&b, i), nir_imul_imm(&b, array_index, 8));
|
||||
nir_def *val_b = nir_load_ssbo(&b, 1, 64, nir_imm_int(&b, i), nir_imul_imm(&b, nir_iadd_imm(&b, array_index, 1), 8));
|
||||
new_value = nir_ior(&b, nir_load_var(&b, results[j]), nir_u2uN(&b, nir_ine(&b, val_a, val_b), var_bit_size));
|
||||
} else {
|
||||
new_value = nir_u2uN(&b, nir_load_ssbo(&b, 1, 64, nir_imm_int(&b, i), nir_imul_imm(&b, nir_iadd_imm(&b, array_index, j), 8)), var_bit_size);
|
||||
new_value = nir_iadd(&b, nir_load_var(&b, results[j]), new_value);
|
||||
}
|
||||
nir_store_var(&b, results[j], nir_iadd(&b, nir_load_var(&b, results[j]), new_value), 1);
|
||||
nir_store_var(&b, results[j], new_value, 1);
|
||||
}
|
||||
|
||||
nir_store_var(&b, loop_counter, nir_iadd_imm(&b, loop_counter_value, 1), 1);
|
||||
@@ -336,7 +348,7 @@ get_query_resolve(const nir_shader_compiler_options *options, const d3d12_comput
|
||||
}
|
||||
|
||||
/* Results are accumulated, now store the final values */
|
||||
nir_def *output_base_index = nir_channel(&b, state_var_data, 3);
|
||||
nir_def *output_base_index = nir_channel(&b, state_var_data1, 0);
|
||||
for (uint32_t i = 0; i < num_result_values; ++i) {
|
||||
/* When resolving in-place, resolve each field, otherwise just write the one result */
|
||||
uint32_t field_offset = key->query_resolve.is_resolve_in_place ? i : 0;
|
||||
@@ -483,11 +495,16 @@ d3d12_save_compute_transform_state(struct d3d12_context *ctx, d3d12_compute_tran
|
||||
pipe_resource_reference(&save->ssbos[i].buffer, ctx->ssbo_views[PIPE_SHADER_COMPUTE][i].buffer);
|
||||
save->ssbos[i] = ctx->ssbo_views[PIPE_SHADER_COMPUTE][i];
|
||||
}
|
||||
|
||||
save->queries_disabled = ctx->queries_disabled;
|
||||
ctx->base.set_active_query_state(&ctx->base, false);
|
||||
}
|
||||
|
||||
void
|
||||
d3d12_restore_compute_transform_state(struct d3d12_context *ctx, d3d12_compute_transform_save_restore *save)
|
||||
{
|
||||
ctx->base.set_active_query_state(&ctx->base, !save->queries_disabled);
|
||||
|
||||
ctx->base.bind_compute_state(&ctx->base, save->cs);
|
||||
|
||||
ctx->base.set_constant_buffer(&ctx->base, PIPE_SHADER_COMPUTE, 1, true, &save->cbuf0);
|
||||
|
@@ -73,14 +73,14 @@ struct d3d12_compute_transform_key
|
||||
struct {
|
||||
/* true means the accumulation should be done as uint64, else uint32. */
|
||||
uint8_t is_64bit : 1;
|
||||
/* Indicates how many subqueries to accumulate together into a final result. When
|
||||
* set to 1, single_subquery_index determines where the data comes from. */
|
||||
uint8_t num_subqueries : 3;
|
||||
uint8_t pipe_query_type : 4;
|
||||
/* true means output is written where input[0] was, else output is a separate buffer.
|
||||
* true also means all fields are accumulated, else single_result_field_offset determines
|
||||
* which field is resolved. Implies num_subqueries == 1. */
|
||||
uint8_t is_resolve_in_place : 1;
|
||||
/* Indicates how many subqueries to accumulate together into a final result. When
|
||||
* set to 1, single_subquery_index determines where the data comes from. */
|
||||
uint8_t num_subqueries : 2;
|
||||
uint8_t pipe_query_type : 4;
|
||||
uint8_t single_subquery_index : 2;
|
||||
uint8_t single_result_field_offset : 4;
|
||||
uint8_t is_signed : 1;
|
||||
@@ -102,7 +102,8 @@ struct d3d12_compute_transform_save_restore
|
||||
{
|
||||
struct d3d12_shader_selector *cs;
|
||||
struct pipe_constant_buffer cbuf0;
|
||||
struct pipe_shader_buffer ssbos[4];
|
||||
struct pipe_shader_buffer ssbos[5];
|
||||
bool queries_disabled;
|
||||
};
|
||||
|
||||
void
|
||||
|
@@ -277,8 +277,9 @@ struct d3d12_context {
|
||||
|
||||
struct d3d12_resource *current_predication;
|
||||
bool predication_condition;
|
||||
bool queries_suspended;
|
||||
|
||||
uint32_t transform_state_vars[4];
|
||||
uint32_t transform_state_vars[8];
|
||||
|
||||
#ifdef __cplusplus
|
||||
ResourceStateManager *resource_state_manager;
|
||||
|
@@ -443,7 +443,8 @@ fill_compute_state_vars(struct d3d12_context *ctx,
|
||||
cmd_sig_key->params_root_const_offset = size;
|
||||
size += 4;
|
||||
break;
|
||||
case D3D12_STATE_VAR_TRANSFORM_GENERIC0: {
|
||||
case D3D12_STATE_VAR_TRANSFORM_GENERIC0:
|
||||
case D3D12_STATE_VAR_TRANSFORM_GENERIC1: {
|
||||
unsigned idx = shader->state_vars[j].var - D3D12_STATE_VAR_TRANSFORM_GENERIC0;
|
||||
ptr[0] = ctx->transform_state_vars[idx * 4];
|
||||
ptr[1] = ctx->transform_state_vars[idx * 4 + 1];
|
||||
|
@@ -42,6 +42,8 @@ num_sub_queries(unsigned query_type, unsigned index)
|
||||
switch (query_type) {
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
return index == 0 ? 3 : 1;
|
||||
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
|
||||
return 4;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
@@ -63,6 +65,8 @@ d3d12_query_heap_type(unsigned query_type, unsigned sub_query)
|
||||
D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
|
||||
return D3D12_QUERY_HEAP_TYPE_SO_STATISTICS;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
@@ -92,7 +96,10 @@ d3d12_query_type(unsigned query_type, unsigned sub_query, unsigned index)
|
||||
FALLTHROUGH;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||
return (D3D12_QUERY_TYPE)(D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 + index);
|
||||
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
|
||||
return (D3D12_QUERY_TYPE)(D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 + sub_query);
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
return D3D12_QUERY_TYPE_TIMESTAMP;
|
||||
@@ -245,8 +252,13 @@ accumulate_subresult_cpu(struct d3d12_context *ctx, struct d3d12_query *q_parent
|
||||
case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM1:
|
||||
case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM2:
|
||||
case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3:
|
||||
result->so_statistics.num_primitives_written += results_so[i].NumPrimitivesWritten;
|
||||
result->so_statistics.primitives_storage_needed += results_so[i].PrimitivesStorageNeeded;
|
||||
if (q_parent->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
|
||||
q_parent->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
|
||||
result->b = results_so[i].NumPrimitivesWritten != results_so[i].PrimitivesStorageNeeded;
|
||||
} else {
|
||||
result->so_statistics.num_primitives_written += results_so[i].NumPrimitivesWritten;
|
||||
result->so_statistics.primitives_storage_needed += results_so[i].PrimitivesStorageNeeded;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -291,6 +303,14 @@ accumulate_result_cpu(struct d3d12_context *ctx, struct d3d12_query *q,
|
||||
return false;
|
||||
result->u64 = local_result.so_statistics.num_primitives_written;
|
||||
return true;
|
||||
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
|
||||
result->b = false;
|
||||
for (uint32_t i = 0; i < num_sub_queries(q->type, q->index); ++i) {
|
||||
if (!accumulate_subresult_cpu(ctx, q, i, &local_result))
|
||||
return false;
|
||||
result->b |= local_result.b;
|
||||
}
|
||||
return true;
|
||||
default:
|
||||
assert(num_sub_queries(q->type, q->index) == 1);
|
||||
return accumulate_subresult_cpu(ctx, q, 0, result);
|
||||
@@ -361,6 +381,7 @@ accumulate_subresult_gpu(struct d3d12_context *ctx, struct d3d12_query *q_parent
|
||||
ctx->transform_state_vars[1] = 0;
|
||||
ctx->transform_state_vars[2] = 0;
|
||||
ctx->transform_state_vars[3] = 0;
|
||||
ctx->transform_state_vars[4] = 0;
|
||||
|
||||
pipe_shader_buffer new_cs_ssbos[1];
|
||||
new_cs_ssbos[0].buffer = q_parent->subqueries[sub_query].buffer;
|
||||
@@ -396,7 +417,7 @@ accumulate_result_gpu(struct d3d12_context *ctx, struct d3d12_query *q,
|
||||
key.query_resolve.timestamp_multiplier = d3d12_screen(ctx->base.screen)->timestamp_multiplier;
|
||||
ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
|
||||
|
||||
pipe_shader_buffer new_cs_ssbos[4];
|
||||
pipe_shader_buffer new_cs_ssbos[5];
|
||||
uint32_t num_ssbos = 0;
|
||||
for (uint32_t i = 0; i < key.query_resolve.num_subqueries; ++i) {
|
||||
ctx->transform_state_vars[i] = q->subqueries[i].curr_query;
|
||||
@@ -407,7 +428,7 @@ accumulate_result_gpu(struct d3d12_context *ctx, struct d3d12_query *q,
|
||||
}
|
||||
|
||||
assert(dst_offset % (key.query_resolve.is_64bit ? 8 : 4) == 0);
|
||||
ctx->transform_state_vars[3] = dst_offset / (key.query_resolve.is_64bit ? 8 : 4);
|
||||
ctx->transform_state_vars[4] = dst_offset / (key.query_resolve.is_64bit ? 8 : 4);
|
||||
|
||||
new_cs_ssbos[num_ssbos].buffer = dst;
|
||||
new_cs_ssbos[num_ssbos].buffer_offset = 0;
|
||||
@@ -670,7 +691,6 @@ d3d12_render_condition(struct pipe_context *pctx,
|
||||
return;
|
||||
}
|
||||
|
||||
assert(num_sub_queries(query->type, query->index) == 1);
|
||||
if (!query->predicate)
|
||||
query->predicate = d3d12_resource(pipe_buffer_create(pctx->screen, 0,
|
||||
PIPE_USAGE_DEFAULT, sizeof(uint64_t)));
|
||||
|
@@ -41,7 +41,7 @@ d3d12_validate_queries(struct d3d12_context *ctx);
|
||||
void
|
||||
d3d12_enable_predication(struct d3d12_context *ctx);
|
||||
|
||||
constexpr unsigned MAX_SUBQUERIES = 3;
|
||||
constexpr unsigned MAX_SUBQUERIES = 4;
|
||||
|
||||
struct d3d12_query_impl {
|
||||
ID3D12QueryHeap* query_heap;
|
||||
|
@@ -343,6 +343,8 @@ d3d12_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_GL_SPIRV:
|
||||
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
|
||||
case PIPE_CAP_SHADER_GROUP_VOTE:
|
||||
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
|
||||
case PIPE_CAP_QUERY_SO_OVERFLOW:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
|
@@ -6228,7 +6228,8 @@ optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
|
||||
NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
|
||||
NIR_PASS(progress, s, nir_opt_remove_phis);
|
||||
NIR_PASS(progress, s, nir_opt_dce);
|
||||
NIR_PASS(progress, s, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
|
||||
NIR_PASS(progress, s, nir_opt_if,
|
||||
nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis);
|
||||
NIR_PASS(progress, s, nir_opt_dead_cf);
|
||||
NIR_PASS(progress, s, nir_opt_cse);
|
||||
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
|
||||
|
Reference in New Issue
Block a user