nir: add amd shared append/consume

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31075>
This commit is contained in:
Georg Lehmann
2024-09-07 14:22:11 +02:00
committed by Marge Bot
parent 213f5e9152
commit e0bcab953d
5 changed files with 13 additions and 0 deletions

View File

@@ -133,6 +133,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_first_invocation:
case nir_intrinsic_last_invocation:
case nir_intrinsic_load_subgroup_id:
case nir_intrinsic_shared_append_amd:
case nir_intrinsic_shared_consume_amd:
/* VS/TES/GS invocations of the same primitive can be in different
* subgroups, so subgroup ops are always divergent between vertices of
* the same primitive.

View File

@@ -1535,6 +1535,11 @@ store("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET, WRIT
# Same as shared_atomic_add, but with GDS. src[] = {store_val, gds_addr, m0}
intrinsic("gds_atomic_add_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
# Optimized form of shared_atomic_add with a constant address, where each
# invocation adds 1 (shared_append_amd) or -1 (shared_consume_amd).
# Takes no sources and returns the uniform pre-op value, i.e. the same
# single-component 32-bit result for all invocations.
# NOTE(review): the constant address is presumably carried in BASE -- confirm.
intrinsic("shared_append_amd", src_comp=[], dest_comp=1, bit_sizes=[32], indices=[BASE])
intrinsic("shared_consume_amd", src_comp=[], dest_comp=1, bit_sizes=[32], indices=[BASE])
# src[] = { sample_id, num_samples }
intrinsic("load_sample_positions_amd", src_comp=[1, 1], dest_comp=2, flags=[CAN_ELIMINATE, CAN_REORDER])

View File

@@ -64,6 +64,8 @@ shader_writes_to_memory(nir_shader *shader)
case nir_intrinsic_store_shared2_amd:
case nir_intrinsic_shared_atomic:
case nir_intrinsic_shared_atomic_swap:
case nir_intrinsic_shared_append_amd:
case nir_intrinsic_shared_consume_amd:
case nir_intrinsic_task_payload_atomic:
case nir_intrinsic_task_payload_atomic_swap:
case nir_intrinsic_image_deref_store:

View File

@@ -109,6 +109,8 @@ get_info(nir_intrinsic_op op)
INFO(nir_var_mem_ubo, ldcx_nv, false, 0, 1, -1, -1, 1)
LOAD(nir_var_uniform, const_ir3, -1, 0, -1, 4)
STORE(nir_var_uniform, const_ir3, -1, -1, -1, 0, 4)
INFO(nir_var_mem_shared, shared_append_amd, true, -1, -1, -1, -1, 1)
INFO(nir_var_mem_shared, shared_consume_amd, true, -1, -1, -1, -1, 1)
default:
break;
#undef ATOMIC

View File

@@ -395,6 +395,8 @@ nir_schedule_intrinsic_deps(nir_deps_state *state,
case nir_intrinsic_shared_atomic:
case nir_intrinsic_shared_atomic_swap:
case nir_intrinsic_shared_append_amd:
case nir_intrinsic_shared_consume_amd:
case nir_intrinsic_store_shared:
case nir_intrinsic_store_shared2_amd:
add_write_dep(state, &state->store_shared, n);