nir: add load_{scalar,vector}_arg_amd and load_smem_amd intrinsics
load_smem_amd is similar to load_global/load_global_constant, but it is guaranteed to use SMEM, and it makes it much easier to utilize the format's 32-bit offset source. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12773>
This commit is contained in:
@@ -159,6 +159,8 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
|
||||
case nir_intrinsic_load_force_vrs_rates_amd:
|
||||
case nir_intrinsic_load_tess_level_inner_default:
|
||||
case nir_intrinsic_load_tess_level_outer_default:
|
||||
case nir_intrinsic_load_scalar_arg_amd:
|
||||
case nir_intrinsic_load_smem_amd:
|
||||
is_divergent = false;
|
||||
break;
|
||||
|
||||
@@ -542,6 +544,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
|
||||
case nir_intrinsic_gds_atomic_add_amd:
|
||||
case nir_intrinsic_load_rt_arg_scratch_offset_amd:
|
||||
case nir_intrinsic_load_intersection_opaque_amd:
|
||||
case nir_intrinsic_load_vector_arg_amd:
|
||||
is_divergent = true;
|
||||
break;
|
||||
|
||||
|
@@ -1330,6 +1330,14 @@ system_value("intersection_opaque_amd", 1, bit_sizes=[1])
|
||||
# Load forced VRS rates.
|
||||
intrinsic("load_force_vrs_rates_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
intrinsic("load_scalar_arg_amd", dest_comp=0, bit_sizes=[32], indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
intrinsic("load_vector_arg_amd", dest_comp=0, bit_sizes=[32], indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# src[] = { 64-bit base address, 32-bit offset }.
|
||||
intrinsic("load_smem_amd", src_comp=[1, 1], dest_comp=0, bit_sizes=[32],
|
||||
indices=[ALIGN_MUL, ALIGN_OFFSET],
|
||||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# V3D-specific intrinsic for tile buffer color reads.
|
||||
#
|
||||
# The hardware requires that we read the samples and components of a pixel
|
||||
|
Reference in New Issue
Block a user