nir: Add load/store/atomic global intrinsics

These correspond roughly to reading/writing OpenCL global pointers.  The
idea is that they just take a bare address and load/store from it.  Of
course, exactly what this address means is driver-dependent.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
This commit is contained in:
Jason Ekstrand
2018-11-19 13:40:35 -06:00
parent 6380fedb60
commit e461926ef2
3 changed files with 39 additions and 1 deletions

View File

@@ -458,6 +458,34 @@ intrinsic("shared_atomic_fmin", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("shared_atomic_fmax", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("shared_atomic_fcomp_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
# Global atomic intrinsics
#
# All of the global atomic memory operations read a value from memory,
# compute a new value using one of the operations below, write the new
# value to memory, and return the original value read.
#
# All operations take 2 sources except the CompSwap variants
# (global_atomic_comp_swap and global_atomic_fcomp_swap), which take 3.
# These sources represent:
#
# 0: The memory address that the atomic operation will operate on.
# 1: The data parameter to the atomic function (i.e. the value to add
# in global_atomic_add, etc).
# 2: For CompSwap only: the second data parameter.
#
# NOTE(review): every global atomic declares a BASE index, whereas
# load_global/store_global declare no BASE -- confirm that BASE is
# intended for intrinsics that operate on a bare address.
intrinsic("global_atomic_add", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_imin", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_umin", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_imax", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_umax", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_and", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_or", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_xor", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_exchange", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_comp_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_fadd", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_fmin", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_fmax", src_comp=[1, 1], dest_comp=1, indices=[BASE])
intrinsic("global_atomic_fcomp_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
def system_value(name, dest_comp, indices=[], bit_sizes=[32]):
intrinsic("load_" + name, [], dest_comp, indices,
flags=[CAN_ELIMINATE, CAN_REORDER], sysval=True,
@@ -590,6 +618,9 @@ load("shared", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
load("push_constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
# src[] = { offset }. const_index[] = { base, range }
load("constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
# src[] = { address }.
# const_index[] = { access, align_mul, align_offset }
load("global", 1, [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
# Stores work the same way as loads, except now the first source is the value
# to store and the second (and possibly third) source specify where to store
@@ -610,3 +641,6 @@ store("ssbo", 3, [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
# src[] = { value, offset }.
# const_index[] = { base, write_mask, align_mul, align_offset }
store("shared", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
# src[] = { value, address }.
# const_index[] = { write_mask, access, align_mul, align_offset }
store("global", 2, [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])

View File

@@ -944,6 +944,7 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr)
case nir_intrinsic_load_output:
case nir_intrinsic_load_shared:
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_global:
return &instr->src[0];
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
@@ -952,6 +953,7 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr)
case nir_intrinsic_load_interpolated_input:
case nir_intrinsic_store_output:
case nir_intrinsic_store_shared:
case nir_intrinsic_store_global:
return &instr->src[1];
case nir_intrinsic_store_ssbo:
case nir_intrinsic_store_per_vertex_output:

View File

@@ -88,7 +88,8 @@ is_phi_src_scalarizable(nir_phi_src *src,
return deref->mode == nir_var_shader_in ||
deref->mode == nir_var_uniform ||
deref->mode == nir_var_mem_ubo ||
deref->mode == nir_var_mem_ssbo;
deref->mode == nir_var_mem_ssbo ||
deref->mode == nir_var_mem_global;
}
case nir_intrinsic_interp_deref_at_centroid:
@@ -97,6 +98,7 @@ is_phi_src_scalarizable(nir_phi_src *src,
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_global:
case nir_intrinsic_load_input:
return true;
default: