nir: Vectorize intrinsics
We used to have the number of components built into the intrinsic.  This
meant that all of our load/store intrinsics had vec1, vec2, vec3, and vec4
variants.  This led to piles of switch statements to generate the correct
intrinsic names and introspection to figure out the number of components.
We can make things much nicer by allowing "vectorized" intrinsics.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
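As a rough illustration of the boilerplate this removes (a sketch only, not code from this commit; the helper names are made up, the first helper is written against the old pre-change opcodes, and the second against the new vectorized one, with destination setup omitted):

#include "nir.h"

/* Hypothetical helper, illustrative only.  Before this change, emitting a
 * load of n components meant switching to pick the matching vecN opcode: */
static nir_intrinsic_op
old_style_load_input_op(unsigned num_components)
{
   switch (num_components) {
   case 1: return nir_intrinsic_load_input_vec1;
   case 2: return nir_intrinsic_load_input_vec2;
   case 3: return nir_intrinsic_load_input_vec3;
   case 4: return nir_intrinsic_load_input_vec4;
   default: unreachable("invalid component count");
   }
}

/* With a vectorized intrinsic there is a single opcode and the width is a
 * property of the instruction itself: */
static nir_intrinsic_instr *
new_style_load_input(nir_shader *shader, unsigned num_components)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(shader, nir_intrinsic_load_input);
   load->num_components = num_components;
   return load;
}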
@@ -42,19 +42,9 @@
 #define ARR(...) { __VA_ARGS__ }
 
-INTRINSIC(load_var_vec1, 0, ARR(), true, 1, 1, 0,
-          NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(load_var_vec2, 0, ARR(), true, 2, 1, 0,
-          NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(load_var_vec3, 0, ARR(), true, 3, 1, 0,
-          NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(load_var_vec4, 0, ARR(), true, 4, 1, 0,
-          NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(store_var_vec1, 1, ARR(1), false, 0, 1, 0, 0)
-INTRINSIC(store_var_vec2, 1, ARR(2), false, 0, 1, 0, 0)
-INTRINSIC(store_var_vec3, 1, ARR(3), false, 0, 1, 0, 0)
-INTRINSIC(store_var_vec4, 1, ARR(4), false, 0, 1, 0, 0)
-INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
+INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
+INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
 
 /*
  * a barrier is an intrinsic with no inputs/outputs but which can't be moved
@@ -94,27 +84,6 @@ SYSTEM_VALUE(sample_pos, 2)
 SYSTEM_VALUE(sample_mask_in, 1)
 SYSTEM_VALUE(invocation_id, 1)
 
-#define LOAD_OR_INTERP(name, num_srcs, src_comps, num_indices, flags) \
-   INTRINSIC(name##_vec1, num_srcs, ARR(src_comps), true, 1, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec2, num_srcs, ARR(src_comps), true, 2, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec3, num_srcs, ARR(src_comps), true, 3, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec4, num_srcs, ARR(src_comps), true, 4, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec1_indirect, 1 + num_srcs, ARR(1, src_comps), true, 1, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec2_indirect, 1 + num_srcs, ARR(1, src_comps), true, 2, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec3_indirect, 1 + num_srcs, ARR(1, src_comps), true, 3, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec4_indirect, 1 + num_srcs, ARR(1, src_comps), true, 4, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags)
-
-#define LOAD(name, num_indices, flags) \
-   LOAD_OR_INTERP(load_##name, 0, 0, num_indices, flags)
 
 /*
  * The first index is the address to load from, and the second index is the
  * number of array elements to load. For UBO's (and SSBO's), the first index
@@ -129,6 +98,12 @@ SYSTEM_VALUE(invocation_id, 1)
  * elements begin immediately after the previous array element.
  */
 
+#define LOAD(name, num_indices, flags) \
+   INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \
+             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+   INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \
+             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+
 LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER)
 LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER)
 LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
@@ -140,29 +115,16 @@ LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
  * interp_at_offset* intrinsics take a second source that is either a
  * sample id or a vec2 position offset.
  */
-#define INTERP(name, flags) \
-   LOAD_OR_INTERP(interp_##name, 0, 0, 2, flags)
-
-#define INTERP_WITH_ARG(name, src_comps, flags) \
-   LOAD_OR_INTERP(interp_##name, 1, src_comps, 2, flags)
+#define INTERP(name, num_srcs, src_comps) \
+   INTRINSIC(interp_##name, num_srcs, ARR(src_comps), true, \
+             0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+   INTRINSIC(interp_##name##_indirect, 1 + num_srcs, ARR(1, src_comps), true, \
+             0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
-INTERP(at_centroid, NIR_INTRINSIC_CAN_REORDER)
-INTERP_WITH_ARG(at_sample, 1, NIR_INTRINSIC_CAN_REORDER)
-INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
-
-#define STORE(name, num_indices, flags) \
-   INTRINSIC(store_##name##_vec1, 1, ARR(1), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_vec2, 1, ARR(2), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_vec3, 1, ARR(3), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_vec4, 1, ARR(4), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_vec1_indirect, 2, ARR(1, 1), false, 0, 0, \
-             num_indices, flags) \
-   INTRINSIC(store_##name##_vec2_indirect, 2, ARR(2, 1), false, 0, 0, \
-             num_indices, flags) \
-   INTRINSIC(store_##name##_vec3_indirect, 2, ARR(3, 1), false, 0, 0, \
-             num_indices, flags) \
-   INTRINSIC(store_##name##_vec4_indirect, 2, ARR(4, 1), false, 0, 0, \
-             num_indices, flags) \
+INTERP(at_centroid, 0, 0)
+INTERP(at_sample, 1, 1)
+INTERP(at_offset, 1, 1)
 
 /*
  * Stores work the same way as loads, except now the first register input is
@@ -170,7 +132,12 @@ INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
  * offset.
  */
 
+#define STORE(name, num_indices, flags) \
+   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
+   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
+             num_indices, flags) \
+
 STORE(output, 2, 0)
 /* STORE(ssbo, 3, 0) */
 
-LAST_INTRINSIC(store_output_vec4_indirect)
+LAST_INTRINSIC(store_output_indirect)
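For reference, here is how the zero component counts in the new vectorized entries are meant to be consumed. This is a sketch only, not code from this commit; it assumes the nir_intrinsic_infos table and the per-instruction num_components field, and the helper name is hypothetical.

#include <assert.h>
#include "nir.h"

/* Hypothetical helper: a dest_components of 0 in the info table marks a
 * vectorized intrinsic, so the width comes from the instruction itself. */
static unsigned
intrinsic_dest_components(const nir_intrinsic_instr *instr)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];

   assert(info->has_dest);
   return info->dest_components ? info->dest_components
                                : instr->num_components;
}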