nir: Vectorize intrinsics

We used to have the number of components built into the intrinsic.  This
meant that all of our load/store intrinsics had vec1, vec2, vec3, and vec4
variants.  This led to piles of switch statements to generate the correct
intrinsic names, and introspection to figure out the number of components.
We can make things much nicer by allowing "vectorized" intrinsics.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
This commit is contained in:
Jason Ekstrand
2014-12-03 17:03:19 -08:00
parent d1d12efb36
commit 27663dbe8e
9 changed files with 123 additions and 315 deletions

View File

@@ -42,19 +42,9 @@
/* Wraps its arguments in braces.  Because the list sits inside ARR's own
 * parentheses, a comma-separated list such as ARR(1, src_comps) can be passed
 * to INTRINSIC() as a single macro argument. */
#define ARR(...) { __VA_ARGS__ }
INTRINSIC(load_var_vec1, 0, ARR(), true, 1, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(load_var_vec2, 0, ARR(), true, 2, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(load_var_vec3, 0, ARR(), true, 3, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(load_var_vec4, 0, ARR(), true, 4, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(store_var_vec1, 1, ARR(1), false, 0, 1, 0, 0)
INTRINSIC(store_var_vec2, 1, ARR(2), false, 0, 1, 0, 0)
INTRINSIC(store_var_vec3, 1, ARR(3), false, 0, 1, 0, 0)
INTRINSIC(store_var_vec4, 1, ARR(4), false, 0, 1, 0, 0)
INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
/* Variable access intrinsics.
 * NOTE(review): the 0 in load_var's fifth argument and in store_var's ARR(0)
 * presumably stands for "component count not fixed by the intrinsic itself"
 * -- confirm against the INTRINSIC() parameter documentation. */
INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
/*
* a barrier is an intrinsic with no inputs/outputs but which can't be moved
@@ -94,27 +84,6 @@ SYSTEM_VALUE(sample_pos, 2)
SYSTEM_VALUE(sample_mask_in, 1)
SYSTEM_VALUE(invocation_id, 1)
#define LOAD_OR_INTERP(name, num_srcs, src_comps, num_indices, flags) \
INTRINSIC(name##_vec1, num_srcs, ARR(src_comps), true, 1, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec2, num_srcs, ARR(src_comps), true, 2, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec3, num_srcs, ARR(src_comps), true, 3, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec4, num_srcs, ARR(src_comps), true, 4, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec1_indirect, 1 + num_srcs, ARR(1, src_comps), true, 1, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec2_indirect, 1 + num_srcs, ARR(1, src_comps), true, 2, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec3_indirect, 1 + num_srcs, ARR(1, src_comps), true, 3, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
INTRINSIC(name##_vec4_indirect, 1 + num_srcs, ARR(1, src_comps), true, 4, \
0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags)
#define LOAD(name, num_indices, flags) \
LOAD_OR_INTERP(load_##name, 0, 0, num_indices, flags)
/*
* The first index is the address to load from, and the second index is the
* number of array elements to load. For UBO's (and SSBO's), the first index
@@ -129,6 +98,12 @@ SYSTEM_VALUE(invocation_id, 1)
* elements begin immediately after the previous array element.
*/
/*
 * Declares the load_<name> intrinsic and its load_<name>_indirect variant.
 * The direct form takes no sources, ARR(); the indirect form takes one,
 * ARR(1).  num_indices is forwarded to INTRINSIC() unchanged.
 *
 * NIR_INTRINSIC_CAN_ELIMINATE is always set, and the caller's flags are OR'd
 * in on top of it.
 *
 * The final line carries no line continuation, so the definition ends here
 * whether or not a blank line follows it.
 */
#define LOAD(name, num_indices, flags) \
   INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \
             NIR_INTRINSIC_CAN_ELIMINATE | (flags)) \
   INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \
             NIR_INTRINSIC_CAN_ELIMINATE | (flags))
LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER)
LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER)
LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
@@ -140,29 +115,16 @@ LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
* interp_at_offset* intrinsics take a second source that is either a
* sample id or a vec2 position offset.
*/
#define INTERP(name, flags) \
LOAD_OR_INTERP(interp_##name, 0, 0, 2, flags)
#define INTERP_WITH_ARG(name, src_comps, flags) \
LOAD_OR_INTERP(interp_##name, 1, src_comps, 2, flags)
/*
 * Emits the interp_<name> intrinsic together with its
 * interp_<name>_indirect twin.  The direct form forwards num_srcs sources
 * described by ARR(src_comps); the indirect form has one source more,
 * described by ARR(1, src_comps).  Both forward 2 as the index count and set
 * NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER.
 */
#define INTERP(name, num_srcs, src_comps) \
   INTRINSIC(interp_##name, \
             num_srcs, ARR(src_comps), \
             true, 0, 0, 2, \
             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
   INTRINSIC(interp_##name##_indirect, \
             1 + num_srcs, ARR(1, src_comps), \
             true, 0, 0, 2, \
             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
INTERP(at_centroid, NIR_INTRINSIC_CAN_REORDER)
INTERP_WITH_ARG(at_sample, 1, NIR_INTRINSIC_CAN_REORDER)
INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
#define STORE(name, num_indices, flags) \
INTRINSIC(store_##name##_vec1, 1, ARR(1), false, 0, 0, num_indices, flags) \
INTRINSIC(store_##name##_vec2, 1, ARR(2), false, 0, 0, num_indices, flags) \
INTRINSIC(store_##name##_vec3, 1, ARR(3), false, 0, 0, num_indices, flags) \
INTRINSIC(store_##name##_vec4, 1, ARR(4), false, 0, 0, num_indices, flags) \
INTRINSIC(store_##name##_vec1_indirect, 2, ARR(1, 1), false, 0, 0, \
num_indices, flags) \
INTRINSIC(store_##name##_vec2_indirect, 2, ARR(2, 1), false, 0, 0, \
num_indices, flags) \
INTRINSIC(store_##name##_vec3_indirect, 2, ARR(3, 1), false, 0, 0, \
num_indices, flags) \
INTRINSIC(store_##name##_vec4_indirect, 2, ARR(4, 1), false, 0, 0, \
num_indices, flags) \
INTERP(at_centroid, 0, 0)
INTERP(at_sample, 1, 1)
INTERP(at_offset, 1, 1)
/*
* Stores work the same way as loads, except now the first register input is
@@ -170,7 +132,12 @@ INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
* offset.
*/
/*
 * Declares the store_<name> intrinsic and its store_<name>_indirect variant.
 * The direct form takes one source, ARR(0); the indirect form takes two,
 * ARR(0, 1).  num_indices and flags are forwarded to INTRINSIC() unchanged.
 *
 * The final line carries no line continuation, so the definition ends here
 * whether or not a blank line follows it.
 */
#define STORE(name, num_indices, flags) \
   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
             num_indices, flags)
STORE(output, 2, 0)
/* STORE(ssbo, 3, 0) */
LAST_INTRINSIC(store_output_vec4_indirect)
LAST_INTRINSIC(store_output_indirect)