nir: Add system values from ARB_shader_ballot
We already had a channel_num system value, which I'm renaming to subgroup_invocation to match the rest of the new system values. Note that while ballotARB(true) will return zeros in the high 32 bits on systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB variables (i.e. the Eq, Ge, Gt, Le and Lt masks) do not consider whether channels are enabled. See issue (1) of ARB_shader_ballot. Reviewed-by: Connor Abbott <cwabbott0@gmail.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -1908,6 +1908,20 @@ nir_intrinsic_from_system_value(gl_system_value val)
|
|||||||
return nir_intrinsic_load_helper_invocation;
|
return nir_intrinsic_load_helper_invocation;
|
||||||
case SYSTEM_VALUE_VIEW_INDEX:
|
case SYSTEM_VALUE_VIEW_INDEX:
|
||||||
return nir_intrinsic_load_view_index;
|
return nir_intrinsic_load_view_index;
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_SIZE:
|
||||||
|
return nir_intrinsic_load_subgroup_size;
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_INVOCATION:
|
||||||
|
return nir_intrinsic_load_subgroup_invocation;
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
|
||||||
|
return nir_intrinsic_load_subgroup_eq_mask;
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_GE_MASK:
|
||||||
|
return nir_intrinsic_load_subgroup_ge_mask;
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_GT_MASK:
|
||||||
|
return nir_intrinsic_load_subgroup_gt_mask;
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_LE_MASK:
|
||||||
|
return nir_intrinsic_load_subgroup_le_mask;
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_LT_MASK:
|
||||||
|
return nir_intrinsic_load_subgroup_lt_mask;
|
||||||
default:
|
default:
|
||||||
unreachable("system value does not directly correspond to intrinsic");
|
unreachable("system value does not directly correspond to intrinsic");
|
||||||
}
|
}
|
||||||
@@ -1961,6 +1975,20 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
|
|||||||
return SYSTEM_VALUE_HELPER_INVOCATION;
|
return SYSTEM_VALUE_HELPER_INVOCATION;
|
||||||
case nir_intrinsic_load_view_index:
|
case nir_intrinsic_load_view_index:
|
||||||
return SYSTEM_VALUE_VIEW_INDEX;
|
return SYSTEM_VALUE_VIEW_INDEX;
|
||||||
|
case nir_intrinsic_load_subgroup_size:
|
||||||
|
return SYSTEM_VALUE_SUBGROUP_SIZE;
|
||||||
|
case nir_intrinsic_load_subgroup_invocation:
|
||||||
|
return SYSTEM_VALUE_SUBGROUP_INVOCATION;
|
||||||
|
case nir_intrinsic_load_subgroup_eq_mask:
|
||||||
|
return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
|
||||||
|
case nir_intrinsic_load_subgroup_ge_mask:
|
||||||
|
return SYSTEM_VALUE_SUBGROUP_GE_MASK;
|
||||||
|
case nir_intrinsic_load_subgroup_gt_mask:
|
||||||
|
return SYSTEM_VALUE_SUBGROUP_GT_MASK;
|
||||||
|
case nir_intrinsic_load_subgroup_le_mask:
|
||||||
|
return SYSTEM_VALUE_SUBGROUP_LE_MASK;
|
||||||
|
case nir_intrinsic_load_subgroup_lt_mask:
|
||||||
|
return SYSTEM_VALUE_SUBGROUP_LT_MASK;
|
||||||
default:
|
default:
|
||||||
unreachable("intrinsic doesn't produce a system value");
|
unreachable("intrinsic doesn't produce a system value");
|
||||||
}
|
}
|
||||||
|
@@ -1822,6 +1822,7 @@ typedef struct nir_shader_compiler_options {
|
|||||||
bool lower_extract_word;
|
bool lower_extract_word;
|
||||||
|
|
||||||
bool lower_vote_trivial;
|
bool lower_vote_trivial;
|
||||||
|
bool lower_subgroup_masks;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Does the driver support real 32-bit integers? (Otherwise, integers
|
* Does the driver support real 32-bit integers? (Otherwise, integers
|
||||||
|
@@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
|
|||||||
SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
|
SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
|
||||||
SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
|
SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
|
||||||
SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
|
SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
|
||||||
SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
|
|
||||||
SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
|
SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
|
||||||
SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
|
SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
|
||||||
SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
|
SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
|
||||||
|
SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
|
||||||
|
SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
|
||||||
|
SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
|
||||||
|
SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
|
||||||
|
SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
|
||||||
|
SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
|
||||||
|
SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
|
||||||
|
|
||||||
/* Blend constant color values. Float values are clamped. */
|
/* Blend constant color values. Float values are clamped. */
|
||||||
SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
|
SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
|
||||||
|
@@ -116,6 +116,20 @@ convert_block(nir_block *block, nir_builder *b)
|
|||||||
nir_load_base_instance(b));
|
nir_load_base_instance(b));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_GE_MASK:
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_GT_MASK:
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_LE_MASK:
|
||||||
|
case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
|
||||||
|
nir_intrinsic_op op =
|
||||||
|
nir_intrinsic_from_system_value(var->data.location);
|
||||||
|
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
|
||||||
|
nir_ssa_dest_init(&load->instr, &load->dest, 1, 64, NULL);
|
||||||
|
nir_builder_instr_insert(b, &load->instr);
|
||||||
|
sysval = &load->dest.ssa;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -62,6 +62,37 @@ opt_intrinsics_impl(nir_function_impl *impl)
|
|||||||
replacement = nir_imm_int(&b, NIR_TRUE);
|
replacement = nir_imm_int(&b, NIR_TRUE);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case nir_intrinsic_load_subgroup_eq_mask:
|
||||||
|
case nir_intrinsic_load_subgroup_ge_mask:
|
||||||
|
case nir_intrinsic_load_subgroup_gt_mask:
|
||||||
|
case nir_intrinsic_load_subgroup_le_mask:
|
||||||
|
case nir_intrinsic_load_subgroup_lt_mask: {
|
||||||
|
if (!b.shader->options->lower_subgroup_masks)
|
||||||
|
break;
|
||||||
|
|
||||||
|
nir_ssa_def *count = nir_load_subgroup_invocation(&b);
|
||||||
|
|
||||||
|
switch (intrin->intrinsic) {
|
||||||
|
case nir_intrinsic_load_subgroup_eq_mask:
|
||||||
|
replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_load_subgroup_ge_mask:
|
||||||
|
replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull), count);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_load_subgroup_gt_mask:
|
||||||
|
replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull), count);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_load_subgroup_le_mask:
|
||||||
|
replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count));
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_load_subgroup_lt_mask:
|
||||||
|
replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~0ull), count));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("you seriously can't tell this is unreachable?");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -4103,7 +4103,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case nir_intrinsic_load_channel_num: {
|
case nir_intrinsic_load_subgroup_invocation: {
|
||||||
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
|
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||||
dest = retype(dest, BRW_REGISTER_TYPE_UD);
|
dest = retype(dest, BRW_REGISTER_TYPE_UD);
|
||||||
const fs_builder allbld8 = bld.group(8, 0).exec_all();
|
const fs_builder allbld8 = bld.group(8, 0).exec_all();
|
||||||
|
@@ -88,10 +88,10 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
|
|||||||
/* We construct the local invocation index from:
|
/* We construct the local invocation index from:
|
||||||
*
|
*
|
||||||
* gl_LocalInvocationIndex =
|
* gl_LocalInvocationIndex =
|
||||||
* cs_thread_local_id + channel_num;
|
* cs_thread_local_id + subgroup_invocation;
|
||||||
*/
|
*/
|
||||||
nir_ssa_def *thread_local_id = read_thread_local_id(state);
|
nir_ssa_def *thread_local_id = read_thread_local_id(state);
|
||||||
nir_ssa_def *channel = nir_load_channel_num(b);
|
nir_ssa_def *channel = nir_load_subgroup_invocation(b);
|
||||||
sysval = nir_iadd(b, channel, thread_local_id);
|
sysval = nir_iadd(b, channel, thread_local_id);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user