intel/cs: Make thread_local_id a regular builtin param
This is a lot more natural than special-casing it all over the place. We still have to do a bit of special-casing in assign_constant_locations, but it's not special-cased quite as badly as it was before.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -542,6 +542,8 @@ enum brw_param_builtin {
|
|||||||
BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W,
|
BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W,
|
||||||
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X,
|
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X,
|
||||||
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y,
|
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y,
|
||||||
|
|
||||||
|
BRW_PARAM_BUILTIN_THREAD_LOCAL_ID,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define BRW_PARAM_BUILTIN_CLIP_PLANE(idx, comp) \
|
#define BRW_PARAM_BUILTIN_CLIP_PLANE(idx, comp) \
|
||||||
@@ -738,7 +740,6 @@ struct brw_cs_prog_data {
|
|||||||
unsigned threads;
|
unsigned threads;
|
||||||
bool uses_barrier;
|
bool uses_barrier;
|
||||||
bool uses_num_work_groups;
|
bool uses_num_work_groups;
|
||||||
int thread_local_id_index;
|
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
struct brw_push_const_block cross_thread;
|
struct brw_push_const_block cross_thread;
|
||||||
|
@@ -1935,6 +1935,20 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
get_thread_local_id_param_index(const brw_stage_prog_data *prog_data)
|
||||||
|
{
|
||||||
|
if (prog_data->nr_params == 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
/* The local thread id is always the last parameter in the list */
|
||||||
|
uint32_t last_param = prog_data->param[prog_data->nr_params - 1];
|
||||||
|
if (last_param == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID)
|
||||||
|
return prog_data->nr_params - 1;
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assign UNIFORM file registers to either push constants or pull constants.
|
* Assign UNIFORM file registers to either push constants or pull constants.
|
||||||
*
|
*
|
||||||
@@ -1963,10 +1977,6 @@ fs_visitor::assign_constant_locations()
|
|||||||
bool contiguous[uniforms];
|
bool contiguous[uniforms];
|
||||||
memset(contiguous, 0, sizeof(contiguous));
|
memset(contiguous, 0, sizeof(contiguous));
|
||||||
|
|
||||||
int thread_local_id_index =
|
|
||||||
(stage == MESA_SHADER_COMPUTE) ?
|
|
||||||
brw_cs_prog_data(stage_prog_data)->thread_local_id_index : -1;
|
|
||||||
|
|
||||||
/* First, we walk through the instructions and do two things:
|
/* First, we walk through the instructions and do two things:
|
||||||
*
|
*
|
||||||
* 1) Figure out which uniforms are live.
|
* 1) Figure out which uniforms are live.
|
||||||
@@ -2009,8 +2019,7 @@ fs_visitor::assign_constant_locations()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (thread_local_id_index >= 0 && !is_live[thread_local_id_index])
|
int thread_local_id_index = get_thread_local_id_param_index(stage_prog_data);
|
||||||
thread_local_id_index = -1;
|
|
||||||
|
|
||||||
/* Only allow 16 registers (128 uniform components) as push constants.
|
/* Only allow 16 registers (128 uniform components) as push constants.
|
||||||
*
|
*
|
||||||
@@ -2118,22 +2127,15 @@ fs_visitor::assign_constant_locations()
|
|||||||
* push_constant_loc[i] <= i and we can do it in one smooth loop without
|
* push_constant_loc[i] <= i and we can do it in one smooth loop without
|
||||||
* having to make a copy.
|
* having to make a copy.
|
||||||
*/
|
*/
|
||||||
int new_thread_local_id_index = -1;
|
|
||||||
for (unsigned int i = 0; i < uniforms; i++) {
|
for (unsigned int i = 0; i < uniforms; i++) {
|
||||||
uint32_t value = param[i];
|
uint32_t value = param[i];
|
||||||
if (pull_constant_loc[i] != -1) {
|
if (pull_constant_loc[i] != -1) {
|
||||||
stage_prog_data->pull_param[pull_constant_loc[i]] = value;
|
stage_prog_data->pull_param[pull_constant_loc[i]] = value;
|
||||||
} else if (push_constant_loc[i] != -1) {
|
} else if (push_constant_loc[i] != -1) {
|
||||||
stage_prog_data->param[push_constant_loc[i]] = value;
|
stage_prog_data->param[push_constant_loc[i]] = value;
|
||||||
if (thread_local_id_index == (int)i)
|
|
||||||
new_thread_local_id_index = push_constant_loc[i];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ralloc_free(param);
|
ralloc_free(param);
|
||||||
|
|
||||||
if (stage == MESA_SHADER_COMPUTE)
|
|
||||||
brw_cs_prog_data(stage_prog_data)->thread_local_id_index =
|
|
||||||
new_thread_local_id_index;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@@ -6698,24 +6700,20 @@ cs_fill_push_const_info(const struct gen_device_info *devinfo,
|
|||||||
struct brw_cs_prog_data *cs_prog_data)
|
struct brw_cs_prog_data *cs_prog_data)
|
||||||
{
|
{
|
||||||
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
||||||
bool fill_thread_id =
|
int thread_local_id_index = get_thread_local_id_param_index(prog_data);
|
||||||
cs_prog_data->thread_local_id_index >= 0 &&
|
|
||||||
cs_prog_data->thread_local_id_index < (int)prog_data->nr_params;
|
|
||||||
bool cross_thread_supported = devinfo->gen > 7 || devinfo->is_haswell;
|
bool cross_thread_supported = devinfo->gen > 7 || devinfo->is_haswell;
|
||||||
|
|
||||||
/* The thread ID should be stored in the last param dword */
|
/* The thread ID should be stored in the last param dword */
|
||||||
assert(prog_data->nr_params > 0 || !fill_thread_id);
|
assert(thread_local_id_index == -1 ||
|
||||||
assert(!fill_thread_id ||
|
thread_local_id_index == (int)prog_data->nr_params - 1);
|
||||||
cs_prog_data->thread_local_id_index ==
|
|
||||||
(int)prog_data->nr_params - 1);
|
|
||||||
|
|
||||||
unsigned cross_thread_dwords, per_thread_dwords;
|
unsigned cross_thread_dwords, per_thread_dwords;
|
||||||
if (!cross_thread_supported) {
|
if (!cross_thread_supported) {
|
||||||
cross_thread_dwords = 0u;
|
cross_thread_dwords = 0u;
|
||||||
per_thread_dwords = prog_data->nr_params;
|
per_thread_dwords = prog_data->nr_params;
|
||||||
} else if (fill_thread_id) {
|
} else if (thread_local_id_index >= 0) {
|
||||||
/* Fill all but the last register with cross-thread payload */
|
/* Fill all but the last register with cross-thread payload */
|
||||||
cross_thread_dwords = 8 * (cs_prog_data->thread_local_id_index / 8);
|
cross_thread_dwords = 8 * (thread_local_id_index / 8);
|
||||||
per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
|
per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
|
||||||
assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
|
assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
|
||||||
} else {
|
} else {
|
||||||
|
@@ -30,6 +30,7 @@ struct lower_intrinsics_state {
|
|||||||
nir_function_impl *impl;
|
nir_function_impl *impl;
|
||||||
bool progress;
|
bool progress;
|
||||||
nir_builder builder;
|
nir_builder builder;
|
||||||
|
int thread_local_id_index;
|
||||||
};
|
};
|
||||||
|
|
||||||
static nir_ssa_def *
|
static nir_ssa_def *
|
||||||
@@ -47,12 +48,13 @@ read_thread_local_id(struct lower_intrinsics_state *state)
|
|||||||
if (group_size <= 8)
|
if (group_size <= 8)
|
||||||
return nir_imm_int(b, 0);
|
return nir_imm_int(b, 0);
|
||||||
|
|
||||||
if (prog_data->thread_local_id_index == -1) {
|
if (state->thread_local_id_index == -1) {
|
||||||
prog_data->thread_local_id_index = prog_data->base.nr_params;
|
state->thread_local_id_index = prog_data->base.nr_params;
|
||||||
brw_stage_prog_data_add_params(&prog_data->base, 1);
|
uint32_t *param = brw_stage_prog_data_add_params(&prog_data->base, 1);
|
||||||
|
*param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID;
|
||||||
nir->num_uniforms += 4;
|
nir->num_uniforms += 4;
|
||||||
}
|
}
|
||||||
unsigned id_index = prog_data->thread_local_id_index;
|
unsigned id_index = state->thread_local_id_index;
|
||||||
|
|
||||||
nir_intrinsic_instr *load =
|
nir_intrinsic_instr *load =
|
||||||
nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
|
nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
|
||||||
@@ -165,7 +167,7 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir,
|
|||||||
state.nir = nir;
|
state.nir = nir;
|
||||||
state.prog_data = prog_data;
|
state.prog_data = prog_data;
|
||||||
|
|
||||||
state.prog_data->thread_local_id_index = -1;
|
state.thread_local_id_index = -1;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
state.progress = false;
|
state.progress = false;
|
||||||
|
@@ -707,12 +707,10 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
|||||||
uint32_t *u32_map = state.map;
|
uint32_t *u32_map = state.map;
|
||||||
|
|
||||||
if (cs_prog_data->push.cross_thread.size > 0) {
|
if (cs_prog_data->push.cross_thread.size > 0) {
|
||||||
assert(cs_prog_data->thread_local_id_index < 0 ||
|
|
||||||
cs_prog_data->thread_local_id_index >=
|
|
||||||
cs_prog_data->push.cross_thread.dwords);
|
|
||||||
for (unsigned i = 0;
|
for (unsigned i = 0;
|
||||||
i < cs_prog_data->push.cross_thread.dwords;
|
i < cs_prog_data->push.cross_thread.dwords;
|
||||||
i++) {
|
i++) {
|
||||||
|
assert(prog_data->param[i] != BRW_PARAM_BUILTIN_THREAD_LOCAL_ID);
|
||||||
u32_map[i] = anv_push_constant_value(data, prog_data->param[i]);
|
u32_map[i] = anv_push_constant_value(data, prog_data->param[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -724,11 +722,11 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
|||||||
cs_prog_data->push.cross_thread.regs);
|
cs_prog_data->push.cross_thread.regs);
|
||||||
unsigned src = cs_prog_data->push.cross_thread.dwords;
|
unsigned src = cs_prog_data->push.cross_thread.dwords;
|
||||||
for ( ; src < prog_data->nr_params; src++, dst++) {
|
for ( ; src < prog_data->nr_params; src++, dst++) {
|
||||||
if (src != cs_prog_data->thread_local_id_index) {
|
if (prog_data->param[src] == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID) {
|
||||||
|
u32_map[dst] = t * cs_prog_data->simd_size;
|
||||||
|
} else {
|
||||||
u32_map[dst] =
|
u32_map[dst] =
|
||||||
anv_push_constant_value(data, prog_data->param[src]);
|
anv_push_constant_value(data, prog_data->param[src]);
|
||||||
} else {
|
|
||||||
u32_map[dst] = t * cs_prog_data->simd_size;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -305,12 +305,10 @@ brw_upload_cs_push_constants(struct brw_context *brw,
|
|||||||
|
|
||||||
if (cs_prog_data->push.cross_thread.size > 0) {
|
if (cs_prog_data->push.cross_thread.size > 0) {
|
||||||
uint32_t *param_copy = param;
|
uint32_t *param_copy = param;
|
||||||
assert(cs_prog_data->thread_local_id_index < 0 ||
|
|
||||||
cs_prog_data->thread_local_id_index >=
|
|
||||||
cs_prog_data->push.cross_thread.dwords);
|
|
||||||
for (unsigned i = 0;
|
for (unsigned i = 0;
|
||||||
i < cs_prog_data->push.cross_thread.dwords;
|
i < cs_prog_data->push.cross_thread.dwords;
|
||||||
i++) {
|
i++) {
|
||||||
|
assert(prog_data->param[i] != BRW_PARAM_BUILTIN_THREAD_LOCAL_ID);
|
||||||
param_copy[i] = brw_param_value(brw, prog, stage_state,
|
param_copy[i] = brw_param_value(brw, prog, stage_state,
|
||||||
prog_data->param[i]);
|
prog_data->param[i]);
|
||||||
}
|
}
|
||||||
@@ -323,11 +321,11 @@ brw_upload_cs_push_constants(struct brw_context *brw,
|
|||||||
cs_prog_data->push.cross_thread.regs);
|
cs_prog_data->push.cross_thread.regs);
|
||||||
unsigned src = cs_prog_data->push.cross_thread.dwords;
|
unsigned src = cs_prog_data->push.cross_thread.dwords;
|
||||||
for ( ; src < prog_data->nr_params; src++, dst++) {
|
for ( ; src < prog_data->nr_params; src++, dst++) {
|
||||||
if (src != cs_prog_data->thread_local_id_index) {
|
if (prog_data->param[src] == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID) {
|
||||||
|
param[dst] = t * cs_prog_data->simd_size;
|
||||||
|
} else {
|
||||||
param[dst] = brw_param_value(brw, prog, stage_state,
|
param[dst] = brw_param_value(brw, prog, stage_state,
|
||||||
prog_data->param[src]);
|
prog_data->param[src]);
|
||||||
} else {
|
|
||||||
param[dst] = t * cs_prog_data->simd_size;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user