radv: Handle clip+cull distances more generally as compact arrays.

Needed for merge request 248: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/248

That MR keeps the clip and cull arrays split.

So we have to handle:
 - compact arrays with location_frac != 0
 - VARYING_SLOT_CLIP_DIST1

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Bas Nieuwenhuizen
2019-02-16 02:24:14 +01:00
parent 8cfc17bdda
commit 1ef2855692
4 changed files with 82 additions and 98 deletions

View File

@@ -1936,14 +1936,18 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
if (var) { if (var) {
bool vs_in = ctx->stage == MESA_SHADER_VERTEX && bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
var->data.mode == nir_var_shader_in; var->data.mode == nir_var_shader_in;
if (var->data.compact)
stride = 1;
idx = var->data.driver_location; idx = var->data.driver_location;
comp = var->data.location_frac; comp = var->data.location_frac;
mode = var->data.mode; mode = var->data.mode;
get_deref_offset(ctx, deref, vs_in, NULL, NULL, get_deref_offset(ctx, deref, vs_in, NULL, NULL,
&const_index, &indir_index); &const_index, &indir_index);
if (var->data.compact) {
stride = 1;
const_index += comp;
comp = 0;
}
} }
if (instr->dest.ssa.bit_size == 64 && if (instr->dest.ssa.bit_size == 64 &&
@@ -2091,6 +2095,11 @@ visit_store_var(struct ac_nir_context *ctx,
NULL, NULL, &const_index, &indir_index); NULL, NULL, &const_index, &indir_index);
idx = var->data.driver_location; idx = var->data.driver_location;
comp = var->data.location_frac; comp = var->data.location_frac;
if (var->data.compact) {
const_index += comp;
comp = 0;
}
} }
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 && if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 &&

View File

@@ -1503,7 +1503,7 @@ store_tcs_output(struct ac_shader_abi *abi,
{ {
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
const unsigned location = var->data.location; const unsigned location = var->data.location;
const unsigned component = var->data.location_frac; unsigned component = var->data.location_frac;
const bool is_patch = var->data.patch; const bool is_patch = var->data.patch;
const bool is_compact = var->data.compact; const bool is_compact = var->data.compact;
LLVMValueRef dw_addr; LLVMValueRef dw_addr;
@@ -1521,11 +1521,15 @@ store_tcs_output(struct ac_shader_abi *abi,
} }
param = shader_io_get_unique_index(location); param = shader_io_get_unique_index(location);
if (location == VARYING_SLOT_CLIP_DIST0 && if ((location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) && is_compact) {
is_compact && const_index > 3) { const_index += component;
const_index -= 3; component = 0;
if (const_index >= 4) {
const_index -= 4;
param++; param++;
} }
}
if (!is_patch) { if (!is_patch) {
stride = get_tcs_out_vertex_stride(ctx); stride = get_tcs_out_vertex_stride(ctx);
@@ -1591,10 +1595,14 @@ load_tes_input(struct ac_shader_abi *abi,
LLVMValueRef result; LLVMValueRef result;
unsigned param = shader_io_get_unique_index(location); unsigned param = shader_io_get_unique_index(location);
if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { if ((location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) && is_compact) {
const_index -= 3; const_index += component;
component = 0;
if (const_index >= 4) {
const_index -= 4;
param++; param++;
} }
}
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
is_compact, vertex_index, param_index); is_compact, vertex_index, param_index);
@@ -2248,6 +2256,14 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
uint64_t mask; uint64_t mask;
variable->data.driver_location = idx * 4; variable->data.driver_location = idx * 4;
if (variable->data.compact) {
unsigned component_count = variable->data.location_frac +
glsl_get_length(variable->type);
attrib_count = (component_count + 3) / 4;
}
mask = ((1ull << attrib_count) - 1) << variable->data.location; mask = ((1ull << attrib_count) - 1) << variable->data.location;
if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
@@ -2269,14 +2285,6 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
for (unsigned i = 0; i < attrib_count; ++i) for (unsigned i = 0; i < attrib_count; ++i)
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp; ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
if (idx == VARYING_SLOT_CLIP_DIST0) {
/* Do not account for the number of components inside the array
* of clip/cull distances because this might wrongly set other
* bits like primitive ID or layer.
*/
mask = 1ull << VARYING_SLOT_CLIP_DIST0;
}
ctx->input_mask |= mask; ctx->input_mask |= mask;
} }
@@ -2388,6 +2396,12 @@ scan_shader_output_decl(struct radv_shader_context *ctx,
if (stage == MESA_SHADER_TESS_CTRL) if (stage == MESA_SHADER_TESS_CTRL)
return; return;
if (variable->data.compact) {
unsigned component_count = variable->data.location_frac +
glsl_get_length(variable->type);
attrib_count = (component_count + 3) / 4;
}
mask_attribs = ((1ull << attrib_count) - 1) << idx; mask_attribs = ((1ull << attrib_count) - 1) << idx;
if (stage == MESA_SHADER_VERTEX || if (stage == MESA_SHADER_VERTEX ||
stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_TESS_EVAL ||
@@ -2403,8 +2417,6 @@ scan_shader_output_decl(struct radv_shader_context *ctx,
ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1; ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
ctx->shader_info->tes.outinfo.cull_dist_mask <<= shader->info.clip_distance_array_size; ctx->shader_info->tes.outinfo.cull_dist_mask <<= shader->info.clip_distance_array_size;
} }
mask_attribs = 1ull << idx;
} }
} }
@@ -2749,51 +2761,41 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
sizeof(outinfo->vs_output_param_offset)); sizeof(outinfo->vs_output_param_offset));
if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { for(unsigned location = VARYING_SLOT_CLIP_DIST0; location <= VARYING_SLOT_CLIP_DIST1; ++location) {
if (ctx->output_mask & (1ull << location)) {
unsigned output_usage_mask, length; unsigned output_usage_mask, length;
LLVMValueRef slots[8]; LLVMValueRef slots[4];
unsigned j; unsigned j;
if (ctx->stage == MESA_SHADER_VERTEX && if (ctx->stage == MESA_SHADER_VERTEX &&
!ctx->is_gs_copy_shader) { !ctx->is_gs_copy_shader) {
output_usage_mask = output_usage_mask =
ctx->shader_info->info.vs.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; ctx->shader_info->info.vs.output_usage_mask[location];
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) { } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
output_usage_mask = output_usage_mask =
ctx->shader_info->info.tes.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; ctx->shader_info->info.tes.output_usage_mask[location];
} else { } else {
assert(ctx->is_gs_copy_shader); assert(ctx->is_gs_copy_shader);
output_usage_mask = output_usage_mask =
ctx->shader_info->info.gs.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; ctx->shader_info->info.gs.output_usage_mask[location];
} }
length = util_last_bit(output_usage_mask); length = util_last_bit(output_usage_mask);
i = VARYING_SLOT_CLIP_DIST0;
for (j = 0; j < length; j++) for (j = 0; j < length; j++)
slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, location, j));
for (i = length; i < 8; i++) for (i = length; i < 4; i++)
slots[i] = LLVMGetUndef(ctx->ac.f32); slots[i] = LLVMGetUndef(ctx->ac.f32);
if (length > 4) { target = V_008DFC_SQ_EXP_POS + 2 + (location - VARYING_SLOT_CLIP_DIST0);
target = V_008DFC_SQ_EXP_POS + 3;
si_llvm_init_export_args(ctx, &slots[4], 0xf, target, &args);
memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
&args, sizeof(args));
}
target = V_008DFC_SQ_EXP_POS + 2;
si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args); si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args);
memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
&args, sizeof(args)); &args, sizeof(args));
/* Export the clip/cull distances values to the next stage. */ /* Export the clip/cull distances values to the next stage. */
radv_export_param(ctx, param_count, &slots[0], 0xf); radv_export_param(ctx, param_count, &slots[0], 0xf);
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++; outinfo->vs_output_param_offset[location] = param_count++;
if (length > 4) {
radv_export_param(ctx, param_count, &slots[4], 0xf);
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
} }
} }
@@ -2954,28 +2956,14 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
LLVMValueRef lds_base = NULL; LLVMValueRef lds_base = NULL;
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask;
int param_index; int param_index;
int length = 4;
if (!(ctx->output_mask & (1ull << i))) if (!(ctx->output_mask & (1ull << i)))
continue; continue;
if (ctx->stage == MESA_SHADER_VERTEX) {
output_usage_mask =
ctx->shader_info->info.vs.output_usage_mask[i];
} else {
assert(ctx->stage == MESA_SHADER_TESS_EVAL);
output_usage_mask =
ctx->shader_info->info.tes.output_usage_mask[i];
}
if (i == VARYING_SLOT_CLIP_DIST0)
length = util_last_bit(output_usage_mask);
param_index = shader_io_get_unique_index(i); param_index = shader_io_get_unique_index(i);
max_output_written = MAX2(param_index + (length > 4), max_output_written); max_output_written = MAX2(param_index, max_output_written);
} }
outinfo->esgs_itemsize = (max_output_written + 1) * 16; outinfo->esgs_itemsize = (max_output_written + 1) * 16;
@@ -2996,7 +2984,6 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
unsigned output_usage_mask; unsigned output_usage_mask;
int param_index; int param_index;
int length = 4;
if (!(ctx->output_mask & (1ull << i))) if (!(ctx->output_mask & (1ull << i)))
continue; continue;
@@ -3010,9 +2997,6 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
ctx->shader_info->info.tes.output_usage_mask[i]; ctx->shader_info->info.tes.output_usage_mask[i];
} }
if (i == VARYING_SLOT_CLIP_DIST0)
length = util_last_bit(output_usage_mask);
param_index = shader_io_get_unique_index(i); param_index = shader_io_get_unique_index(i);
if (lds_base) { if (lds_base) {
@@ -3021,7 +3005,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
""); "");
} }
for (j = 0; j < length; j++) { for (j = 0; j < 4; j++) {
if (!(output_usage_mask & (1 << j))) if (!(output_usage_mask & (1 << j)))
continue; continue;
@@ -3058,22 +3042,16 @@ handle_ls_outputs_post(struct radv_shader_context *ctx)
vertex_dw_stride, ""); vertex_dw_stride, "");
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask =
ctx->shader_info->info.vs.output_usage_mask[i];
LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
int length = 4;
if (!(ctx->output_mask & (1ull << i))) if (!(ctx->output_mask & (1ull << i)))
continue; continue;
if (i == VARYING_SLOT_CLIP_DIST0)
length = util_last_bit(output_usage_mask);
int param = shader_io_get_unique_index(i); int param = shader_io_get_unique_index(i);
LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
LLVMConstInt(ctx->ac.i32, param * 4, false), LLVMConstInt(ctx->ac.i32, param * 4, false),
""); "");
for (unsigned j = 0; j < length; j++) { for (unsigned j = 0; j < 4; j++) {
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""); LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
value = ac_to_integer(&ctx->ac, value); value = ac_to_integer(&ctx->ac, value);
value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, ""); value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, "");

View File

@@ -410,6 +410,8 @@ static inline unsigned shader_io_get_unique_index(gl_varying_slot slot)
return 1; return 1;
if (slot == VARYING_SLOT_CLIP_DIST0) if (slot == VARYING_SLOT_CLIP_DIST0)
return 2; return 2;
if (slot == VARYING_SLOT_CLIP_DIST1)
return 3;
/* 3 is reserved for clip dist as well */ /* 3 is reserved for clip dist as well */
if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
return 4 + (slot - VARYING_SLOT_VAR0); return 4 + (slot - VARYING_SLOT_VAR0);

View File

@@ -129,11 +129,9 @@ set_output_usage_mask(const nir_shader *nir, const nir_intrinsic_instr *instr,
get_deref_offset(deref_instr, &const_offset); get_deref_offset(deref_instr, &const_offset);
if (idx == VARYING_SLOT_CLIP_DIST0) { if (var->data.compact) {
/* Special case for clip/cull distances because there are const_offset += comp;
* combined into a single array that contains both. output_usage_mask[idx + const_offset / 4] |= 1 << (const_offset % 4);
*/
output_usage_mask[idx] |= 1 << const_offset;
return; return;
} }
@@ -174,13 +172,9 @@ gather_intrinsic_store_deref_info(const nir_shader *nir,
type = glsl_get_array_element(var->type); type = glsl_get_array_element(var->type);
unsigned slots = unsigned slots =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4) var->data.compact ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4)
: glsl_count_attribute_slots(type, false); : glsl_count_attribute_slots(type, false);
if (idx == VARYING_SLOT_CLIP_DIST0)
slots = (nir->info.clip_distance_array_size +
nir->info.cull_distance_array_size > 4) ? 2 : 1;
mark_tess_output(info, var->data.patch, param, slots); mark_tess_output(info, var->data.patch, param, slots);
break; break;
} }
@@ -400,7 +394,8 @@ gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
info->ps.layer_input = true; info->ps.layer_input = true;
break; break;
case VARYING_SLOT_CLIP_DIST0: case VARYING_SLOT_CLIP_DIST0:
info->ps.num_input_clips_culls = attrib_count; case VARYING_SLOT_CLIP_DIST1:
info->ps.num_input_clips_culls += attrib_count;
break; break;
default: default:
break; break;
@@ -435,8 +430,8 @@ gather_info_output_decl_ls(const nir_shader *nir, const nir_variable *var,
int idx = var->data.location; int idx = var->data.location;
unsigned param = shader_io_get_unique_index(idx); unsigned param = shader_io_get_unique_index(idx);
int num_slots = glsl_count_attribute_slots(var->type, false); int num_slots = glsl_count_attribute_slots(var->type, false);
if (idx == VARYING_SLOT_CLIP_DIST0) if (var->data.compact)
num_slots = (nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4) ? 2 : 1; num_slots = DIV_ROUND_UP(var->data.location_frac + glsl_get_length(var->type), 4);
mark_ls_output(info, param, num_slots); mark_ls_output(info, param, num_slots);
} }